1 /*
2  * Copyright (C) 2013, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "dictionary/header/header_read_write_utils.h"
18 
#include <cctype>
#include <climits>
#include <cstdio>
#include <memory>
#include <vector>
23 
24 #include "defines.h"
25 #include "dictionary/utils/buffer_with_extendable_buffer.h"
26 #include "dictionary/utils/byte_array_utils.h"
27 
28 namespace latinime {
29 
30 // Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator.
31 // As such, this is the maximum number of characters will be needed to represent an int as a
32 // string, including the terminator; this is used as the size of a string buffer large enough to
33 // hold any value that is intended to fit in an integer, e.g. in the code that reads the header
34 // of the binary dictionary where a {key,value} string pair scheme is used.
35 const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11;
36 
37 const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
38 const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 2048;
39 
40 const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
41 const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
42 const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
43 const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
44 const char *const HeaderReadWriteUtils::CODE_POINT_TABLE_KEY = "codePointTable";
45 
46 const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
47 
48 typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
49 
getHeaderSize(const uint8_t * const dictBuf)50 /* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
51     // See the format of the header in the comment in
52     // BinaryDictionaryFormatUtils::detectFormatVersion()
53     return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE
54             + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE);
55 }
56 
57 /* static */ HeaderReadWriteUtils::DictionaryFlags
getFlags(const uint8_t * const dictBuf)58         HeaderReadWriteUtils::getFlags(const uint8_t *const dictBuf) {
59     return ByteArrayUtils::readUint16(dictBuf,
60             HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
61 }
62 
63 /* static */ HeaderReadWriteUtils::DictionaryFlags
createAndGetDictionaryFlagsUsingAttributeMap(const AttributeMap * const attributeMap)64         HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
65                 const AttributeMap *const attributeMap) {
66     return NO_FLAGS;
67 }
68 
fetchAllHeaderAttributes(const uint8_t * const dictBuf,AttributeMap * const headerAttributes)69 /* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
70         AttributeMap *const headerAttributes) {
71     const int headerSize = getHeaderSize(dictBuf);
72     int pos = getHeaderOptionsPosition();
73     if (pos == NOT_A_DICT_POS) {
74         // The header doesn't have header options.
75         return;
76     }
77     int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
78     std::unique_ptr<int[]> valueBuffer(new int[MAX_ATTRIBUTE_VALUE_LENGTH]);
79     while (pos < headerSize) {
80         // The values in the header don't use the code point table for their encoding.
81         const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
82                 MAX_ATTRIBUTE_KEY_LENGTH, nullptr /* codePointTable */, keyBuffer, &pos);
83         std::vector<int> key;
84         key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
85         const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
86                 MAX_ATTRIBUTE_VALUE_LENGTH, nullptr /* codePointTable */, valueBuffer.get(), &pos);
87         std::vector<int> value;
88         value.insert(value.end(), valueBuffer.get(), valueBuffer.get() + valueLength);
89         headerAttributes->insert(AttributeMap::value_type(key, value));
90     }
91 }
92 
readCodePointTable(AttributeMap * const headerAttributes)93 /* static */ const int *HeaderReadWriteUtils::readCodePointTable(
94         AttributeMap *const headerAttributes) {
95     AttributeMap::key_type keyVector;
96     insertCharactersIntoVector(CODE_POINT_TABLE_KEY, &keyVector);
97     AttributeMap::const_iterator it = headerAttributes->find(keyVector);
98     if (it == headerAttributes->end()) {
99         return nullptr;
100     }
101     return it->second.data();
102 }
103 
writeDictionaryVersion(BufferWithExtendableBuffer * const buffer,const FormatUtils::FORMAT_VERSION version,int * const writingPos)104 /* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
105         BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
106         int *const writingPos) {
107     if (!buffer->writeUintAndAdvancePosition(FormatUtils::MAGIC_NUMBER, HEADER_MAGIC_NUMBER_SIZE,
108             writingPos)) {
109         return false;
110     }
111     switch (version) {
112         case FormatUtils::VERSION_2:
113         case FormatUtils::VERSION_201:
114         case FormatUtils::VERSION_202:
115             // None of the static dictionaries (v2x) support writing
116             return false;
117         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
118         case FormatUtils::VERSION_402:
119         case FormatUtils::VERSION_403:
120             return buffer->writeUintAndAdvancePosition(version /* data */,
121                     HEADER_DICTIONARY_VERSION_SIZE, writingPos);
122         default:
123             return false;
124     }
125 }
126 
writeDictionaryFlags(BufferWithExtendableBuffer * const buffer,const DictionaryFlags flags,int * const writingPos)127 /* static */ bool HeaderReadWriteUtils::writeDictionaryFlags(
128         BufferWithExtendableBuffer *const buffer, const DictionaryFlags flags,
129         int *const writingPos) {
130     return buffer->writeUintAndAdvancePosition(flags, HEADER_FLAG_SIZE, writingPos);
131 }
132 
writeDictionaryHeaderSize(BufferWithExtendableBuffer * const buffer,const int size,int * const writingPos)133 /* static */ bool HeaderReadWriteUtils::writeDictionaryHeaderSize(
134         BufferWithExtendableBuffer *const buffer, const int size, int *const writingPos) {
135     return buffer->writeUintAndAdvancePosition(size, HEADER_SIZE_FIELD_SIZE, writingPos);
136 }
137 
writeHeaderAttributes(BufferWithExtendableBuffer * const buffer,const AttributeMap * const headerAttributes,int * const writingPos)138 /* static */ bool HeaderReadWriteUtils::writeHeaderAttributes(
139         BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes,
140         int *const writingPos) {
141     for (AttributeMap::const_iterator it = headerAttributes->begin();
142             it != headerAttributes->end(); ++it) {
143         if (it->first.empty() || it->second.empty()) {
144             continue;
145         }
146         // Write a key.
147         if (!buffer->writeCodePointsAndAdvancePosition(&(it->first.at(0)), it->first.size(),
148                 true /* writesTerminator */, writingPos)) {
149             return false;
150         }
151         // Write a value.
152         if (!buffer->writeCodePointsAndAdvancePosition(&(it->second.at(0)), it->second.size(),
153                 true /* writesTerminator */, writingPos)) {
154             return false;
155         }
156     }
157     return true;
158 }
159 
setCodePointVectorAttribute(AttributeMap * const headerAttributes,const char * const key,const std::vector<int> & value)160 /* static */ void HeaderReadWriteUtils::setCodePointVectorAttribute(
161         AttributeMap *const headerAttributes, const char *const key,
162         const std::vector<int> &value) {
163     AttributeMap::key_type keyVector;
164     insertCharactersIntoVector(key, &keyVector);
165     (*headerAttributes)[keyVector] = value;
166 }
167 
setBoolAttribute(AttributeMap * const headerAttributes,const char * const key,const bool value)168 /* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes,
169         const char *const key, const bool value) {
170     setIntAttribute(headerAttributes, key, value ? 1 : 0);
171 }
172 
setIntAttribute(AttributeMap * const headerAttributes,const char * const key,const int value)173 /* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes,
174         const char *const key, const int value) {
175     AttributeMap::key_type keyVector;
176     insertCharactersIntoVector(key, &keyVector);
177     setIntAttributeInner(headerAttributes, &keyVector, value);
178 }
179 
setIntAttributeInner(AttributeMap * const headerAttributes,const AttributeMap::key_type * const key,const int value)180 /* static */ void HeaderReadWriteUtils::setIntAttributeInner(AttributeMap *const headerAttributes,
181         const AttributeMap::key_type *const key, const int value) {
182     AttributeMap::mapped_type valueVector;
183     char charBuf[LARGEST_INT_DIGIT_COUNT];
184     snprintf(charBuf, sizeof(charBuf), "%d", value);
185     insertCharactersIntoVector(charBuf, &valueVector);
186     (*headerAttributes)[*key] = valueVector;
187 }
188 
readCodePointVectorAttributeValue(const AttributeMap * const headerAttributes,const char * const key)189 /* static */ const std::vector<int> HeaderReadWriteUtils::readCodePointVectorAttributeValue(
190         const AttributeMap *const headerAttributes, const char *const key) {
191     AttributeMap::key_type keyVector;
192     insertCharactersIntoVector(key, &keyVector);
193     AttributeMap::const_iterator it = headerAttributes->find(keyVector);
194     if (it == headerAttributes->end()) {
195         return std::vector<int>();
196     } else {
197         return it->second;
198     }
199 }
200 
readBoolAttributeValue(const AttributeMap * const headerAttributes,const char * const key,const bool defaultValue)201 /* static */ bool HeaderReadWriteUtils::readBoolAttributeValue(
202         const AttributeMap *const headerAttributes, const char *const key,
203         const bool defaultValue) {
204     const int intDefaultValue = defaultValue ? 1 : 0;
205     const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue);
206     return intValue != 0;
207 }
208 
readIntAttributeValue(const AttributeMap * const headerAttributes,const char * const key,const int defaultValue)209 /* static */ int HeaderReadWriteUtils::readIntAttributeValue(
210         const AttributeMap *const headerAttributes, const char *const key,
211         const int defaultValue) {
212     AttributeMap::key_type keyVector;
213     insertCharactersIntoVector(key, &keyVector);
214     return readIntAttributeValueInner(headerAttributes, &keyVector, defaultValue);
215 }
216 
readIntAttributeValueInner(const AttributeMap * const headerAttributes,const AttributeMap::key_type * const key,const int defaultValue)217 /* static */ int HeaderReadWriteUtils::readIntAttributeValueInner(
218         const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
219         const int defaultValue) {
220     AttributeMap::const_iterator it = headerAttributes->find(*key);
221     if (it != headerAttributes->end()) {
222         int value = 0;
223         bool isNegative = false;
224         for (size_t i = 0; i < it->second.size(); ++i) {
225             if (i == 0 && it->second.at(i) == '-') {
226                 isNegative = true;
227             } else {
228                 if (!isdigit(it->second.at(i))) {
229                     // If not a number.
230                     return defaultValue;
231                 }
232                 value *= 10;
233                 value += it->second.at(i) - '0';
234             }
235         }
236         return isNegative ? -value : value;
237     }
238     return defaultValue;
239 }
240 
insertCharactersIntoVector(const char * const characters,std::vector<int> * const vector)241 /* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters,
242         std::vector<int> *const vector) {
243     for (int i = 0; characters[i]; ++i) {
244         vector->push_back(characters[i]);
245     }
246 }
247 
248 } // namespace latinime
249