1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8  * Copyright 2006-2007 Jeremias Maerki.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  *      http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <limits>
24 #include <memory>
25 #include <vector>
26 
27 #include "xfa/fxbarcode/BC_Dimension.h"
28 #include "xfa/fxbarcode/BC_UtilCodingConvert.h"
29 #include "xfa/fxbarcode/common/BC_CommonBitMatrix.h"
30 #include "xfa/fxbarcode/datamatrix/BC_ASCIIEncoder.h"
31 #include "xfa/fxbarcode/datamatrix/BC_Base256Encoder.h"
32 #include "xfa/fxbarcode/datamatrix/BC_C40Encoder.h"
33 #include "xfa/fxbarcode/datamatrix/BC_EdifactEncoder.h"
34 #include "xfa/fxbarcode/datamatrix/BC_Encoder.h"
35 #include "xfa/fxbarcode/datamatrix/BC_EncoderContext.h"
36 #include "xfa/fxbarcode/datamatrix/BC_HighLevelEncoder.h"
37 #include "xfa/fxbarcode/datamatrix/BC_SymbolInfo.h"
38 #include "xfa/fxbarcode/datamatrix/BC_SymbolShapeHint.h"
39 #include "xfa/fxbarcode/datamatrix/BC_TextEncoder.h"
40 #include "xfa/fxbarcode/datamatrix/BC_X12Encoder.h"
41 #include "xfa/fxbarcode/utils.h"
42 
43 FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_C40 = 230;
44 FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_BASE256 = 231;
45 FX_WCHAR CBC_HighLevelEncoder::UPPER_SHIFT = 235;
46 FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_ANSIX12 = 238;
47 FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_TEXT = 239;
48 FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_EDIFACT = 240;
49 FX_WCHAR CBC_HighLevelEncoder::C40_UNLATCH = 254;
50 FX_WCHAR CBC_HighLevelEncoder::X12_UNLATCH = 254;
51 FX_WCHAR CBC_HighLevelEncoder::PAD = 129;
52 FX_WCHAR CBC_HighLevelEncoder::MACRO_05 = 236;
53 FX_WCHAR CBC_HighLevelEncoder::MACRO_06 = 237;
54 const wchar_t* CBC_HighLevelEncoder::MACRO_05_HEADER = L"[)>05";
55 const wchar_t* CBC_HighLevelEncoder::MACRO_06_HEADER = L"[)>06";
56 const wchar_t CBC_HighLevelEncoder::MACRO_TRAILER = 0x0004;
57 
CBC_HighLevelEncoder()58 CBC_HighLevelEncoder::CBC_HighLevelEncoder() {}
~CBC_HighLevelEncoder()59 CBC_HighLevelEncoder::~CBC_HighLevelEncoder() {}
60 
getBytesForMessage(CFX_WideString msg)61 CFX_ArrayTemplate<uint8_t>& CBC_HighLevelEncoder::getBytesForMessage(
62     CFX_WideString msg) {
63   CFX_ByteString bytestr;
64   CBC_UtilCodingConvert::UnicodeToUTF8(msg, bytestr);
65   for (int32_t i = 0; i < bytestr.GetLength(); i++) {
66     m_bytearray.Add(bytestr.GetAt(i));
67   }
68   return m_bytearray;
69 }
encodeHighLevel(CFX_WideString msg,CFX_WideString ecLevel,int32_t & e)70 CFX_WideString CBC_HighLevelEncoder::encodeHighLevel(CFX_WideString msg,
71                                                      CFX_WideString ecLevel,
72                                                      int32_t& e) {
73   return encodeHighLevel(msg, ecLevel, FORCE_NONE, nullptr, nullptr, e);
74 }
encodeHighLevel(CFX_WideString msg,CFX_WideString ecLevel,SymbolShapeHint shape,CBC_Dimension * minSize,CBC_Dimension * maxSize,int32_t & e)75 CFX_WideString CBC_HighLevelEncoder::encodeHighLevel(CFX_WideString msg,
76                                                      CFX_WideString ecLevel,
77                                                      SymbolShapeHint shape,
78                                                      CBC_Dimension* minSize,
79                                                      CBC_Dimension* maxSize,
80                                                      int32_t& e) {
81   CBC_EncoderContext context(msg, ecLevel, e);
82   if (e != BCExceptionNO)
83     return CFX_WideString();
84   context.setSymbolShape(shape);
85   context.setSizeConstraints(minSize, maxSize);
86   if ((msg.Mid(0, 6) == MACRO_05_HEADER) &&
87       (msg.Mid(msg.GetLength() - 1, 1) == MACRO_TRAILER)) {
88     context.writeCodeword(MACRO_05);
89     context.setSkipAtEnd(2);
90     context.m_pos += 6;
91   } else if ((msg.Mid(0, 6) == MACRO_06_HEADER) &&
92              (msg.Mid(msg.GetLength() - 1, 1) == MACRO_TRAILER)) {
93     context.writeCodeword(MACRO_06);
94     context.setSkipAtEnd(2);
95     context.m_pos += 6;
96   }
97 
98   std::vector<std::unique_ptr<CBC_Encoder>> encoders;
99   encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_ASCIIEncoder()));
100   encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_C40Encoder()));
101   encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_TextEncoder()));
102   encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_X12Encoder()));
103   encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_EdifactEncoder()));
104   encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_Base256Encoder()));
105   int32_t encodingMode = ASCII_ENCODATION;
106   while (context.hasMoreCharacters()) {
107     encoders[encodingMode]->Encode(context, e);
108     if (e != BCExceptionNO)
109       return L"";
110 
111     if (context.m_newEncoding >= 0) {
112       encodingMode = context.m_newEncoding;
113       context.resetEncoderSignal();
114     }
115   }
116   int32_t len = context.m_codewords.GetLength();
117   context.updateSymbolInfo(e);
118   if (e != BCExceptionNO)
119     return L"";
120 
121   int32_t capacity = context.m_symbolInfo->m_dataCapacity;
122   if (len < capacity) {
123     if (encodingMode != ASCII_ENCODATION &&
124         encodingMode != BASE256_ENCODATION) {
125       context.writeCodeword(0x00fe);
126     }
127   }
128   CFX_WideString codewords = context.m_codewords;
129   if (codewords.GetLength() < capacity) {
130     codewords += PAD;
131   }
132   while (codewords.GetLength() < capacity) {
133     codewords += (randomize253State(PAD, codewords.GetLength() + 1));
134   }
135   return codewords;
136 }
lookAheadTest(CFX_WideString msg,int32_t startpos,int32_t currentMode)137 int32_t CBC_HighLevelEncoder::lookAheadTest(CFX_WideString msg,
138                                             int32_t startpos,
139                                             int32_t currentMode) {
140   if (startpos >= msg.GetLength()) {
141     return currentMode;
142   }
143   std::vector<FX_FLOAT> charCounts;
144   if (currentMode == ASCII_ENCODATION) {
145     charCounts.push_back(0);
146     charCounts.push_back(1);
147     charCounts.push_back(1);
148     charCounts.push_back(1);
149     charCounts.push_back(1);
150     charCounts.push_back(1.25f);
151   } else {
152     charCounts.push_back(1);
153     charCounts.push_back(2);
154     charCounts.push_back(2);
155     charCounts.push_back(2);
156     charCounts.push_back(2);
157     charCounts.push_back(2.25f);
158     charCounts[currentMode] = 0;
159   }
160   int32_t charsProcessed = 0;
161   while (true) {
162     if ((startpos + charsProcessed) == msg.GetLength()) {
163       int32_t min = std::numeric_limits<int32_t>::max();
164       CFX_ArrayTemplate<uint8_t> mins;
165       mins.SetSize(6);
166       CFX_ArrayTemplate<int32_t> intCharCounts;
167       intCharCounts.SetSize(6);
168       min = findMinimums(charCounts, intCharCounts, min, mins);
169       int32_t minCount = getMinimumCount(mins);
170       if (intCharCounts[ASCII_ENCODATION] == min) {
171         return ASCII_ENCODATION;
172       }
173       if (minCount == 1 && mins[BASE256_ENCODATION] > 0) {
174         return BASE256_ENCODATION;
175       }
176       if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) {
177         return EDIFACT_ENCODATION;
178       }
179       if (minCount == 1 && mins[TEXT_ENCODATION] > 0) {
180         return TEXT_ENCODATION;
181       }
182       if (minCount == 1 && mins[X12_ENCODATION] > 0) {
183         return X12_ENCODATION;
184       }
185       return C40_ENCODATION;
186     }
187     FX_WCHAR c = msg.GetAt(startpos + charsProcessed);
188     charsProcessed++;
189     if (isDigit(c)) {
190       charCounts[ASCII_ENCODATION] += 0.5;
191     } else if (isExtendedASCII(c)) {
192       charCounts[ASCII_ENCODATION] =
193           (FX_FLOAT)ceil(charCounts[ASCII_ENCODATION]);
194       charCounts[ASCII_ENCODATION] += 2;
195     } else {
196       charCounts[ASCII_ENCODATION] =
197           (FX_FLOAT)ceil(charCounts[ASCII_ENCODATION]);
198       charCounts[ASCII_ENCODATION]++;
199     }
200     if (isNativeC40(c)) {
201       charCounts[C40_ENCODATION] += 2.0f / 3.0f;
202     } else if (isExtendedASCII(c)) {
203       charCounts[C40_ENCODATION] += 8.0f / 3.0f;
204     } else {
205       charCounts[C40_ENCODATION] += 4.0f / 3.0f;
206     }
207     if (isNativeText(c)) {
208       charCounts[TEXT_ENCODATION] += 2.0f / 3.0f;
209     } else if (isExtendedASCII(c)) {
210       charCounts[TEXT_ENCODATION] += 8.0f / 3.0f;
211     } else {
212       charCounts[TEXT_ENCODATION] += 4.0f / 3.0f;
213     }
214     if (isNativeX12(c)) {
215       charCounts[X12_ENCODATION] += 2.0f / 3.0f;
216     } else if (isExtendedASCII(c)) {
217       charCounts[X12_ENCODATION] += 13.0f / 3.0f;
218     } else {
219       charCounts[X12_ENCODATION] += 10.0f / 3.0f;
220     }
221     if (isNativeEDIFACT(c)) {
222       charCounts[EDIFACT_ENCODATION] += 3.0f / 4.0f;
223     } else if (isExtendedASCII(c)) {
224       charCounts[EDIFACT_ENCODATION] += 17.0f / 4.0f;
225     } else {
226       charCounts[EDIFACT_ENCODATION] += 13.0f / 4.0f;
227     }
228     if (isSpecialB256(c)) {
229       charCounts[BASE256_ENCODATION] += 4;
230     } else {
231       charCounts[BASE256_ENCODATION]++;
232     }
233     if (charsProcessed >= 4) {
234       CFX_ArrayTemplate<int32_t> intCharCounts;
235       intCharCounts.SetSize(6);
236       CFX_ArrayTemplate<uint8_t> mins;
237       mins.SetSize(6);
238       findMinimums(charCounts, intCharCounts,
239                    std::numeric_limits<int32_t>::max(), mins);
240       int32_t minCount = getMinimumCount(mins);
241       if (intCharCounts[ASCII_ENCODATION] < intCharCounts[BASE256_ENCODATION] &&
242           intCharCounts[ASCII_ENCODATION] < intCharCounts[C40_ENCODATION] &&
243           intCharCounts[ASCII_ENCODATION] < intCharCounts[TEXT_ENCODATION] &&
244           intCharCounts[ASCII_ENCODATION] < intCharCounts[X12_ENCODATION] &&
245           intCharCounts[ASCII_ENCODATION] < intCharCounts[EDIFACT_ENCODATION]) {
246         return ASCII_ENCODATION;
247       }
248       if (intCharCounts[BASE256_ENCODATION] < intCharCounts[ASCII_ENCODATION] ||
249           (mins[C40_ENCODATION] + mins[TEXT_ENCODATION] + mins[X12_ENCODATION] +
250            mins[EDIFACT_ENCODATION]) == 0) {
251         return BASE256_ENCODATION;
252       }
253       if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) {
254         return EDIFACT_ENCODATION;
255       }
256       if (minCount == 1 && mins[TEXT_ENCODATION] > 0) {
257         return TEXT_ENCODATION;
258       }
259       if (minCount == 1 && mins[X12_ENCODATION] > 0) {
260         return X12_ENCODATION;
261       }
262       if (intCharCounts[C40_ENCODATION] + 1 < intCharCounts[ASCII_ENCODATION] &&
263           intCharCounts[C40_ENCODATION] + 1 <
264               intCharCounts[BASE256_ENCODATION] &&
265           intCharCounts[C40_ENCODATION] + 1 <
266               intCharCounts[EDIFACT_ENCODATION] &&
267           intCharCounts[C40_ENCODATION] + 1 < intCharCounts[TEXT_ENCODATION]) {
268         if (intCharCounts[C40_ENCODATION] < intCharCounts[X12_ENCODATION]) {
269           return C40_ENCODATION;
270         }
271         if (intCharCounts[C40_ENCODATION] == intCharCounts[X12_ENCODATION]) {
272           int32_t p = startpos + charsProcessed + 1;
273           while (p < msg.GetLength()) {
274             FX_WCHAR tc = msg.GetAt(p);
275             if (isX12TermSep(tc)) {
276               return X12_ENCODATION;
277             }
278             if (!isNativeX12(tc)) {
279               break;
280             }
281             p++;
282           }
283           return C40_ENCODATION;
284         }
285       }
286     }
287   }
288 }
isDigit(FX_WCHAR ch)289 bool CBC_HighLevelEncoder::isDigit(FX_WCHAR ch) {
290   return ch >= '0' && ch <= '9';
291 }
isExtendedASCII(FX_WCHAR ch)292 bool CBC_HighLevelEncoder::isExtendedASCII(FX_WCHAR ch) {
293   return ch >= 128 && ch <= 255;
294 }
determineConsecutiveDigitCount(CFX_WideString msg,int32_t startpos)295 int32_t CBC_HighLevelEncoder::determineConsecutiveDigitCount(CFX_WideString msg,
296                                                              int32_t startpos) {
297   int32_t count = 0;
298   int32_t len = msg.GetLength();
299   int32_t idx = startpos;
300   if (idx < len) {
301     FX_WCHAR ch = msg.GetAt(idx);
302     while (isDigit(ch) && idx < len) {
303       count++;
304       idx++;
305       if (idx < len) {
306         ch = msg.GetAt(idx);
307       }
308     }
309   }
310   return count;
311 }
illegalCharacter(FX_WCHAR c,int32_t & e)312 void CBC_HighLevelEncoder::illegalCharacter(FX_WCHAR c, int32_t& e) {
313   e = BCExceptionIllegalArgument;
314 }
randomize253State(FX_WCHAR ch,int32_t codewordPosition)315 FX_WCHAR CBC_HighLevelEncoder::randomize253State(FX_WCHAR ch,
316                                                  int32_t codewordPosition) {
317   int32_t pseudoRandom = ((149 * codewordPosition) % 253) + 1;
318   int32_t tempVariable = ch + pseudoRandom;
319   return tempVariable <= 254 ? (FX_WCHAR)tempVariable
320                              : (FX_WCHAR)(tempVariable - 254);
321 }
findMinimums(std::vector<FX_FLOAT> & charCounts,CFX_ArrayTemplate<int32_t> & intCharCounts,int32_t min,CFX_ArrayTemplate<uint8_t> & mins)322 int32_t CBC_HighLevelEncoder::findMinimums(
323     std::vector<FX_FLOAT>& charCounts,
324     CFX_ArrayTemplate<int32_t>& intCharCounts,
325     int32_t min,
326     CFX_ArrayTemplate<uint8_t>& mins) {
327   for (int32_t l = 0; l < mins.GetSize(); l++) {
328     mins[l] = (uint8_t)0;
329   }
330   for (int32_t i = 0; i < 6; i++) {
331     intCharCounts[i] = (int32_t)ceil(charCounts[i]);
332     int32_t current = intCharCounts[i];
333     if (min > current) {
334       min = current;
335       for (int32_t j = 0; j < mins.GetSize(); j++) {
336         mins[j] = (uint8_t)0;
337       }
338     }
339     if (min == current) {
340       mins[i]++;
341     }
342   }
343   return min;
344 }
getMinimumCount(CFX_ArrayTemplate<uint8_t> & mins)345 int32_t CBC_HighLevelEncoder::getMinimumCount(
346     CFX_ArrayTemplate<uint8_t>& mins) {
347   int32_t minCount = 0;
348   for (int32_t i = 0; i < 6; i++) {
349     minCount += mins[i];
350   }
351   return minCount;
352 }
isNativeC40(FX_WCHAR ch)353 bool CBC_HighLevelEncoder::isNativeC40(FX_WCHAR ch) {
354   return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z');
355 }
isNativeText(FX_WCHAR ch)356 bool CBC_HighLevelEncoder::isNativeText(FX_WCHAR ch) {
357   return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z');
358 }
isNativeX12(FX_WCHAR ch)359 bool CBC_HighLevelEncoder::isNativeX12(FX_WCHAR ch) {
360   return isX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') ||
361          (ch >= 'A' && ch <= 'Z');
362 }
isX12TermSep(FX_WCHAR ch)363 bool CBC_HighLevelEncoder::isX12TermSep(FX_WCHAR ch) {
364   return (ch == '\r') || (ch == '*') || (ch == '>');
365 }
isNativeEDIFACT(FX_WCHAR ch)366 bool CBC_HighLevelEncoder::isNativeEDIFACT(FX_WCHAR ch) {
367   return ch >= ' ' && ch <= '^';
368 }
isSpecialB256(FX_WCHAR ch)369 bool CBC_HighLevelEncoder::isSpecialB256(FX_WCHAR ch) {
370   return false;
371 }
372