1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8  * Copyright 2006-2007 Jeremias Maerki.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  *      http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include "fxbarcode/datamatrix/BC_HighLevelEncoder.h"
24 
25 #include <algorithm>
26 #include <array>
27 #include <limits>
28 #include <memory>
29 #include <vector>
30 
31 #include "core/fxcrt/fx_extension.h"
32 #include "fxbarcode/common/BC_CommonBitMatrix.h"
33 #include "fxbarcode/datamatrix/BC_ASCIIEncoder.h"
34 #include "fxbarcode/datamatrix/BC_Base256Encoder.h"
35 #include "fxbarcode/datamatrix/BC_C40Encoder.h"
36 #include "fxbarcode/datamatrix/BC_EdifactEncoder.h"
37 #include "fxbarcode/datamatrix/BC_Encoder.h"
38 #include "fxbarcode/datamatrix/BC_EncoderContext.h"
39 #include "fxbarcode/datamatrix/BC_SymbolInfo.h"
40 #include "fxbarcode/datamatrix/BC_TextEncoder.h"
41 #include "fxbarcode/datamatrix/BC_X12Encoder.h"
42 #include "third_party/base/ptr_util.h"
43 
44 namespace {
45 
// Codeword appended after the data to fill the remaining symbol capacity.
constexpr wchar_t kPad = 129;

// Codewords written in place of a recognised macro header.
constexpr wchar_t kMacro05 = 236;
constexpr wchar_t kMacro06 = 237;

// Message prefixes that select kMacro05 / kMacro06. The literals are split
// after the octal escape so the digits that follow stay visually separate
// from the control character (036 octal, 0x1e).
constexpr wchar_t kMacro05Header[] =
    L"[)>\036"
    L"05";
constexpr wchar_t kMacro06Header[] =
    L"[)>\036"
    L"06";

// A macro header is only honoured when the message ends with this character.
constexpr wchar_t kMacroTrailer = 0x0004;
56 
57 constexpr size_t kEncoderCount =
58     static_cast<size_t>(CBC_HighLevelEncoder::Encoding::LAST) + 1;
59 static_assert(kEncoderCount == 6, "Bad encoder count");
60 
// Scrambles |ch| with a position-dependent offset in the range [1, 253].
// |codewordPosition| is the position of the codeword being produced; sums
// above 254 wrap around by subtracting 254.
wchar_t Randomize253State(wchar_t ch, int32_t codewordPosition) {
  const int32_t offset = (149 * codewordPosition) % 253 + 1;
  int32_t scrambled = ch + offset;
  if (scrambled > 254)
    scrambled -= 254;
  return static_cast<wchar_t>(scrambled);
}
67 
FindMinimums(const std::array<float,kEncoderCount> & charCounts,std::array<int32_t,kEncoderCount> * intCharCounts,std::array<uint8_t,kEncoderCount> * mins)68 int32_t FindMinimums(const std::array<float, kEncoderCount>& charCounts,
69                      std::array<int32_t, kEncoderCount>* intCharCounts,
70                      std::array<uint8_t, kEncoderCount>* mins) {
71   int32_t min = std::numeric_limits<int32_t>::max();
72   for (size_t i = 0; i < kEncoderCount; ++i) {
73     int32_t current = static_cast<int32_t>(ceil(charCounts[i]));
74     (*intCharCounts)[i] = current;
75     if (min > current) {
76       min = current;
77       for (auto& m : *mins)
78         m = 0;
79     }
80     if (min == current)
81       (*mins)[i]++;
82   }
83   return min;
84 }
85 
GetMinimumCount(const std::array<uint8_t,kEncoderCount> & mins)86 int32_t GetMinimumCount(const std::array<uint8_t, kEncoderCount>& mins) {
87   int32_t count = 0;
88   for (const auto& m : mins)
89     count += m;
90   return count;
91 }
92 
// True for a space, a decimal digit or an upper-case letter A-Z.
bool IsNativeC40(wchar_t ch) {
  if (ch == ' ')
    return true;
  if (ch >= '0' && ch <= '9')
    return true;
  return ch >= 'A' && ch <= 'Z';
}
96 
// True for a space, a decimal digit or a lower-case letter a-z.
bool IsNativeText(wchar_t ch) {
  if (ch == ' ')
    return true;
  if (ch >= '0' && ch <= '9')
    return true;
  return ch >= 'a' && ch <= 'z';
}
100 
// True for carriage return, '*' or '>'.
bool IsX12TermSep(wchar_t ch) {
  switch (ch) {
    case '\r':
    case '*':
    case '>':
      return true;
    default:
      return false;
  }
}
104 
IsNativeX12(wchar_t ch)105 bool IsNativeX12(wchar_t ch) {
106   return IsX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') ||
107          (ch >= 'A' && ch <= 'Z');
108 }
109 
// True when |ch| lies in the inclusive range from ' ' to '^'.
bool IsNativeEDIFACT(wchar_t ch) {
  const bool below_range = ch < ' ';
  const bool above_range = ch > '^';
  return !(below_range || above_range);
}
113 
EncoderIndex(CBC_HighLevelEncoder::Encoding encoding)114 size_t EncoderIndex(CBC_HighLevelEncoder::Encoding encoding) {
115   ASSERT(encoding != CBC_HighLevelEncoder::Encoding::UNKNOWN);
116   return static_cast<size_t>(encoding);
117 }
118 
119 }  // namespace
120 
121 // static
EncodeHighLevel(const WideString & msg)122 WideString CBC_HighLevelEncoder::EncodeHighLevel(const WideString& msg) {
123   // Per spec. Alpha numeric input is even shorter.
124   static constexpr size_t kMaxNumericInputLength = 3116;
125 
126   // Exit early if the input is too long. It will fail no matter what.
127   if (msg.GetLength() > kMaxNumericInputLength)
128     return WideString();
129 
130   CBC_EncoderContext context(msg);
131   if (context.HasCharactersOutsideISO88591Encoding())
132     return WideString();
133 
134   if (msg.Back() == kMacroTrailer) {
135     WideString left = msg.First(6);
136     if (left == kMacro05Header) {
137       context.writeCodeword(kMacro05);
138       context.setSkipAtEnd(2);
139       context.m_pos += 6;
140     } else if (left == kMacro06Header) {
141       context.writeCodeword(kMacro06);
142       context.setSkipAtEnd(2);
143       context.m_pos += 6;
144     }
145   }
146 
147   std::vector<std::unique_ptr<CBC_Encoder>> encoders;
148   encoders.push_back(pdfium::MakeUnique<CBC_ASCIIEncoder>());
149   encoders.push_back(pdfium::MakeUnique<CBC_C40Encoder>());
150   encoders.push_back(pdfium::MakeUnique<CBC_TextEncoder>());
151   encoders.push_back(pdfium::MakeUnique<CBC_X12Encoder>());
152   encoders.push_back(pdfium::MakeUnique<CBC_EdifactEncoder>());
153   encoders.push_back(pdfium::MakeUnique<CBC_Base256Encoder>());
154   Encoding encodingMode = Encoding::ASCII;
155   while (context.hasMoreCharacters()) {
156     if (!encoders[EncoderIndex(encodingMode)]->Encode(&context))
157       return WideString();
158 
159     if (context.m_newEncoding != Encoding::UNKNOWN) {
160       encodingMode = context.m_newEncoding;
161       context.ResetEncoderSignal();
162     }
163   }
164   size_t len = context.m_codewords.GetLength();
165   if (!context.UpdateSymbolInfo())
166     return WideString();
167 
168   size_t capacity = context.m_symbolInfo->dataCapacity();
169   if (len < capacity) {
170     if (encodingMode != Encoding::ASCII && encodingMode != Encoding::BASE256)
171       context.writeCodeword(0x00fe);
172   }
173   WideString codewords = context.m_codewords;
174   if (codewords.GetLength() < capacity)
175     codewords += kPad;
176 
177   while (codewords.GetLength() < capacity)
178     codewords += Randomize253State(kPad, codewords.GetLength() + 1);
179 
180   ASSERT(!codewords.IsEmpty());
181   return codewords;
182 }
183 
184 // static
LookAheadTest(const WideString & msg,size_t startpos,CBC_HighLevelEncoder::Encoding currentMode)185 CBC_HighLevelEncoder::Encoding CBC_HighLevelEncoder::LookAheadTest(
186     const WideString& msg,
187     size_t startpos,
188     CBC_HighLevelEncoder::Encoding currentMode) {
189   if (startpos >= msg.GetLength())
190     return currentMode;
191 
192   std::array<float, kEncoderCount> charCounts;
193   if (currentMode == Encoding::ASCII) {
194     charCounts = {0, 1, 1, 1, 1, 1.25f};
195   } else {
196     charCounts = {1, 2, 2, 2, 2, 2.25f};
197     charCounts[EncoderIndex(currentMode)] = 0;
198   }
199 
200   size_t charsProcessed = 0;
201   while (true) {
202     if ((startpos + charsProcessed) == msg.GetLength()) {
203       std::array<int32_t, kEncoderCount> intCharCounts;
204       std::array<uint8_t, kEncoderCount> mins;
205       int32_t min = FindMinimums(charCounts, &intCharCounts, &mins);
206       if (intCharCounts[EncoderIndex(Encoding::ASCII)] == min)
207         return Encoding::ASCII;
208       const int32_t minCount = GetMinimumCount(mins);
209       if (minCount == 1) {
210         if (mins[EncoderIndex(Encoding::BASE256)] > 0)
211           return Encoding::BASE256;
212         if (mins[EncoderIndex(Encoding::EDIFACT)] > 0)
213           return Encoding::EDIFACT;
214         if (mins[EncoderIndex(Encoding::TEXT)] > 0)
215           return Encoding::TEXT;
216         if (mins[EncoderIndex(Encoding::X12)] > 0)
217           return Encoding::X12;
218       }
219       return Encoding::C40;
220     }
221 
222     wchar_t c = msg[startpos + charsProcessed];
223     charsProcessed++;
224     {
225       auto& count = charCounts[EncoderIndex(Encoding::ASCII)];
226       if (FXSYS_IsDecimalDigit(c))
227         count += 0.5;
228       else if (IsExtendedASCII(c))
229         count = ceilf(count) + 2;
230       else
231         count = ceilf(count) + 1;
232     }
233 
234     {
235       auto& count = charCounts[EncoderIndex(Encoding::C40)];
236       if (IsNativeC40(c))
237         count += 2.0f / 3.0f;
238       else if (IsExtendedASCII(c))
239         count += 8.0f / 3.0f;
240       else
241         count += 4.0f / 3.0f;
242     }
243 
244     {
245       auto& count = charCounts[EncoderIndex(Encoding::TEXT)];
246       if (IsNativeText(c))
247         count += 2.0f / 3.0f;
248       else if (IsExtendedASCII(c))
249         count += 8.0f / 3.0f;
250       else
251         count += 4.0f / 3.0f;
252     }
253 
254     {
255       auto& count = charCounts[EncoderIndex(Encoding::X12)];
256       if (IsNativeX12(c))
257         count += 2.0f / 3.0f;
258       else if (IsExtendedASCII(c))
259         count += 13.0f / 3.0f;
260       else
261         count += 10.0f / 3.0f;
262     }
263 
264     {
265       auto& count = charCounts[EncoderIndex(Encoding::EDIFACT)];
266       if (IsNativeEDIFACT(c))
267         count += 3.0f / 4.0f;
268       else if (IsExtendedASCII(c))
269         count += 17.0f / 4.0f;
270       else
271         count += 13.0f / 4.0f;
272     }
273 
274     charCounts[EncoderIndex(Encoding::BASE256)]++;
275     if (charsProcessed < 4)
276       continue;
277 
278     std::array<int32_t, kEncoderCount> intCharCounts;
279     std::array<uint8_t, kEncoderCount> mins;
280     FindMinimums(charCounts, &intCharCounts, &mins);
281     int32_t minCount = GetMinimumCount(mins);
282     int32_t ascii_count = intCharCounts[EncoderIndex(Encoding::ASCII)];
283     int32_t c40_count = intCharCounts[EncoderIndex(Encoding::C40)];
284     int32_t text_count = intCharCounts[EncoderIndex(Encoding::TEXT)];
285     int32_t x12_count = intCharCounts[EncoderIndex(Encoding::X12)];
286     int32_t editfact_count = intCharCounts[EncoderIndex(Encoding::EDIFACT)];
287     int32_t base256_count = intCharCounts[EncoderIndex(Encoding::BASE256)];
288     int32_t bet_min = std::min({base256_count, editfact_count, text_count});
289     if (ascii_count < bet_min && ascii_count < c40_count &&
290         ascii_count < x12_count) {
291       return Encoding::ASCII;
292     }
293     if (base256_count < ascii_count ||
294         (mins[EncoderIndex(Encoding::C40)] +
295          mins[EncoderIndex(Encoding::TEXT)] +
296          mins[EncoderIndex(Encoding::X12)] +
297          mins[EncoderIndex(Encoding::EDIFACT)]) == 0) {
298       return Encoding::BASE256;
299     }
300     if (minCount == 1) {
301       if (mins[EncoderIndex(Encoding::EDIFACT)] > 0)
302         return Encoding::EDIFACT;
303       if (mins[EncoderIndex(Encoding::TEXT)] > 0)
304         return Encoding::TEXT;
305       if (mins[EncoderIndex(Encoding::X12)] > 0)
306         return Encoding::X12;
307     }
308     if (c40_count + 1 < ascii_count && c40_count + 1 < bet_min) {
309       if (c40_count < x12_count)
310         return Encoding::C40;
311       if (c40_count == x12_count) {
312         size_t p = startpos + charsProcessed + 1;
313         while (p < msg.GetLength()) {
314           wchar_t tc = msg[p];
315           if (IsX12TermSep(tc))
316             return Encoding::X12;
317           if (!IsNativeX12(tc))
318             break;
319           p++;
320         }
321         return Encoding::C40;
322       }
323     }
324   }
325 }
326 
327 // static
IsExtendedASCII(wchar_t ch)328 bool CBC_HighLevelEncoder::IsExtendedASCII(wchar_t ch) {
329   return ch >= 128 && ch <= 255;
330 }
331