1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2000-2003, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File writejava.c
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   01/11/02    Ram        Creation.
15 *******************************************************************************
16 */
17 #include "rle.h"
/**
 * Marker unit for the 16-bit run-length format.  In the encoded stream it
 * introduces either a run ("ESCAPE n c") or, when doubled, a single literal
 * occurrence of the marker value itself.
 */
static const uint16_t ESCAPE = (uint16_t)0xA5A5u;
23 
/**
 * Marker byte for the 8-bit run-length format.  In the encoded stream it
 * introduces either a run ("ESCAPE_BYTE n b") or, when doubled, a single
 * literal occurrence of the marker byte itself.
 */
static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5u;
29 
30 /**
31  * Append a byte to the given StringBuffer, packing two bytes into each
32  * character.  The state parameter maintains intermediary data between
33  * calls.
34  * @param state A two-element array, with state[0] == 0 if this is the
35  * first byte of a pair, or state[0] != 0 if this is the second byte
36  * of a pair, in which case state[1] is the first byte.
37  */
38 static uint16_t*
appendEncodedByte(uint16_t * buffer,uint16_t * buffLimit,uint8_t value,uint8_t state[],UErrorCode * status)39 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
40     if(!status || U_FAILURE(*status)){
41         return NULL;
42     }
43     if (state[0] != 0) {
44         uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
45         if(buffer < buffLimit){
46             *buffer++ = c;
47         }else{
48             *status = U_BUFFER_OVERFLOW_ERROR;
49         }
50         state[0] = 0;
51         return buffer;
52     }
53     else {
54         state[0] = 1;
55         state[1] = value;
56         return buffer;
57     }
58 }
59 /**
60  * Encode a run, possibly a degenerate run (of < 4 values).
61  * @param length The length of the run; must be > 0 && <= 0xFF.
62  */
63 static uint16_t*
encodeRunByte(uint16_t * buffer,uint16_t * bufLimit,uint8_t value,int32_t length,uint8_t state[],UErrorCode * status)64 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
65     if(!status || U_FAILURE(*status)){
66         return NULL;
67     }
68     if (length < 4) {
69         int32_t j=0;
70         for (; j<length; ++j) {
71             if (value == ESCAPE_BYTE) {
72                 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
73             }
74             buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
75         }
76     }
77     else {
78         if (length == ESCAPE_BYTE) {
79             if (value == ESCAPE_BYTE){
80                buffer =  appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
81             }
82             buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
83             --length;
84         }
85         buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
86         buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
87         buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
88     }
89     return buffer;
90 }
91 
/**
 * Store one 16-bit unit at the write position and advance it, or set
 * *status to U_BUFFER_OVERFLOW_ERROR when the position has reached bufLimit.
 * num is incremented in both cases, so it counts attempted appends.
 *
 * buffer and num must be modifiable lvalues; buffer is evaluated twice when
 * there is room, so do not pass an expression with side effects.  The body
 * is wrapped in do { } while (0) so that "APPEND(...);" is a single
 * statement and is safe in an unbraced if/else.
 */
#define APPEND(buffer, bufLimit, value, num, status) do { \
    if ((buffer) < (bufLimit)) {                          \
        *(buffer)++ = (value);                            \
    } else {                                              \
        *(status) = U_BUFFER_OVERFLOW_ERROR;              \
    }                                                     \
    (num)++;                                              \
} while (0)
100 
101 /**
102  * Encode a run, possibly a degenerate run (of < 4 values).
103  * @param length The length of the run; must be > 0 && <= 0xFFFF.
104  */
105 static uint16_t*
encodeRunShort(uint16_t * buffer,uint16_t * bufLimit,uint16_t value,int32_t length,UErrorCode * status)106 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
107     int32_t num=0;
108     if (length < 4) {
109         int j=0;
110         for (; j<length; ++j) {
111             if (value == (int32_t) ESCAPE){
112                 APPEND(buffer,bufLimit,ESCAPE, num, status);
113 
114             }
115             APPEND(buffer,bufLimit,value,num, status);
116         }
117     }
118     else {
119         if (length == (int32_t) ESCAPE) {
120             if (value == (int32_t) ESCAPE){
121                 APPEND(buffer,bufLimit,ESCAPE,num,status);
122 
123             }
124             APPEND(buffer,bufLimit,value,num,status);
125             --length;
126         }
127         APPEND(buffer,bufLimit,ESCAPE,num,status);
128         APPEND(buffer,bufLimit,(uint16_t) length, num,status);
129         APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
130     }
131     return buffer;
132 }
133 
134 /**
135  * Construct a string representing a char array.  Use run-length encoding.
136  * A character represents itself, unless it is the ESCAPE character.  Then
137  * the following notations are possible:
138  *   ESCAPE ESCAPE   ESCAPE literal
139  *   ESCAPE n c      n instances of character c
140  * Since an encoded run occupies 3 characters, we only encode runs of 4 or
141  * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
142  * If we encounter a run where n == ESCAPE, we represent this as:
143  *   c ESCAPE n-1 c
144  * The ESCAPE value is chosen so as not to collide with commonly
145  * seen values.
146  */
147 int32_t
usArrayToRLEString(const uint16_t * src,int32_t srcLen,uint16_t * buffer,int32_t bufLen,UErrorCode * status)148 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
149     uint16_t* bufLimit =  buffer+bufLen;
150     uint16_t* saveBuffer = buffer;
151     if(buffer < bufLimit){
152         *buffer++ =  (uint16_t)(srcLen>>16);
153         if(buffer<bufLimit){
154             uint16_t runValue = src[0];
155             int32_t runLength = 1;
156             int i=1;
157             *buffer++ = (uint16_t) srcLen;
158 
159             for (; i<srcLen; ++i) {
160                 uint16_t s = src[i];
161                 if (s == runValue && runLength < 0xFFFF){
162                     ++runLength;
163                 }else {
164                     buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
165                     runValue = s;
166                     runLength = 1;
167                 }
168             }
169             buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
170         }else{
171             *status = U_BUFFER_OVERFLOW_ERROR;
172         }
173     }else{
174         *status = U_BUFFER_OVERFLOW_ERROR;
175     }
176     return (int32_t)(buffer - saveBuffer);
177 }
178 
179 /**
180  * Construct a string representing a byte array.  Use run-length encoding.
181  * Two bytes are packed into a single char, with a single extra zero byte at
182  * the end if needed.  A byte represents itself, unless it is the
183  * ESCAPE_BYTE.  Then the following notations are possible:
184  *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
185  *   ESCAPE_BYTE n b           n instances of byte b
186  * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
187  * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
188  * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
189  *   b ESCAPE_BYTE n-1 b
190  * The ESCAPE_BYTE value is chosen so as not to collide with commonly
191  * seen values.
192  */
193 int32_t
byteArrayToRLEString(const uint8_t * src,int32_t srcLen,uint16_t * buffer,int32_t bufLen,UErrorCode * status)194 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
195     const uint16_t* saveBuf = buffer;
196     uint16_t* bufLimit =  buffer+bufLen;
197     if(buffer < bufLimit){
198         *buffer++ = ((uint16_t) (srcLen >> 16));
199 
200         if(buffer<bufLimit){
201             uint8_t runValue = src[0];
202             int runLength = 1;
203             uint8_t state[2]= {0};
204             int i=1;
205             *buffer++=((uint16_t) srcLen);
206             for (; i<srcLen; ++i) {
207                 uint8_t b = src[i];
208                 if (b == runValue && runLength < 0xFF){
209                     ++runLength;
210                 }
211                 else {
212                     buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
213                     runValue = b;
214                     runLength = 1;
215                 }
216             }
217             buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
218 
219             /* We must save the final byte, if there is one, by padding
220              * an extra zero.
221              */
222             if (state[0] != 0) {
223                 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
224             }
225         }else{
226             *status = U_BUFFER_OVERFLOW_ERROR;
227         }
228     }else{
229         *status = U_BUFFER_OVERFLOW_ERROR;
230     }
231     return (int32_t) (buffer - saveBuf);
232 }
233 
234 
235 /**
236  * Construct an array of shorts from a run-length encoded string.
237  */
238 int32_t
rleStringToUCharArray(uint16_t * src,int32_t srcLen,uint16_t * target,int32_t tgtLen,UErrorCode * status)239 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
240     int32_t length = 0;
241     int32_t ai = 0;
242     int i=2;
243 
244     if(!status || U_FAILURE(*status)){
245         return 0;
246     }
247     /* the source is null terminated */
248     if(srcLen == -1){
249         srcLen = u_strlen(src);
250     }
251     if(srcLen <= 2){
252         return 2;
253     }
254     length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
255 
256     if(target == NULL){
257         return length;
258     }
259     if(tgtLen < length){
260         *status = U_BUFFER_OVERFLOW_ERROR;
261         return length;
262     }
263 
264     for (; i<srcLen; ++i) {
265         uint16_t c = src[i];
266         if (c == ESCAPE) {
267             c = src[++i];
268             if (c == ESCAPE) {
269                 target[ai++] = c;
270             } else {
271                 int32_t runLength = (int32_t) c;
272                 uint16_t runValue = src[++i];
273                 int j=0;
274                 for (; j<runLength; ++j) {
275                     target[ai++] = runValue;
276                 }
277             }
278         }
279         else {
280             target[ai++] = c;
281         }
282     }
283 
284     if (ai != length){
285         *status = U_INTERNAL_PROGRAM_ERROR;
286     }
287 
288     return length;
289 }
290 
291 /**
292  * Construct an array of bytes from a run-length encoded string.
293  */
294 int32_t
rleStringToByteArray(uint16_t * src,int32_t srcLen,uint8_t * target,int32_t tgtLen,UErrorCode * status)295 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
296 
297     int32_t length = 0;
298     UBool nextChar = TRUE;
299     uint16_t c = 0;
300     int32_t node = 0;
301     int32_t runLength = 0;
302     int32_t i = 2;
303     int32_t ai=0;
304 
305     if(!status || U_FAILURE(*status)){
306         return 0;
307     }
308     /* the source is null terminated */
309     if(srcLen == -1){
310         srcLen = u_strlen(src);
311     }
312     if(srcLen <= 2){
313         return 2;
314     }
315     length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
316 
317     if(target == NULL){
318         return length;
319     }
320     if(tgtLen < length){
321         *status = U_BUFFER_OVERFLOW_ERROR;
322         return length;
323     }
324 
325     for (; ai<tgtLen; ) {
326        /* This part of the loop places the next byte into the local
327         * variable 'b' each time through the loop.  It keeps the
328         * current character in 'c' and uses the boolean 'nextChar'
329         * to see if we've taken both bytes out of 'c' yet.
330         */
331         uint8_t b;
332         if (nextChar) {
333             c = src[i++];
334             b = (uint8_t) (c >> 8);
335             nextChar = FALSE;
336         }
337         else {
338             b = (uint8_t) (c & 0xFF);
339             nextChar = TRUE;
340         }
341 
342        /* This part of the loop is a tiny state machine which handles
343         * the parsing of the run-length encoding.  This would be simpler
344         * if we could look ahead, but we can't, so we use 'node' to
345         * move between three nodes in the state machine.
346         */
347         switch (node) {
348         case 0:
349             /* Normal idle node */
350             if (b == ESCAPE_BYTE) {
351                 node = 1;
352             }
353             else {
354                 target[ai++] = b;
355             }
356             break;
357         case 1:
358            /* We have seen one ESCAPE_BYTE; we expect either a second
359             * one, or a run length and value.
360             */
361             if (b == ESCAPE_BYTE) {
362                 target[ai++] = ESCAPE_BYTE;
363                 node = 0;
364             }
365             else {
366                 runLength = b;
367                 node = 2;
368             }
369             break;
370         case 2:
371             {
372                 int j=0;
373                /* We have seen an ESCAPE_BYTE and length byte.  We interpret
374                 * the next byte as the value to be repeated.
375                 */
376                 for (; j<runLength; ++j){
377                     if(ai<tgtLen){
378                         target[ai++] = b;
379                     }else{
380                         *status = U_BUFFER_OVERFLOW_ERROR;
381                         return ai;
382                     }
383                 }
384                 node = 0;
385                 break;
386             }
387         }
388     }
389 
390     if (node != 0){
391         *status = U_INTERNAL_PROGRAM_ERROR;
392         /*("Bad run-length encoded byte array")*/
393         return 0;
394     }
395 
396 
397     if (i != srcLen){
398         /*("Excess data in RLE byte array string");*/
399         *status = U_INTERNAL_PROGRAM_ERROR;
400         return ai;
401     }
402 
403     return ai;
404 }
405 
406