1 /* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
2 2018-07-04 : Igor Pavlov : Public domain */
3 
4 #include "Precomp.h"
5 
6 /* #define SHOW_STAT */
7 
8 #ifdef SHOW_STAT
9 #include <stdio.h>
10 #define PRF(x) x
11 #else
12 #define PRF(x)
13 #endif
14 
15 #include <string.h>
16 
17 #include "Bcj2.h"
18 #include "CpuArch.h"
19 
20 #define CProb UInt16
21 
22 #define kTopValue ((UInt32)1 << 24)
23 #define kNumModelBits 11
24 #define kBitModelTotal (1 << kNumModelBits)
25 #define kNumMoveBits 5
26 
Bcj2Enc_Init(CBcj2Enc * p)27 void Bcj2Enc_Init(CBcj2Enc *p)
28 {
29   unsigned i;
30 
31   p->state = BCJ2_ENC_STATE_OK;
32   p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
33 
34   p->prevByte = 0;
35 
36   p->cache = 0;
37   p->range = 0xFFFFFFFF;
38   p->low = 0;
39   p->cacheSize = 1;
40 
41   p->ip = 0;
42 
43   p->fileIp = 0;
44   p->fileSize = 0;
45   p->relatLimit = BCJ2_RELAT_LIMIT;
46 
47   p->tempPos = 0;
48 
49   p->flushPos = 0;
50 
51   for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
52     p->probs[i] = kBitModelTotal >> 1;
53 }
54 
RangeEnc_ShiftLow(CBcj2Enc * p)55 static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
56 {
57   if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0)
58   {
59     Byte *buf = p->bufs[BCJ2_STREAM_RC];
60     do
61     {
62       if (buf == p->lims[BCJ2_STREAM_RC])
63       {
64         p->state = BCJ2_STREAM_RC;
65         p->bufs[BCJ2_STREAM_RC] = buf;
66         return True;
67       }
68       *buf++ = (Byte)(p->cache + (Byte)(p->low >> 32));
69       p->cache = 0xFF;
70     }
71     while (--p->cacheSize);
72     p->bufs[BCJ2_STREAM_RC] = buf;
73     p->cache = (Byte)((UInt32)p->low >> 24);
74   }
75   p->cacheSize++;
76   p->low = (UInt32)p->low << 8;
77   return False;
78 }
79 
Bcj2Enc_Encode_2(CBcj2Enc * p)80 static void Bcj2Enc_Encode_2(CBcj2Enc *p)
81 {
82   if (BCJ2_IS_32BIT_STREAM(p->state))
83   {
84     Byte *cur = p->bufs[p->state];
85     if (cur == p->lims[p->state])
86       return;
87     SetBe32(cur, p->tempTarget);
88     p->bufs[p->state] = cur + 4;
89   }
90 
91   p->state = BCJ2_ENC_STATE_ORIG;
92 
93   for (;;)
94   {
95     if (p->range < kTopValue)
96     {
97       if (RangeEnc_ShiftLow(p))
98         return;
99       p->range <<= 8;
100     }
101 
102     {
103       {
104         const Byte *src = p->src;
105         const Byte *srcLim;
106         Byte *dest;
107         SizeT num = p->srcLim - src;
108 
109         if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
110         {
111           if (num <= 4)
112             return;
113           num -= 4;
114         }
115         else if (num == 0)
116           break;
117 
118         dest = p->bufs[BCJ2_STREAM_MAIN];
119         if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))
120         {
121           num = p->lims[BCJ2_STREAM_MAIN] - dest;
122           if (num == 0)
123           {
124             p->state = BCJ2_STREAM_MAIN;
125             return;
126           }
127         }
128 
129         srcLim = src + num;
130 
131         if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80)
132           *dest = src[0];
133         else for (;;)
134         {
135           Byte b = *src;
136           *dest = b;
137           if (b != 0x0F)
138           {
139             if ((b & 0xFE) == 0xE8)
140               break;
141             dest++;
142             if (++src != srcLim)
143               continue;
144             break;
145           }
146           dest++;
147           if (++src == srcLim)
148             break;
149           if ((*src & 0xF0) != 0x80)
150             continue;
151           *dest = *src;
152           break;
153         }
154 
155         num = src - p->src;
156 
157         if (src == srcLim)
158         {
159           p->prevByte = src[-1];
160           p->bufs[BCJ2_STREAM_MAIN] = dest;
161           p->src = src;
162           p->ip += (UInt32)num;
163           continue;
164         }
165 
166         {
167           Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);
168           BoolInt needConvert;
169 
170           p->bufs[BCJ2_STREAM_MAIN] = dest + 1;
171           p->ip += (UInt32)num + 1;
172           src++;
173 
174           needConvert = False;
175 
176           if ((SizeT)(p->srcLim - src) >= 4)
177           {
178             UInt32 relatVal = GetUi32(src);
179             if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize)
180                 && ((relatVal + p->relatLimit) >> 1) < p->relatLimit)
181               needConvert = True;
182           }
183 
184           {
185             UInt32 bound;
186             unsigned ttt;
187             Byte b = src[-1];
188             CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0));
189 
190             ttt = *prob;
191             bound = (p->range >> kNumModelBits) * ttt;
192 
193             if (!needConvert)
194             {
195               p->range = bound;
196               *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
197               p->src = src;
198               p->prevByte = b;
199               continue;
200             }
201 
202             p->low += bound;
203             p->range -= bound;
204             *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
205 
206             {
207               UInt32 relatVal = GetUi32(src);
208               UInt32 absVal;
209               p->ip += 4;
210               absVal = p->ip + relatVal;
211               p->prevByte = src[3];
212               src += 4;
213               p->src = src;
214               {
215                 unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
216                 Byte *cur = p->bufs[cj];
217                 if (cur == p->lims[cj])
218                 {
219                   p->state = cj;
220                   p->tempTarget = absVal;
221                   return;
222                 }
223                 SetBe32(cur, absVal);
224                 p->bufs[cj] = cur + 4;
225               }
226             }
227           }
228         }
229       }
230     }
231   }
232 
233   if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
234     return;
235 
236   for (; p->flushPos < 5; p->flushPos++)
237     if (RangeEnc_ShiftLow(p))
238       return;
239   p->state = BCJ2_ENC_STATE_OK;
240 }
241 
242 
Bcj2Enc_Encode(CBcj2Enc * p)243 void Bcj2Enc_Encode(CBcj2Enc *p)
244 {
245   PRF(printf("\n"));
246   PRF(printf("---- ip = %8d   tempPos = %8d   src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
247 
248   if (p->tempPos != 0)
249   {
250     unsigned extra = 0;
251 
252     for (;;)
253     {
254       const Byte *src = p->src;
255       const Byte *srcLim = p->srcLim;
256       unsigned finishMode = p->finishMode;
257 
258       p->src = p->temp;
259       p->srcLim = p->temp + p->tempPos;
260       if (src != srcLim)
261         p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
262 
263       PRF(printf("     ip = %8d   tempPos = %8d   src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
264 
265       Bcj2Enc_Encode_2(p);
266 
267       {
268         unsigned num = (unsigned)(p->src - p->temp);
269         unsigned tempPos = p->tempPos - num;
270         unsigned i;
271         p->tempPos = tempPos;
272         for (i = 0; i < tempPos; i++)
273           p->temp[i] = p->temp[(size_t)i + num];
274 
275         p->src = src;
276         p->srcLim = srcLim;
277         p->finishMode = finishMode;
278 
279         if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim)
280           return;
281 
282         if (extra >= tempPos)
283         {
284           p->src = src - tempPos;
285           p->tempPos = 0;
286           break;
287         }
288 
289         p->temp[tempPos] = src[0];
290         p->tempPos = tempPos + 1;
291         p->src = src + 1;
292         extra++;
293       }
294     }
295   }
296 
297   PRF(printf("++++ ip = %8d   tempPos = %8d   src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
298 
299   Bcj2Enc_Encode_2(p);
300 
301   if (p->state == BCJ2_ENC_STATE_ORIG)
302   {
303     const Byte *src = p->src;
304     unsigned rem = (unsigned)(p->srcLim - src);
305     unsigned i;
306     for (i = 0; i < rem; i++)
307       p->temp[i] = src[i];
308     p->tempPos = rem;
309     p->src = src + rem;
310   }
311 }
312