1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 **      each line encodes 45 bytes (except possibly the last)
7 **      First char encodes (binary) length, rest data
8 **      each char encodes 6 bits, as follows:
9 **      binary: 01234567 abcdefgh ijklmnop
10 **      ascii:  012345 67abcd efghij klmnop
11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place); this is *not* reflected in the length.
14 ** base64:
15 **      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in a similar order to uuencode/hqx. Encoding
**      is done via a table.
18 **      Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 **      File starts with introductory text, real data starts and ends
21 **      with colons.
22 **      Data consists of three similar parts: info, datafork, resourcefork.
23 **      Each part is protected (at the end) with a 16-bit crc
24 **      The binary data is run-length encoded, and then ascii-fied:
25 **      binary: 01234567 abcdefgh ijklmnop
26 **      ascii:  012345 67abcd efghij klmnop
27 **      ASCII encoding is table-driven, see the code.
28 **      Short binary data results in the runt ascii-byte being output with
29 **      the bits in the right place.
30 **
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
33 **
34 **      Programs that encode binary data in ASCII are written in
35 **      such a style that they are as unreadable as possible. Devices used
36 **      include unnecessary global variables, burying important tables
37 **      in unrelated sourcefiles, putting functions in include files,
38 **      using seemingly-descriptive variable names for different purposes,
39 **      calls to empty subroutines and a host of others.
40 **
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
43 **
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character.  It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
52 **
53 ** Brandon Long, September 2001.
54 */
55 
56 #define PY_SSIZE_T_CLEAN
57 
58 #include "Python.h"
59 #include "pystrhex.h"
60 #ifdef USE_ZLIB_CRC32
61 #include "zlib.h"
62 #endif
63 
/* Exception object passed to PyErr_SetString()/PyErr_Format() by the codecs
   in this file when the input is invalid (e.g. "Illegal char"). */
static PyObject *Error;
/* Exception object set by the hqx decoders in this file when the input
   stops before a complete unit has been read. */
static PyObject *Incomplete;
66 
/*
** hqx translation tables (ascii->binary and binary->ascii) and the
** special byte values that go with them.
*/

/* Byte that marks a run-length sequence in RLE-coded hqx data. */
#define RUNCHAR 0x90

/*
** Out-of-band codes stored in table_a2b_hqx.  Real 6-bit values occupy
** 0x00..0x3F, so these cannot collide with decoded data.
*/
#define DONE 0x7F       /* ':' - terminating colon */
#define SKIP 0x7E       /* '\n' and '\r' - ignored */
#define FAIL 0x7D       /* any other byte outside the hqx alphabet */

/* Indexed by input byte; yields a 6-bit value or DONE/SKIP/FAIL. */
static const unsigned char table_a2b_hqx[256] = {
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,  /* 0x00: ^@ ^A ^B ^C ^D ^E ^F ^G */
    FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,  /* 0x08: \b \t \n ^K ^L \r ^N ^O */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,  /* 0x10: ^P ^Q ^R ^S ^T ^U ^V ^W */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,  /* 0x18: ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
    FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,  /* 0x20: SP ! " # $ % & ' */
    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,  /* 0x28: ( ) * + , - . / */
    0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,  /* 0x30: 0 1 2 3 4 5 6 7 */
    0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,  /* 0x38: 8 9 : ; < = > ? */
    0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,  /* 0x40: @ A B C D E F G */
    0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,  /* 0x48: H I J K L M N O */
    0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,  /* 0x50: P Q R S T U V W */
    0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,  /* 0x58: X Y Z [ \ ] ^ _ */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,  /* 0x60: ` a b c d e f g */
    0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,  /* 0x68: h i j k l m n o */
    0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,  /* 0x70: p q r s t u v w */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,  /* 0x78: x y z { | } ~ ^? */

    /* 0x80..0xFF: no byte with the high bit set is part of the alphabet */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};

/* Indexed by 6-bit value; yields the hqx character that encodes it. */
static const unsigned char table_b2a_hqx[] =
    "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
130 
/*
** base64 ascii->binary table, indexed by a 7-bit character code.
** -1 marks characters outside the alphabet.  Note that the pad
** character '=' maps to 0 rather than -1.
*/
static const char table_a2b_base64[] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x28 */ -1, -1, -1, 62, -1, -1, -1, 63,     /* '+' and '/' */
    /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59,     /* '0'..'7' */
    /* 0x38 */ 60, 61, -1, -1, -1,  0, -1, -1,     /* '8', '9'; PAD -> 0 */
    /* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,     /* 'A'..'G' */
    /* 0x48 */  7,  8,  9, 10, 11, 12, 13, 14,     /* 'H'..'O' */
    /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22,     /* 'P'..'W' */
    /* 0x58 */ 23, 24, 25, -1, -1, -1, -1, -1,     /* 'X'..'Z' */
    /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32,     /* 'a'..'g' */
    /* 0x68 */ 33, 34, 35, 36, 37, 38, 39, 40,     /* 'h'..'o' */
    /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48,     /* 'p'..'w' */
    /* 0x78 */ 49, 50, 51, -1, -1, -1, -1, -1      /* 'x'..'z' */
};

/* Character used to fill out a short final base64 group. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* base64 binary->ascii table, indexed by 6-bit value. */
static const unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";
149 
150 
151 
/*
** 16-bit CRC lookup table, one entry per byte value.  Entry i equals the
** result of feeding the byte i (placed in the top 8 bits) through eight
** shift/xor steps with polynomial 0x1021.
*/
static const unsigned short crctab_hqx[256] = {
    /* 0x00 */ 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    /* 0x08 */ 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    /* 0x10 */ 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    /* 0x18 */ 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    /* 0x20 */ 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    /* 0x28 */ 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    /* 0x30 */ 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    /* 0x38 */ 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    /* 0x40 */ 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    /* 0x48 */ 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    /* 0x50 */ 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    /* 0x58 */ 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    /* 0x60 */ 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    /* 0x68 */ 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    /* 0x70 */ 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    /* 0x78 */ 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    /* 0x80 */ 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    /* 0x88 */ 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    /* 0x90 */ 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    /* 0x98 */ 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    /* 0xa0 */ 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    /* 0xa8 */ 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    /* 0xb0 */ 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    /* 0xb8 */ 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    /* 0xc0 */ 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    /* 0xc8 */ 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    /* 0xd0 */ 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    /* 0xd8 */ 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    /* 0xe0 */ 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    /* 0xe8 */ 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    /* 0xf0 */ 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    /* 0xf8 */ 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
186 
187 /*[clinic input]
188 module binascii
189 [clinic start generated code]*/
190 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
191 
192 /*[python input]
193 
194 class ascii_buffer_converter(CConverter):
195     type = 'Py_buffer'
196     converter = 'ascii_buffer_converter'
197     impl_by_reference = True
198     c_default = "{NULL, NULL}"
199 
200     def cleanup(self):
201         name = self.name
202         return "".join(["if (", name, ".obj)\n   PyBuffer_Release(&", name, ");\n"])
203 
204 [python start generated code]*/
205 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
206 
207 static int
ascii_buffer_converter(PyObject * arg,Py_buffer * buf)208 ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
209 {
210     if (arg == NULL) {
211         PyBuffer_Release(buf);
212         return 1;
213     }
214     if (PyUnicode_Check(arg)) {
215         if (PyUnicode_READY(arg) < 0)
216             return 0;
217         if (!PyUnicode_IS_ASCII(arg)) {
218             PyErr_SetString(PyExc_ValueError,
219                             "string argument should contain only ASCII characters");
220             return 0;
221         }
222         assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
223         buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
224         buf->len = PyUnicode_GET_LENGTH(arg);
225         buf->obj = NULL;
226         return 1;
227     }
228     if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
229         PyErr_Format(PyExc_TypeError,
230                      "argument should be bytes, buffer or ASCII string, "
231                      "not '%.100s'", Py_TYPE(arg)->tp_name);
232         return 0;
233     }
234     if (!PyBuffer_IsContiguous(buf, 'C')) {
235         PyErr_Format(PyExc_TypeError,
236                      "argument should be a contiguous buffer, "
237                      "not '%.100s'", Py_TYPE(arg)->tp_name);
238         PyBuffer_Release(buf);
239         return 0;
240     }
241     return Py_CLEANUP_SUPPORTED;
242 }
243 
244 #include "clinic/binascii.c.h"
245 
246 /*[clinic input]
247 binascii.a2b_uu
248 
249     data: ascii_buffer
250     /
251 
252 Decode a line of uuencoded data.
253 [clinic start generated code]*/
254 
255 static PyObject *
binascii_a2b_uu_impl(PyObject * module,Py_buffer * data)256 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
257 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
258 {
259     const unsigned char *ascii_data;
260     unsigned char *bin_data;
261     int leftbits = 0;
262     unsigned char this_ch;
263     unsigned int leftchar = 0;
264     PyObject *rv;
265     Py_ssize_t ascii_len, bin_len;
266 
267     ascii_data = data->buf;
268     ascii_len = data->len;
269 
270     assert(ascii_len >= 0);
271 
272     /* First byte: binary data length (in bytes) */
273     bin_len = (*ascii_data++ - ' ') & 077;
274     ascii_len--;
275 
276     /* Allocate the buffer */
277     if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
278         return NULL;
279     bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
280 
281     for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
282         /* XXX is it really best to add NULs if there's no more data */
283         this_ch = (ascii_len > 0) ? *ascii_data : 0;
284         if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
285             /*
286             ** Whitespace. Assume some spaces got eaten at
287             ** end-of-line. (We check this later)
288             */
289             this_ch = 0;
290         } else {
291             /* Check the character for legality
292             ** The 64 in stead of the expected 63 is because
293             ** there are a few uuencodes out there that use
294             ** '`' as zero instead of space.
295             */
296             if ( this_ch < ' ' || this_ch > (' ' + 64)) {
297                 PyErr_SetString(Error, "Illegal char");
298                 Py_DECREF(rv);
299                 return NULL;
300             }
301             this_ch = (this_ch - ' ') & 077;
302         }
303         /*
304         ** Shift it in on the low end, and see if there's
305         ** a byte ready for output.
306         */
307         leftchar = (leftchar << 6) | (this_ch);
308         leftbits += 6;
309         if ( leftbits >= 8 ) {
310             leftbits -= 8;
311             *bin_data++ = (leftchar >> leftbits) & 0xff;
312             leftchar &= ((1 << leftbits) - 1);
313             bin_len--;
314         }
315     }
316     /*
317     ** Finally, check that if there's anything left on the line
318     ** that it's whitespace only.
319     */
320     while( ascii_len-- > 0 ) {
321         this_ch = *ascii_data++;
322         /* Extra '`' may be written as padding in some cases */
323         if ( this_ch != ' ' && this_ch != ' '+64 &&
324              this_ch != '\n' && this_ch != '\r' ) {
325             PyErr_SetString(Error, "Trailing garbage");
326             Py_DECREF(rv);
327             return NULL;
328         }
329     }
330     return rv;
331 }
332 
333 /*[clinic input]
334 binascii.b2a_uu
335 
336     data: Py_buffer
337     /
338     *
339     backtick: bool(accept={int}) = False
340 
341 Uuencode line of data.
342 [clinic start generated code]*/
343 
344 static PyObject *
binascii_b2a_uu_impl(PyObject * module,Py_buffer * data,int backtick)345 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
346 /*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
347 {
348     unsigned char *ascii_data;
349     const unsigned char *bin_data;
350     int leftbits = 0;
351     unsigned char this_ch;
352     unsigned int leftchar = 0;
353     Py_ssize_t bin_len, out_len;
354     _PyBytesWriter writer;
355 
356     _PyBytesWriter_Init(&writer);
357     bin_data = data->buf;
358     bin_len = data->len;
359     if ( bin_len > 45 ) {
360         /* The 45 is a limit that appears in all uuencode's */
361         PyErr_SetString(Error, "At most 45 bytes at once");
362         return NULL;
363     }
364 
365     /* We're lazy and allocate to much (fixed up later) */
366     out_len = 2 + (bin_len + 2) / 3 * 4;
367     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
368     if (ascii_data == NULL)
369         return NULL;
370 
371     /* Store the length */
372     if (backtick && !bin_len)
373         *ascii_data++ = '`';
374     else
375         *ascii_data++ = ' ' + (unsigned char)bin_len;
376 
377     for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
378         /* Shift the data (or padding) into our buffer */
379         if ( bin_len > 0 )              /* Data */
380             leftchar = (leftchar << 8) | *bin_data;
381         else                            /* Padding */
382             leftchar <<= 8;
383         leftbits += 8;
384 
385         /* See if there are 6-bit groups ready */
386         while ( leftbits >= 6 ) {
387             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
388             leftbits -= 6;
389             if (backtick && !this_ch)
390                 *ascii_data++ = '`';
391             else
392                 *ascii_data++ = this_ch + ' ';
393         }
394     }
395     *ascii_data++ = '\n';       /* Append a courtesy newline */
396 
397     return _PyBytesWriter_Finish(&writer, ascii_data);
398 }
399 
400 
401 static int
binascii_find_valid(const unsigned char * s,Py_ssize_t slen,int num)402 binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
403 {
404     /* Finds & returns the (num+1)th
405     ** valid character for base64, or -1 if none.
406     */
407 
408     int ret = -1;
409     unsigned char c, b64val;
410 
411     while ((slen > 0) && (ret == -1)) {
412         c = *s;
413         b64val = table_a2b_base64[c & 0x7f];
414         if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
415             if (num == 0)
416                 ret = *s;
417             num--;
418         }
419 
420         s++;
421         slen--;
422     }
423     return ret;
424 }
425 
426 /*[clinic input]
427 binascii.a2b_base64
428 
429     data: ascii_buffer
430     /
431 
432 Decode a line of base64 data.
433 [clinic start generated code]*/
434 
435 static PyObject *
binascii_a2b_base64_impl(PyObject * module,Py_buffer * data)436 binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
437 /*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
438 {
439     const unsigned char *ascii_data;
440     unsigned char *bin_data;
441     unsigned char *bin_data_start;
442     int leftbits = 0;
443     unsigned char this_ch;
444     unsigned int leftchar = 0;
445     Py_ssize_t ascii_len, bin_len;
446     int quad_pos = 0;
447     _PyBytesWriter writer;
448 
449     ascii_data = data->buf;
450     ascii_len = data->len;
451 
452     assert(ascii_len >= 0);
453 
454     if (ascii_len > PY_SSIZE_T_MAX - 3)
455         return PyErr_NoMemory();
456 
457     bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
458 
459     _PyBytesWriter_Init(&writer);
460 
461     /* Allocate the buffer */
462     bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
463     if (bin_data == NULL)
464         return NULL;
465     bin_data_start = bin_data;
466 
467     for( ; ascii_len > 0; ascii_len--, ascii_data++) {
468         this_ch = *ascii_data;
469 
470         if (this_ch > 0x7f ||
471             this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
472             continue;
473 
474         /* Check for pad sequences and ignore
475         ** the invalid ones.
476         */
477         if (this_ch == BASE64_PAD) {
478             if ( (quad_pos < 2) ||
479                  ((quad_pos == 2) &&
480                   (binascii_find_valid(ascii_data, ascii_len, 1)
481                    != BASE64_PAD)) )
482             {
483                 continue;
484             }
485             else {
486                 /* A pad sequence means no more input.
487                 ** We've already interpreted the data
488                 ** from the quad at this point.
489                 */
490                 leftbits = 0;
491                 break;
492             }
493         }
494 
495         this_ch = table_a2b_base64[*ascii_data];
496         if ( this_ch == (unsigned char) -1 )
497             continue;
498 
499         /*
500         ** Shift it in on the low end, and see if there's
501         ** a byte ready for output.
502         */
503         quad_pos = (quad_pos + 1) & 0x03;
504         leftchar = (leftchar << 6) | (this_ch);
505         leftbits += 6;
506 
507         if ( leftbits >= 8 ) {
508             leftbits -= 8;
509             *bin_data++ = (leftchar >> leftbits) & 0xff;
510             leftchar &= ((1 << leftbits) - 1);
511         }
512     }
513 
514     if (leftbits != 0) {
515         if (leftbits == 6) {
516             /*
517             ** There is exactly one extra valid, non-padding, base64 character.
518             ** This is an invalid length, as there is no possible input that
519             ** could encoded into such a base64 string.
520             */
521             PyErr_Format(Error,
522                          "Invalid base64-encoded string: "
523                          "number of data characters (%d) cannot be 1 more "
524                          "than a multiple of 4",
525                          (bin_data - bin_data_start) / 3 * 4 + 1);
526         } else {
527             PyErr_SetString(Error, "Incorrect padding");
528         }
529         _PyBytesWriter_Dealloc(&writer);
530         return NULL;
531     }
532 
533     return _PyBytesWriter_Finish(&writer, bin_data);
534 }
535 
536 
537 /*[clinic input]
538 binascii.b2a_base64
539 
540     data: Py_buffer
541     /
542     *
543     newline: bool(accept={int}) = True
544 
545 Base64-code line of data.
546 [clinic start generated code]*/
547 
548 static PyObject *
binascii_b2a_base64_impl(PyObject * module,Py_buffer * data,int newline)549 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
550 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
551 {
552     unsigned char *ascii_data;
553     const unsigned char *bin_data;
554     int leftbits = 0;
555     unsigned char this_ch;
556     unsigned int leftchar = 0;
557     Py_ssize_t bin_len, out_len;
558     _PyBytesWriter writer;
559 
560     bin_data = data->buf;
561     bin_len = data->len;
562     _PyBytesWriter_Init(&writer);
563 
564     assert(bin_len >= 0);
565 
566     if ( bin_len > BASE64_MAXBIN ) {
567         PyErr_SetString(Error, "Too much data for base64 line");
568         return NULL;
569     }
570 
571     /* We're lazy and allocate too much (fixed up later).
572        "+2" leaves room for up to two pad characters.
573        Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
574     out_len = bin_len*2 + 2;
575     if (newline)
576         out_len++;
577     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
578     if (ascii_data == NULL)
579         return NULL;
580 
581     for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
582         /* Shift the data into our buffer */
583         leftchar = (leftchar << 8) | *bin_data;
584         leftbits += 8;
585 
586         /* See if there are 6-bit groups ready */
587         while ( leftbits >= 6 ) {
588             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
589             leftbits -= 6;
590             *ascii_data++ = table_b2a_base64[this_ch];
591         }
592     }
593     if ( leftbits == 2 ) {
594         *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
595         *ascii_data++ = BASE64_PAD;
596         *ascii_data++ = BASE64_PAD;
597     } else if ( leftbits == 4 ) {
598         *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
599         *ascii_data++ = BASE64_PAD;
600     }
601     if (newline)
602         *ascii_data++ = '\n';       /* Append a courtesy newline */
603 
604     return _PyBytesWriter_Finish(&writer, ascii_data);
605 }
606 
607 /*[clinic input]
608 binascii.a2b_hqx
609 
610     data: ascii_buffer
611     /
612 
613 Decode .hqx coding.
614 [clinic start generated code]*/
615 
616 static PyObject *
binascii_a2b_hqx_impl(PyObject * module,Py_buffer * data)617 binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
618 /*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
619 {
620     const unsigned char *ascii_data;
621     unsigned char *bin_data;
622     int leftbits = 0;
623     unsigned char this_ch;
624     unsigned int leftchar = 0;
625     PyObject *res;
626     Py_ssize_t len;
627     int done = 0;
628     _PyBytesWriter writer;
629 
630     ascii_data = data->buf;
631     len = data->len;
632     _PyBytesWriter_Init(&writer);
633 
634     assert(len >= 0);
635 
636     if (len > PY_SSIZE_T_MAX - 2)
637         return PyErr_NoMemory();
638 
639     /* Allocate a string that is too big (fixed later)
640        Add two to the initial length to prevent interning which
641        would preclude subsequent resizing.  */
642     bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
643     if (bin_data == NULL)
644         return NULL;
645 
646     for( ; len > 0 ; len--, ascii_data++ ) {
647         /* Get the byte and look it up */
648         this_ch = table_a2b_hqx[*ascii_data];
649         if ( this_ch == SKIP )
650             continue;
651         if ( this_ch == FAIL ) {
652             PyErr_SetString(Error, "Illegal char");
653             _PyBytesWriter_Dealloc(&writer);
654             return NULL;
655         }
656         if ( this_ch == DONE ) {
657             /* The terminating colon */
658             done = 1;
659             break;
660         }
661 
662         /* Shift it into the buffer and see if any bytes are ready */
663         leftchar = (leftchar << 6) | (this_ch);
664         leftbits += 6;
665         if ( leftbits >= 8 ) {
666             leftbits -= 8;
667             *bin_data++ = (leftchar >> leftbits) & 0xff;
668             leftchar &= ((1 << leftbits) - 1);
669         }
670     }
671 
672     if ( leftbits && !done ) {
673         PyErr_SetString(Incomplete,
674                         "String has incomplete number of bytes");
675         _PyBytesWriter_Dealloc(&writer);
676         return NULL;
677     }
678 
679     res = _PyBytesWriter_Finish(&writer, bin_data);
680     if (res == NULL)
681         return NULL;
682     return Py_BuildValue("Ni", res, done);
683 }
684 
685 
686 /*[clinic input]
687 binascii.rlecode_hqx
688 
689     data: Py_buffer
690     /
691 
692 Binhex RLE-code binary data.
693 [clinic start generated code]*/
694 
695 static PyObject *
binascii_rlecode_hqx_impl(PyObject * module,Py_buffer * data)696 binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
697 /*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
698 {
699     const unsigned char *in_data;
700     unsigned char *out_data;
701     unsigned char ch;
702     Py_ssize_t in, inend, len;
703     _PyBytesWriter writer;
704 
705     _PyBytesWriter_Init(&writer);
706     in_data = data->buf;
707     len = data->len;
708 
709     assert(len >= 0);
710 
711     if (len > PY_SSIZE_T_MAX / 2 - 2)
712         return PyErr_NoMemory();
713 
714     /* Worst case: output is twice as big as input (fixed later) */
715     out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
716     if (out_data == NULL)
717         return NULL;
718 
719     for( in=0; in<len; in++) {
720         ch = in_data[in];
721         if ( ch == RUNCHAR ) {
722             /* RUNCHAR. Escape it. */
723             *out_data++ = RUNCHAR;
724             *out_data++ = 0;
725         } else {
726             /* Check how many following are the same */
727             for(inend=in+1;
728                 inend<len && in_data[inend] == ch &&
729                     inend < in+255;
730                 inend++) ;
731             if ( inend - in > 3 ) {
732                 /* More than 3 in a row. Output RLE. */
733                 *out_data++ = ch;
734                 *out_data++ = RUNCHAR;
735                 *out_data++ = (unsigned char) (inend-in);
736                 in = inend-1;
737             } else {
738                 /* Less than 3. Output the byte itself */
739                 *out_data++ = ch;
740             }
741         }
742     }
743 
744     return _PyBytesWriter_Finish(&writer, out_data);
745 }
746 
747 
748 /*[clinic input]
749 binascii.b2a_hqx
750 
751     data: Py_buffer
752     /
753 
754 Encode .hqx data.
755 [clinic start generated code]*/
756 
757 static PyObject *
binascii_b2a_hqx_impl(PyObject * module,Py_buffer * data)758 binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
759 /*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
760 {
761     unsigned char *ascii_data;
762     const unsigned char *bin_data;
763     int leftbits = 0;
764     unsigned char this_ch;
765     unsigned int leftchar = 0;
766     Py_ssize_t len;
767     _PyBytesWriter writer;
768 
769     bin_data = data->buf;
770     len = data->len;
771     _PyBytesWriter_Init(&writer);
772 
773     assert(len >= 0);
774 
775     if (len > PY_SSIZE_T_MAX / 2 - 2)
776         return PyErr_NoMemory();
777 
778     /* Allocate a buffer that is at least large enough */
779     ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
780     if (ascii_data == NULL)
781         return NULL;
782 
783     for( ; len > 0 ; len--, bin_data++ ) {
784         /* Shift into our buffer, and output any 6bits ready */
785         leftchar = (leftchar << 8) | *bin_data;
786         leftbits += 8;
787         while ( leftbits >= 6 ) {
788             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
789             leftbits -= 6;
790             *ascii_data++ = table_b2a_hqx[this_ch];
791         }
792     }
793     /* Output a possible runt byte */
794     if ( leftbits ) {
795         leftchar <<= (6-leftbits);
796         *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
797     }
798 
799     return _PyBytesWriter_Finish(&writer, ascii_data);
800 }
801 
802 
803 /*[clinic input]
804 binascii.rledecode_hqx
805 
806     data: Py_buffer
807     /
808 
809 Decode hexbin RLE-coded string.
810 [clinic start generated code]*/
811 
812 static PyObject *
binascii_rledecode_hqx_impl(PyObject * module,Py_buffer * data)813 binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
814 /*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
815 {
816     const unsigned char *in_data;
817     unsigned char *out_data;
818     unsigned char in_byte, in_repeat;
819     Py_ssize_t in_len;
820     _PyBytesWriter writer;
821 
822     in_data = data->buf;
823     in_len = data->len;
824     _PyBytesWriter_Init(&writer);
825 
826     assert(in_len >= 0);
827 
828     /* Empty string is a special case */
829     if ( in_len == 0 )
830         return PyBytes_FromStringAndSize("", 0);
831     else if (in_len > PY_SSIZE_T_MAX / 2)
832         return PyErr_NoMemory();
833 
834     /* Allocate a buffer of reasonable size. Resized when needed */
835     out_data = _PyBytesWriter_Alloc(&writer, in_len);
836     if (out_data == NULL)
837         return NULL;
838 
839     /* Use overallocation */
840     writer.overallocate = 1;
841 
842     /*
843     ** We need two macros here to get/put bytes and handle
844     ** end-of-buffer for input and output strings.
845     */
846 #define INBYTE(b)                                                       \
847     do {                                                                \
848          if ( --in_len < 0 ) {                                          \
849            PyErr_SetString(Incomplete, "");                             \
850            goto error;                                                  \
851          }                                                              \
852          b = *in_data++;                                                \
853     } while(0)
854 
855     /*
856     ** Handle first byte separately (since we have to get angry
857     ** in case of an orphaned RLE code).
858     */
859     INBYTE(in_byte);
860 
861     if (in_byte == RUNCHAR) {
862         INBYTE(in_repeat);
863         /* only 1 byte will be written, but 2 bytes were preallocated:
864            subtract 1 byte to prevent overallocation */
865         writer.min_size--;
866 
867         if (in_repeat != 0) {
868             /* Note Error, not Incomplete (which is at the end
869             ** of the string only). This is a programmer error.
870             */
871             PyErr_SetString(Error, "Orphaned RLE code at start");
872             goto error;
873         }
874         *out_data++ = RUNCHAR;
875     } else {
876         *out_data++ = in_byte;
877     }
878 
879     while( in_len > 0 ) {
880         INBYTE(in_byte);
881 
882         if (in_byte == RUNCHAR) {
883             INBYTE(in_repeat);
884             /* only 1 byte will be written, but 2 bytes were preallocated:
885                subtract 1 byte to prevent overallocation */
886             writer.min_size--;
887 
888             if ( in_repeat == 0 ) {
889                 /* Just an escaped RUNCHAR value */
890                 *out_data++ = RUNCHAR;
891             } else {
892                 /* Pick up value and output a sequence of it */
893                 in_byte = out_data[-1];
894 
895                 /* enlarge the buffer if needed */
896                 if (in_repeat > 1) {
897                     /* -1 because we already preallocated 1 byte */
898                     out_data = _PyBytesWriter_Prepare(&writer, out_data,
899                                                       in_repeat - 1);
900                     if (out_data == NULL)
901                         goto error;
902                 }
903 
904                 while ( --in_repeat > 0 )
905                     *out_data++ = in_byte;
906             }
907         } else {
908             /* Normal byte */
909             *out_data++ = in_byte;
910         }
911     }
912     return _PyBytesWriter_Finish(&writer, out_data);
913 
914 error:
915     _PyBytesWriter_Dealloc(&writer);
916     return NULL;
917 }
918 
919 
920 /*[clinic input]
921 binascii.crc_hqx -> unsigned_int
922 
923     data: Py_buffer
924     crc: unsigned_int(bitwise=True)
925     /
926 
927 Compute CRC-CCITT incrementally.
928 [clinic start generated code]*/
929 
930 static unsigned int
binascii_crc_hqx_impl(PyObject * module,Py_buffer * data,unsigned int crc)931 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
932 /*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/
933 {
934     const unsigned char *bin_data;
935     Py_ssize_t len;
936 
937     crc &= 0xffff;
938     bin_data = data->buf;
939     len = data->len;
940 
941     while(len-- > 0) {
942         crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
943     }
944 
945     return crc;
946 }
947 
948 #ifndef USE_ZLIB_CRC32
949 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
950     Also known as: ISO 3307
951 **********************************************************************|
952 *                                                                    *|
953 * Demonstration program to compute the 32-bit CRC used as the frame  *|
954 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
955 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
956 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
957 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
958 * this polynomial is or will be included in CCITT V.41, which        *|
959 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
960 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
961 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
962 *                                                                    *|
963 **********************************************************************|
964 
965  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
966  code or tables extracted from it, as desired without restriction.
967 
968  First, the polynomial itself and its table of feedback terms.  The
969  polynomial is
970  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
971  Note that we take it "backwards" and put the highest-order term in
972  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
973  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
974  the MSB being 1.
975 
976  Note that the usual hardware shift register implementation, which
977  is what we're using (we're merely optimizing it by doing eight-bit
978  chunks at a time) shifts bits into the lowest-order term.  In our
979  implementation, that means shifting towards the right.  Why do we
980  do it this way?  Because the calculated CRC must be transmitted in
981  order from highest-order term to lowest-order term.  UARTs transmit
982  characters in order from LSB to MSB.  By storing the CRC this way,
983  we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
985  by bit from highest- to lowest-order term without requiring any bit
986  shuffling on our part.  Reception works similarly.
987 
988  The feedback terms table consists of 256, 32-bit entries.  Notes:
989 
990   1. The table can be generated at runtime if desired; code to do so
991      is shown later.  It might not be obvious, but the feedback
992      terms simply represent the results of eight shift/xor opera-
993      tions for all combinations of data and CRC register values.
994 
995   2. The CRC accumulation logic is the same for all CRC polynomials,
996      be they sixteen or thirty-two bits wide.  You simply choose the
997      appropriate table.  Alternatively, because the table can be
998      generated at runtime, you can start by generating the table for
999      the polynomial in question and use exactly the same "updcrc",
1000      if your application needn't simultaneously handle two CRC
1001      polynomials.  (Note, however, that XMODEM is strange.)
1002 
1003   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
1004      of course, 32-bit entries work OK if the high 16 bits are zero.
1005 
1006   4. The values must be right-shifted by eight bits by the "updcrc"
1007      logic; the shift must be unsigned (bring in zeroes).  On some
1008      hardware you could probably optimize the shift in assembler by
1009      using byte-swap instructions.
1010 ********************************************************************/
1011 
/* CRC-32 feedback table: 256 entries, one per possible value of the low
 * byte of (crc ^ input_byte).  It is indexed that way by the table-driven
 * implementation of binascii_crc32_impl() that is compiled when
 * USE_ZLIB_CRC32 is not defined. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
1066 #endif  /* USE_ZLIB_CRC32 */
1067 
1068 /*[clinic input]
1069 binascii.crc32 -> unsigned_int
1070 
1071     data: Py_buffer
1072     crc: unsigned_int(bitwise=True) = 0
1073     /
1074 
1075 Compute CRC-32 incrementally.
1076 [clinic start generated code]*/
1077 
1078 static unsigned int
binascii_crc32_impl(PyObject * module,Py_buffer * data,unsigned int crc)1079 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1080 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
1081 
1082 #ifdef USE_ZLIB_CRC32
1083 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1084 {
1085     const Byte *buf;
1086     Py_ssize_t len;
1087     int signed_val;
1088 
1089     buf = (Byte*)data->buf;
1090     len = data->len;
1091     signed_val = crc32(crc, buf, len);
1092     return (unsigned int)signed_val & 0xffffffffU;
1093 }
1094 #else  /* USE_ZLIB_CRC32 */
1095 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1096     const unsigned char *bin_data;
1097     Py_ssize_t len;
1098     unsigned int result;
1099 
1100     bin_data = data->buf;
1101     len = data->len;
1102 
1103     crc = ~ crc;
1104     while (len-- > 0) {
1105         crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1106         /* Note:  (crc >> 8) MUST zero fill on left */
1107     }
1108 
1109     result = (crc ^ 0xFFFFFFFF);
1110     return result & 0xffffffff;
1111 }
1112 #endif  /* USE_ZLIB_CRC32 */
1113 
1114 /*[clinic input]
1115 binascii.b2a_hex
1116 
1117     data: Py_buffer
1118     /
1119 
1120 Hexadecimal representation of binary data.
1121 
1122 The return value is a bytes object.  This function is also
1123 available as "hexlify()".
1124 [clinic start generated code]*/
1125 
1126 static PyObject *
binascii_b2a_hex_impl(PyObject * module,Py_buffer * data)1127 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data)
1128 /*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/
1129 {
1130     return _Py_strhex_bytes((const char *)data->buf, data->len);
1131 }
1132 
1133 /*[clinic input]
1134 binascii.hexlify = binascii.b2a_hex
1135 
1136 Hexadecimal representation of binary data.
1137 
1138 The return value is a bytes object.
1139 [clinic start generated code]*/
1140 
1141 static PyObject *
binascii_hexlify_impl(PyObject * module,Py_buffer * data)1142 binascii_hexlify_impl(PyObject *module, Py_buffer *data)
1143 /*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/
1144 {
1145     return _Py_strhex_bytes((const char *)data->buf, data->len);
1146 }
1147 
/* Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0-9 for a character accepted by Py_ISDIGIT, 10-15 for 'a'-'f'
 * (upper-case input is folded with Py_TOLOWER first), and -1 for anything
 * else. */
static int
to_int(int c)
{
    if (Py_ISDIGIT(c)) {
        return c - '0';
    }

    if (Py_ISUPPER(c)) {
        c = Py_TOLOWER(c);
    }

    return ('a' <= c && c <= 'f') ? (c - 'a' + 10) : -1;
}
1161 
1162 
1163 /*[clinic input]
1164 binascii.a2b_hex
1165 
1166     hexstr: ascii_buffer
1167     /
1168 
1169 Binary data of hexadecimal representation.
1170 
1171 hexstr must contain an even number of hex digits (upper or lower case).
1172 This function is also available as "unhexlify()".
1173 [clinic start generated code]*/
1174 
1175 static PyObject *
binascii_a2b_hex_impl(PyObject * module,Py_buffer * hexstr)1176 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1177 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
1178 {
1179     const char* argbuf;
1180     Py_ssize_t arglen;
1181     PyObject *retval;
1182     char* retbuf;
1183     Py_ssize_t i, j;
1184 
1185     argbuf = hexstr->buf;
1186     arglen = hexstr->len;
1187 
1188     assert(arglen >= 0);
1189 
1190     /* XXX What should we do about strings with an odd length?  Should
1191      * we add an implicit leading zero, or a trailing zero?  For now,
1192      * raise an exception.
1193      */
1194     if (arglen % 2) {
1195         PyErr_SetString(Error, "Odd-length string");
1196         return NULL;
1197     }
1198 
1199     retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1200     if (!retval)
1201         return NULL;
1202     retbuf = PyBytes_AS_STRING(retval);
1203 
1204     for (i=j=0; i < arglen; i += 2) {
1205         int top = to_int(Py_CHARMASK(argbuf[i]));
1206         int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1207         if (top == -1 || bot == -1) {
1208             PyErr_SetString(Error,
1209                             "Non-hexadecimal digit found");
1210             goto finally;
1211         }
1212         retbuf[j++] = (top << 4) + bot;
1213     }
1214     return retval;
1215 
1216   finally:
1217     Py_DECREF(retval);
1218     return NULL;
1219 }
1220 
1221 /*[clinic input]
1222 binascii.unhexlify = binascii.a2b_hex
1223 
1224 Binary data of hexadecimal representation.
1225 
1226 hexstr must contain an even number of hex digits (upper or lower case).
1227 [clinic start generated code]*/
1228 
1229 static PyObject *
binascii_unhexlify_impl(PyObject * module,Py_buffer * hexstr)1230 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1231 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
1232 {
1233     return binascii_a2b_hex_impl(module, hexstr);
1234 }
1235 
/* Maps a 7-bit character code (0..127) to the value of the hexadecimal
 * digit it represents, or -1 when the character is not a hex digit.
 * Each row below covers 16 consecutive character codes. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,  /* '0'..'9' */
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,  /* 'A'..'F' */
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,  /* 'a'..'f' */
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Unchecked lookup into table_hex: the macro performs no range test, so the
 * caller must guarantee that c is below 128 (a2b_qp only applies it to
 * characters it has already verified to be hex digits). */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit used by b2a_qp: a soft line break is emitted before an
 * output line would reach this many characters. */
#define MAXLINESIZE 76
1250 
1251 
1252 /*[clinic input]
1253 binascii.a2b_qp
1254 
1255     data: ascii_buffer
1256     header: bool(accept={int}) = False
1257 
1258 Decode a string of qp-encoded data.
1259 [clinic start generated code]*/
1260 
1261 static PyObject *
binascii_a2b_qp_impl(PyObject * module,Py_buffer * data,int header)1262 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
1263 /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
1264 {
1265     Py_ssize_t in, out;
1266     char ch;
1267     const unsigned char *ascii_data;
1268     unsigned char *odata;
1269     Py_ssize_t datalen = 0;
1270     PyObject *rv;
1271 
1272     ascii_data = data->buf;
1273     datalen = data->len;
1274 
1275     /* We allocate the output same size as input, this is overkill.
1276      * The previous implementation used calloc() so we'll zero out the
1277      * memory here too, since PyMem_Malloc() does not guarantee that.
1278      */
1279     odata = (unsigned char *) PyMem_Malloc(datalen);
1280     if (odata == NULL) {
1281         PyErr_NoMemory();
1282         return NULL;
1283     }
1284     memset(odata, 0, datalen);
1285 
1286     in = out = 0;
1287     while (in < datalen) {
1288         if (ascii_data[in] == '=') {
1289             in++;
1290             if (in >= datalen) break;
1291             /* Soft line breaks */
1292             if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1293                 if (ascii_data[in] != '\n') {
1294                     while (in < datalen && ascii_data[in] != '\n') in++;
1295                 }
1296                 if (in < datalen) in++;
1297             }
1298             else if (ascii_data[in] == '=') {
1299                 /* broken case from broken python qp */
1300                 odata[out++] = '=';
1301                 in++;
1302             }
1303             else if ((in + 1 < datalen) &&
1304                      ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1305                       (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1306                       (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1307                      ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1308                       (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1309                       (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1310                 /* hexval */
1311                 ch = hexval(ascii_data[in]) << 4;
1312                 in++;
1313                 ch |= hexval(ascii_data[in]);
1314                 in++;
1315                 odata[out++] = ch;
1316             }
1317             else {
1318               odata[out++] = '=';
1319             }
1320         }
1321         else if (header && ascii_data[in] == '_') {
1322             odata[out++] = ' ';
1323             in++;
1324         }
1325         else {
1326             odata[out] = ascii_data[in];
1327             in++;
1328             out++;
1329         }
1330     }
1331     if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1332         PyMem_Free(odata);
1333         return NULL;
1334     }
1335     PyMem_Free(odata);
1336     return rv;
1337 }
1338 
/* Write the two upper-case hexadecimal digits of `ch` into s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char hex_digits[] = "0123456789ABCDEF";

    s[0] = hex_digits[(ch >> 4) & 0x0f];
    s[1] = hex_digits[ch & 0x0f];
    return 0;
}
1349 
1350 /* XXX: This is ridiculously complicated to be backward compatible
1351  * (mostly) with the quopri module.  It doesn't re-create the quopri
1352  * module bug where text ending in CRLF has the CR encoded */
1353 
1354 /*[clinic input]
1355 binascii.b2a_qp
1356 
1357     data: Py_buffer
1358     quotetabs: bool(accept={int}) = False
1359     istext: bool(accept={int}) = True
1360     header: bool(accept={int}) = False
1361 
1362 Encode a string using quoted-printable encoding.
1363 
1364 On encoding, when istext is set, newlines are not encoded, and white
1365 space at end of lines is.  When istext is not set, \r and \n (CR/LF)
1366 are both encoded.  When quotetabs is set, space and tabs are encoded.
1367 [clinic start generated code]*/
1368 
static PyObject *
binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
                     int istext, int header)
/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
{
    /*
     * Quoted-printable encoder.  Works in two passes over the input that
     * must stay in lock-step:
     *   1. a sizing pass that adds up how many output bytes are needed;
     *   2. an emitting pass that writes into a buffer of that size.
     * Both passes classify every input byte with the same conditions.
     *
     * Returns a new bytes object, or NULL with MemoryError set when the
     * computed size overflows or the buffer cannot be allocated (or NULL
     * if creating the result object fails).
     */
    Py_ssize_t in, out;
    const unsigned char *databuf;
    unsigned char *odata;
    Py_ssize_t datalen = 0, odatalen = 0;
    PyObject *rv;
    unsigned int linelen = 0;   /* length of the current output line */
    unsigned char ch;
    int crlf = 0;               /* non-zero: write line ends as "\r\n" */
    const unsigned char *p;

    databuf = data->buf;
    datalen = data->len;

    /* See if this string is using CRLF line ends */
    /* XXX: this function has the side effect of converting all of
     * the end of lines to be the same depending on this detection
     * here */
    /* Only the first '\n' in the input is inspected. */
    p = (const unsigned char *) memchr(databuf, '\n', datalen);
    if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
        crlf = 1;

    /* First, scan to see how many characters need to be encoded */
    in = 0;
    while (in < datalen) {
        Py_ssize_t delta = 0;   /* output bytes contributed by this step */
        /* A byte is written as "=XX" when it is:
         *   - above 126, or '=' itself;
         *   - '_' in header mode;
         *   - a '.' at the start of a line that is followed by the end of
         *     the data, '\n', '\r' or a NUL byte;
         *   - '\r' or '\n' when istext is false;
         *   - a tab or space that is the very last input byte;
         *   - below 33 and not '\r'/'\n'; tab and space only count here
         *     when quotetabs is set.
         */
        if ((databuf[in] > 126) ||
            (databuf[in] == '=') ||
            (header && databuf[in] == '_') ||
            ((databuf[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || databuf[in+1] == '\n' ||
              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
            ((databuf[in] < 33) &&
             (databuf[in] != '\r') && (databuf[in] != '\n') &&
             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
        {
            /* Soft line break first if the escape would not fit:
               "=\r\n" costs 3 bytes, "=\n" costs 2. */
            if ((linelen + 3) >= MAXLINESIZE) {
                linelen = 0;
                if (crlf)
                    delta += 3;
                else
                    delta += 2;
            }
            linelen += 3;
            delta += 3;
            in++;
        }
        else {
            /* Text-mode line end: a bare '\n', or "\r\n". */
            if (istext &&
                ((databuf[in] == '\n') ||
                 ((in+1 < datalen) && (databuf[in] == '\r') &&
                 (databuf[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* Reserves 2 extra bytes for turning that whitespace into
                   "=XX".  This looks at the input byte, whereas the second
                   pass looks at the byte actually written, so the estimate
                   can be larger than what is used (e.g. a space written
                   as '_' in header mode). */
                if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
                    delta += 2;
                if (crlf)
                    delta += 2;
                else
                    delta += 1;
                if (databuf[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                /* Plain byte; break the line first when it is full, unless
                   this is the last byte or a '\n' follows. */
                if ((in + 1 != datalen) &&
                    (databuf[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    linelen = 0;
                    if (crlf)
                        delta += 3;
                    else
                        delta += 2;
                }
                linelen++;
                delta++;
                in++;
            }
        }
        /* Refuse to let the running total overflow Py_ssize_t. */
        if (PY_SSIZE_T_MAX - delta < odatalen) {
            PyErr_NoMemory();
            return NULL;
        }
        odatalen += delta;
    }

    /* Allocate the size computed by the first pass.
     * The previous implementation used calloc() so we'll zero out the
     * memory here too, since PyMem_Malloc() does not guarantee that.
     */
    odata = (unsigned char *) PyMem_Malloc(odatalen);
    if (odata == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memset(odata, 0, odatalen);

    /* Second pass: same classification as above, now writing the bytes. */
    in = out = linelen = 0;
    while (in < datalen) {
        if ((databuf[in] > 126) ||
            (databuf[in] == '=') ||
            (header && databuf[in] == '_') ||
            ((databuf[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || databuf[in+1] == '\n' ||
              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
            ((databuf[in] < 33) &&
             (databuf[in] != '\r') && (databuf[in] != '\n') &&
             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
        {
            /* Escaped byte, preceded by a soft line break if needed. */
            if ((linelen + 3 )>= MAXLINESIZE) {
                odata[out++] = '=';
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                linelen = 0;
            }
            odata[out++] = '=';
            to_hex(databuf[in], &odata[out]);
            out += 2;
            in++;
            linelen += 3;
        }
        else {
            if (istext &&
                ((databuf[in] == '\n') ||
                 ((in+1 < datalen) && (databuf[in] == '\r') &&
                 (databuf[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* Rewrite the space/tab already written as "=XX". */
                if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
                    ch = odata[out-1];
                    odata[out-1] = '=';
                    to_hex(ch, &odata[out]);
                    out += 2;
                }

                /* The line end is written in the detected convention,
                   regardless of how it appeared in the input. */
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                if (databuf[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                if ((in + 1 != datalen) &&
                    (databuf[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    odata[out++] = '=';
                    if (crlf) odata[out++] = '\r';
                    odata[out++] = '\n';
                    linelen = 0;
                }
                linelen++;
                /* In header mode a space is written as '_'. */
                if (header && databuf[in] == ' ') {
                    odata[out++] = '_';
                    in++;
                }
                else {
                    odata[out++] = databuf[in++];
                }
            }
        }
    }
    /* Copy the bytes actually written into the result object; the scratch
       buffer is freed on both the success and the failure path. */
    if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
        PyMem_Free(odata);
        return NULL;
    }
    PyMem_Free(odata);
    return rv;
}
1549 
1550 /* List of functions defined in the module */
1551 
/* Method table referenced by the binasciimodule definition below.
 * NOTE(review): the *_METHODDEF macros are not defined in this part of the
 * file; presumably each expands to one PyMethodDef entry (including its
 * trailing comma) generated by Argument Clinic -- confirm in the clinic
 * header. */
static struct PyMethodDef binascii_module_methods[] = {
    BINASCII_A2B_UU_METHODDEF
    BINASCII_B2A_UU_METHODDEF
    BINASCII_A2B_BASE64_METHODDEF
    BINASCII_B2A_BASE64_METHODDEF
    BINASCII_A2B_HQX_METHODDEF
    BINASCII_B2A_HQX_METHODDEF
    BINASCII_A2B_HEX_METHODDEF
    BINASCII_B2A_HEX_METHODDEF
    BINASCII_HEXLIFY_METHODDEF
    BINASCII_UNHEXLIFY_METHODDEF
    BINASCII_RLECODE_HQX_METHODDEF
    BINASCII_RLEDECODE_HQX_METHODDEF
    BINASCII_CRC_HQX_METHODDEF
    BINASCII_CRC32_METHODDEF
    BINASCII_A2B_QP_METHODDEF
    BINASCII_B2A_QP_METHODDEF
    {NULL, NULL}                             /* sentinel */
};
1571 
1572 
1573 /* Initialization function for the module (*must* be called PyInit_binascii) */
/* Module docstring, installed through the module definition below. */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");


/* Module definition passed to PyModule_Create() by PyInit_binascii().
 * Fields are given positionally; the labels follow the documented
 * PyModuleDef layout. */
static struct PyModuleDef binasciimodule = {
    PyModuleDef_HEAD_INIT,
    "binascii",                 /* module name */
    doc_binascii,               /* module docstring */
    -1,                         /* m_size: no per-module state struct; the
                                   Error/Incomplete exception objects are
                                   kept in file-level variables */
    binascii_module_methods,    /* method table */
    NULL,                       /* remaining optional slots are unused */
    NULL,
    NULL,
    NULL
};
1588 
1589 PyMODINIT_FUNC
PyInit_binascii(void)1590 PyInit_binascii(void)
1591 {
1592     PyObject *m, *d;
1593 
1594     /* Create the module and add the functions */
1595     m = PyModule_Create(&binasciimodule);
1596     if (m == NULL)
1597         return NULL;
1598 
1599     d = PyModule_GetDict(m);
1600 
1601     Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1602     PyDict_SetItemString(d, "Error", Error);
1603     Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1604     PyDict_SetItemString(d, "Incomplete", Incomplete);
1605     if (PyErr_Occurred()) {
1606         Py_DECREF(m);
1607         m = NULL;
1608     }
1609     return m;
1610 }
1611