1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
16 ** each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
30 **
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
33 **
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
40 **
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
43 **
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character. It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
52 **
53 ** Brandon Long, September 2001.
54 */
55
56 #define PY_SSIZE_T_CLEAN
57
58 #include "Python.h"
59 #include "pystrhex.h"
60 #ifdef USE_ZLIB_CRC32
61 #include "zlib.h"
62 #endif
63
/* Exception objects handed to PyErr_SetString()/PyErr_Format() by the
** codecs in this file.  They are only declared here.
** NOTE(review): presumably created and assigned at module initialisation,
** which is outside the visible part of the file -- confirm.
*/
static PyObject *Error;       /* set for malformed input or bad arguments */
static PyObject *Incomplete;  /* set when the input ends before it is complete */
66
67 /*
68 ** hqx lookup table, ascii->binary.
69 */
70
/* Marker byte of the binhex run-length coding.  In encoded data a literal
** 0x90 is written as RUNCHAR followed by 0; RUNCHAR followed by a non-zero
** count repeats the preceding byte. */
#define RUNCHAR 0x90

/* Special entries stored in table_a2b_hqx next to the real 6-bit values. */
#define DONE 0x7F   /* the ':' that terminates the data */
#define SKIP 0x7E   /* character is ignored (\n and \r) */
#define FAIL 0x7D   /* character is not legal in hqx data */
76
/* Maps every possible input byte to its 6-bit hqx value, or to one of the
** markers DONE, SKIP or FAIL.  Each row covers eight consecutive character
** codes; the comment above a row names the characters of that row.
*/
static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*      SPC    !     "     #     $     %     &     '    */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*       (     )     *     +     ,     -     .     /    */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*       0     1     2     3     4     5     6     7    */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*       8     9     :     ;     <     =     >     ?    */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       @     A     B     C     D     E     F     G    */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*       H     I     J     K     L     M     N     O    */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*       P     Q     R     S     T     U     V     W    */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*       X     Y     Z     [     \     ]     ^     _    */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*       `     a     b     c     d     e     f     g    */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*       h     i     j     k     l     m     n     o    */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*       p     q     r     s     t     u     v     w    */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       x     y     z     {     |     }     ~    ^?    */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Codes 128..255: none of them is legal. */
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
127
/* hqx lookup table, binary->ascii: the character used to write each 6-bit
** value, indexed by that value (the inverse of table_a2b_hqx). */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
130
/* base64 lookup table, ascii->binary, for the 128 7-bit character codes.
** -1 marks a character that is not part of the base64 alphabet.  The pad
** character '=' is deliberately mapped to 0 instead of -1, so lookups
** treat it as a valid character.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
141
/* Character that fills up an incomplete final group of base64 output. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.  The encoder
** requests at most bin_len*2 + 3 output bytes, and this bound keeps that
** request from exceeding PY_SSIZE_T_MAX. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
146
/* base64 lookup table, binary->ascii: the output character for each 6-bit
** value, indexed by that value. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
149
150
151
/* Lookup table for the 16-bit CRC computed by binascii_crc_hqx_impl().
** It is indexed by (high byte of the running crc) XOR (next data byte).
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
186
187 /*[clinic input]
188 module binascii
189 [clinic start generated code]*/
190 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
191
192 /*[python input]
193
194 class ascii_buffer_converter(CConverter):
195 type = 'Py_buffer'
196 converter = 'ascii_buffer_converter'
197 impl_by_reference = True
198 c_default = "{NULL, NULL}"
199
200 def cleanup(self):
201 name = self.name
202 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
203
204 [python start generated code]*/
205 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
206
207 static int
ascii_buffer_converter(PyObject * arg,Py_buffer * buf)208 ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
209 {
210 if (arg == NULL) {
211 PyBuffer_Release(buf);
212 return 1;
213 }
214 if (PyUnicode_Check(arg)) {
215 if (PyUnicode_READY(arg) < 0)
216 return 0;
217 if (!PyUnicode_IS_ASCII(arg)) {
218 PyErr_SetString(PyExc_ValueError,
219 "string argument should contain only ASCII characters");
220 return 0;
221 }
222 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
223 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
224 buf->len = PyUnicode_GET_LENGTH(arg);
225 buf->obj = NULL;
226 return 1;
227 }
228 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
229 PyErr_Format(PyExc_TypeError,
230 "argument should be bytes, buffer or ASCII string, "
231 "not '%.100s'", Py_TYPE(arg)->tp_name);
232 return 0;
233 }
234 if (!PyBuffer_IsContiguous(buf, 'C')) {
235 PyErr_Format(PyExc_TypeError,
236 "argument should be a contiguous buffer, "
237 "not '%.100s'", Py_TYPE(arg)->tp_name);
238 PyBuffer_Release(buf);
239 return 0;
240 }
241 return Py_CLEANUP_SUPPORTED;
242 }
243
244 #include "clinic/binascii.c.h"
245
246 /*[clinic input]
247 binascii.a2b_uu
248
249 data: ascii_buffer
250 /
251
252 Decode a line of uuencoded data.
253 [clinic start generated code]*/
254
255 static PyObject *
binascii_a2b_uu_impl(PyObject * module,Py_buffer * data)256 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
257 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
258 {
259 const unsigned char *ascii_data;
260 unsigned char *bin_data;
261 int leftbits = 0;
262 unsigned char this_ch;
263 unsigned int leftchar = 0;
264 PyObject *rv;
265 Py_ssize_t ascii_len, bin_len;
266
267 ascii_data = data->buf;
268 ascii_len = data->len;
269
270 assert(ascii_len >= 0);
271
272 /* First byte: binary data length (in bytes) */
273 bin_len = (*ascii_data++ - ' ') & 077;
274 ascii_len--;
275
276 /* Allocate the buffer */
277 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
278 return NULL;
279 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
280
281 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
282 /* XXX is it really best to add NULs if there's no more data */
283 this_ch = (ascii_len > 0) ? *ascii_data : 0;
284 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
285 /*
286 ** Whitespace. Assume some spaces got eaten at
287 ** end-of-line. (We check this later)
288 */
289 this_ch = 0;
290 } else {
291 /* Check the character for legality
292 ** The 64 in stead of the expected 63 is because
293 ** there are a few uuencodes out there that use
294 ** '`' as zero instead of space.
295 */
296 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
297 PyErr_SetString(Error, "Illegal char");
298 Py_DECREF(rv);
299 return NULL;
300 }
301 this_ch = (this_ch - ' ') & 077;
302 }
303 /*
304 ** Shift it in on the low end, and see if there's
305 ** a byte ready for output.
306 */
307 leftchar = (leftchar << 6) | (this_ch);
308 leftbits += 6;
309 if ( leftbits >= 8 ) {
310 leftbits -= 8;
311 *bin_data++ = (leftchar >> leftbits) & 0xff;
312 leftchar &= ((1 << leftbits) - 1);
313 bin_len--;
314 }
315 }
316 /*
317 ** Finally, check that if there's anything left on the line
318 ** that it's whitespace only.
319 */
320 while( ascii_len-- > 0 ) {
321 this_ch = *ascii_data++;
322 /* Extra '`' may be written as padding in some cases */
323 if ( this_ch != ' ' && this_ch != ' '+64 &&
324 this_ch != '\n' && this_ch != '\r' ) {
325 PyErr_SetString(Error, "Trailing garbage");
326 Py_DECREF(rv);
327 return NULL;
328 }
329 }
330 return rv;
331 }
332
333 /*[clinic input]
334 binascii.b2a_uu
335
336 data: Py_buffer
337 /
338 *
339 backtick: bool(accept={int}) = False
340
341 Uuencode line of data.
342 [clinic start generated code]*/
343
344 static PyObject *
binascii_b2a_uu_impl(PyObject * module,Py_buffer * data,int backtick)345 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
346 /*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
347 {
348 unsigned char *ascii_data;
349 const unsigned char *bin_data;
350 int leftbits = 0;
351 unsigned char this_ch;
352 unsigned int leftchar = 0;
353 Py_ssize_t bin_len, out_len;
354 _PyBytesWriter writer;
355
356 _PyBytesWriter_Init(&writer);
357 bin_data = data->buf;
358 bin_len = data->len;
359 if ( bin_len > 45 ) {
360 /* The 45 is a limit that appears in all uuencode's */
361 PyErr_SetString(Error, "At most 45 bytes at once");
362 return NULL;
363 }
364
365 /* We're lazy and allocate to much (fixed up later) */
366 out_len = 2 + (bin_len + 2) / 3 * 4;
367 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
368 if (ascii_data == NULL)
369 return NULL;
370
371 /* Store the length */
372 if (backtick && !bin_len)
373 *ascii_data++ = '`';
374 else
375 *ascii_data++ = ' ' + (unsigned char)bin_len;
376
377 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
378 /* Shift the data (or padding) into our buffer */
379 if ( bin_len > 0 ) /* Data */
380 leftchar = (leftchar << 8) | *bin_data;
381 else /* Padding */
382 leftchar <<= 8;
383 leftbits += 8;
384
385 /* See if there are 6-bit groups ready */
386 while ( leftbits >= 6 ) {
387 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
388 leftbits -= 6;
389 if (backtick && !this_ch)
390 *ascii_data++ = '`';
391 else
392 *ascii_data++ = this_ch + ' ';
393 }
394 }
395 *ascii_data++ = '\n'; /* Append a courtesy newline */
396
397 return _PyBytesWriter_Finish(&writer, ascii_data);
398 }
399
400
401 static int
binascii_find_valid(const unsigned char * s,Py_ssize_t slen,int num)402 binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
403 {
404 /* Finds & returns the (num+1)th
405 ** valid character for base64, or -1 if none.
406 */
407
408 int ret = -1;
409 unsigned char c, b64val;
410
411 while ((slen > 0) && (ret == -1)) {
412 c = *s;
413 b64val = table_a2b_base64[c & 0x7f];
414 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
415 if (num == 0)
416 ret = *s;
417 num--;
418 }
419
420 s++;
421 slen--;
422 }
423 return ret;
424 }
425
426 /*[clinic input]
427 binascii.a2b_base64
428
429 data: ascii_buffer
430 /
431
432 Decode a line of base64 data.
433 [clinic start generated code]*/
434
435 static PyObject *
binascii_a2b_base64_impl(PyObject * module,Py_buffer * data)436 binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
437 /*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
438 {
439 const unsigned char *ascii_data;
440 unsigned char *bin_data;
441 unsigned char *bin_data_start;
442 int leftbits = 0;
443 unsigned char this_ch;
444 unsigned int leftchar = 0;
445 Py_ssize_t ascii_len, bin_len;
446 int quad_pos = 0;
447 _PyBytesWriter writer;
448
449 ascii_data = data->buf;
450 ascii_len = data->len;
451
452 assert(ascii_len >= 0);
453
454 if (ascii_len > PY_SSIZE_T_MAX - 3)
455 return PyErr_NoMemory();
456
457 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
458
459 _PyBytesWriter_Init(&writer);
460
461 /* Allocate the buffer */
462 bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
463 if (bin_data == NULL)
464 return NULL;
465 bin_data_start = bin_data;
466
467 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
468 this_ch = *ascii_data;
469
470 if (this_ch > 0x7f ||
471 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
472 continue;
473
474 /* Check for pad sequences and ignore
475 ** the invalid ones.
476 */
477 if (this_ch == BASE64_PAD) {
478 if ( (quad_pos < 2) ||
479 ((quad_pos == 2) &&
480 (binascii_find_valid(ascii_data, ascii_len, 1)
481 != BASE64_PAD)) )
482 {
483 continue;
484 }
485 else {
486 /* A pad sequence means no more input.
487 ** We've already interpreted the data
488 ** from the quad at this point.
489 */
490 leftbits = 0;
491 break;
492 }
493 }
494
495 this_ch = table_a2b_base64[*ascii_data];
496 if ( this_ch == (unsigned char) -1 )
497 continue;
498
499 /*
500 ** Shift it in on the low end, and see if there's
501 ** a byte ready for output.
502 */
503 quad_pos = (quad_pos + 1) & 0x03;
504 leftchar = (leftchar << 6) | (this_ch);
505 leftbits += 6;
506
507 if ( leftbits >= 8 ) {
508 leftbits -= 8;
509 *bin_data++ = (leftchar >> leftbits) & 0xff;
510 leftchar &= ((1 << leftbits) - 1);
511 }
512 }
513
514 if (leftbits != 0) {
515 if (leftbits == 6) {
516 /*
517 ** There is exactly one extra valid, non-padding, base64 character.
518 ** This is an invalid length, as there is no possible input that
519 ** could encoded into such a base64 string.
520 */
521 PyErr_Format(Error,
522 "Invalid base64-encoded string: "
523 "number of data characters (%d) cannot be 1 more "
524 "than a multiple of 4",
525 (bin_data - bin_data_start) / 3 * 4 + 1);
526 } else {
527 PyErr_SetString(Error, "Incorrect padding");
528 }
529 _PyBytesWriter_Dealloc(&writer);
530 return NULL;
531 }
532
533 return _PyBytesWriter_Finish(&writer, bin_data);
534 }
535
536
537 /*[clinic input]
538 binascii.b2a_base64
539
540 data: Py_buffer
541 /
542 *
543 newline: bool(accept={int}) = True
544
545 Base64-code line of data.
546 [clinic start generated code]*/
547
548 static PyObject *
binascii_b2a_base64_impl(PyObject * module,Py_buffer * data,int newline)549 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
550 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
551 {
552 unsigned char *ascii_data;
553 const unsigned char *bin_data;
554 int leftbits = 0;
555 unsigned char this_ch;
556 unsigned int leftchar = 0;
557 Py_ssize_t bin_len, out_len;
558 _PyBytesWriter writer;
559
560 bin_data = data->buf;
561 bin_len = data->len;
562 _PyBytesWriter_Init(&writer);
563
564 assert(bin_len >= 0);
565
566 if ( bin_len > BASE64_MAXBIN ) {
567 PyErr_SetString(Error, "Too much data for base64 line");
568 return NULL;
569 }
570
571 /* We're lazy and allocate too much (fixed up later).
572 "+2" leaves room for up to two pad characters.
573 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
574 out_len = bin_len*2 + 2;
575 if (newline)
576 out_len++;
577 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
578 if (ascii_data == NULL)
579 return NULL;
580
581 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
582 /* Shift the data into our buffer */
583 leftchar = (leftchar << 8) | *bin_data;
584 leftbits += 8;
585
586 /* See if there are 6-bit groups ready */
587 while ( leftbits >= 6 ) {
588 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
589 leftbits -= 6;
590 *ascii_data++ = table_b2a_base64[this_ch];
591 }
592 }
593 if ( leftbits == 2 ) {
594 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
595 *ascii_data++ = BASE64_PAD;
596 *ascii_data++ = BASE64_PAD;
597 } else if ( leftbits == 4 ) {
598 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
599 *ascii_data++ = BASE64_PAD;
600 }
601 if (newline)
602 *ascii_data++ = '\n'; /* Append a courtesy newline */
603
604 return _PyBytesWriter_Finish(&writer, ascii_data);
605 }
606
607 /*[clinic input]
608 binascii.a2b_hqx
609
610 data: ascii_buffer
611 /
612
613 Decode .hqx coding.
614 [clinic start generated code]*/
615
616 static PyObject *
binascii_a2b_hqx_impl(PyObject * module,Py_buffer * data)617 binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
618 /*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
619 {
620 const unsigned char *ascii_data;
621 unsigned char *bin_data;
622 int leftbits = 0;
623 unsigned char this_ch;
624 unsigned int leftchar = 0;
625 PyObject *res;
626 Py_ssize_t len;
627 int done = 0;
628 _PyBytesWriter writer;
629
630 ascii_data = data->buf;
631 len = data->len;
632 _PyBytesWriter_Init(&writer);
633
634 assert(len >= 0);
635
636 if (len > PY_SSIZE_T_MAX - 2)
637 return PyErr_NoMemory();
638
639 /* Allocate a string that is too big (fixed later)
640 Add two to the initial length to prevent interning which
641 would preclude subsequent resizing. */
642 bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
643 if (bin_data == NULL)
644 return NULL;
645
646 for( ; len > 0 ; len--, ascii_data++ ) {
647 /* Get the byte and look it up */
648 this_ch = table_a2b_hqx[*ascii_data];
649 if ( this_ch == SKIP )
650 continue;
651 if ( this_ch == FAIL ) {
652 PyErr_SetString(Error, "Illegal char");
653 _PyBytesWriter_Dealloc(&writer);
654 return NULL;
655 }
656 if ( this_ch == DONE ) {
657 /* The terminating colon */
658 done = 1;
659 break;
660 }
661
662 /* Shift it into the buffer and see if any bytes are ready */
663 leftchar = (leftchar << 6) | (this_ch);
664 leftbits += 6;
665 if ( leftbits >= 8 ) {
666 leftbits -= 8;
667 *bin_data++ = (leftchar >> leftbits) & 0xff;
668 leftchar &= ((1 << leftbits) - 1);
669 }
670 }
671
672 if ( leftbits && !done ) {
673 PyErr_SetString(Incomplete,
674 "String has incomplete number of bytes");
675 _PyBytesWriter_Dealloc(&writer);
676 return NULL;
677 }
678
679 res = _PyBytesWriter_Finish(&writer, bin_data);
680 if (res == NULL)
681 return NULL;
682 return Py_BuildValue("Ni", res, done);
683 }
684
685
686 /*[clinic input]
687 binascii.rlecode_hqx
688
689 data: Py_buffer
690 /
691
692 Binhex RLE-code binary data.
693 [clinic start generated code]*/
694
695 static PyObject *
binascii_rlecode_hqx_impl(PyObject * module,Py_buffer * data)696 binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
697 /*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
698 {
699 const unsigned char *in_data;
700 unsigned char *out_data;
701 unsigned char ch;
702 Py_ssize_t in, inend, len;
703 _PyBytesWriter writer;
704
705 _PyBytesWriter_Init(&writer);
706 in_data = data->buf;
707 len = data->len;
708
709 assert(len >= 0);
710
711 if (len > PY_SSIZE_T_MAX / 2 - 2)
712 return PyErr_NoMemory();
713
714 /* Worst case: output is twice as big as input (fixed later) */
715 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
716 if (out_data == NULL)
717 return NULL;
718
719 for( in=0; in<len; in++) {
720 ch = in_data[in];
721 if ( ch == RUNCHAR ) {
722 /* RUNCHAR. Escape it. */
723 *out_data++ = RUNCHAR;
724 *out_data++ = 0;
725 } else {
726 /* Check how many following are the same */
727 for(inend=in+1;
728 inend<len && in_data[inend] == ch &&
729 inend < in+255;
730 inend++) ;
731 if ( inend - in > 3 ) {
732 /* More than 3 in a row. Output RLE. */
733 *out_data++ = ch;
734 *out_data++ = RUNCHAR;
735 *out_data++ = (unsigned char) (inend-in);
736 in = inend-1;
737 } else {
738 /* Less than 3. Output the byte itself */
739 *out_data++ = ch;
740 }
741 }
742 }
743
744 return _PyBytesWriter_Finish(&writer, out_data);
745 }
746
747
748 /*[clinic input]
749 binascii.b2a_hqx
750
751 data: Py_buffer
752 /
753
754 Encode .hqx data.
755 [clinic start generated code]*/
756
757 static PyObject *
binascii_b2a_hqx_impl(PyObject * module,Py_buffer * data)758 binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
759 /*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
760 {
761 unsigned char *ascii_data;
762 const unsigned char *bin_data;
763 int leftbits = 0;
764 unsigned char this_ch;
765 unsigned int leftchar = 0;
766 Py_ssize_t len;
767 _PyBytesWriter writer;
768
769 bin_data = data->buf;
770 len = data->len;
771 _PyBytesWriter_Init(&writer);
772
773 assert(len >= 0);
774
775 if (len > PY_SSIZE_T_MAX / 2 - 2)
776 return PyErr_NoMemory();
777
778 /* Allocate a buffer that is at least large enough */
779 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
780 if (ascii_data == NULL)
781 return NULL;
782
783 for( ; len > 0 ; len--, bin_data++ ) {
784 /* Shift into our buffer, and output any 6bits ready */
785 leftchar = (leftchar << 8) | *bin_data;
786 leftbits += 8;
787 while ( leftbits >= 6 ) {
788 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
789 leftbits -= 6;
790 *ascii_data++ = table_b2a_hqx[this_ch];
791 }
792 }
793 /* Output a possible runt byte */
794 if ( leftbits ) {
795 leftchar <<= (6-leftbits);
796 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
797 }
798
799 return _PyBytesWriter_Finish(&writer, ascii_data);
800 }
801
802
803 /*[clinic input]
804 binascii.rledecode_hqx
805
806 data: Py_buffer
807 /
808
809 Decode hexbin RLE-coded string.
810 [clinic start generated code]*/
811
812 static PyObject *
binascii_rledecode_hqx_impl(PyObject * module,Py_buffer * data)813 binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
814 /*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
815 {
816 const unsigned char *in_data;
817 unsigned char *out_data;
818 unsigned char in_byte, in_repeat;
819 Py_ssize_t in_len;
820 _PyBytesWriter writer;
821
822 in_data = data->buf;
823 in_len = data->len;
824 _PyBytesWriter_Init(&writer);
825
826 assert(in_len >= 0);
827
828 /* Empty string is a special case */
829 if ( in_len == 0 )
830 return PyBytes_FromStringAndSize("", 0);
831 else if (in_len > PY_SSIZE_T_MAX / 2)
832 return PyErr_NoMemory();
833
834 /* Allocate a buffer of reasonable size. Resized when needed */
835 out_data = _PyBytesWriter_Alloc(&writer, in_len);
836 if (out_data == NULL)
837 return NULL;
838
839 /* Use overallocation */
840 writer.overallocate = 1;
841
842 /*
843 ** We need two macros here to get/put bytes and handle
844 ** end-of-buffer for input and output strings.
845 */
846 #define INBYTE(b) \
847 do { \
848 if ( --in_len < 0 ) { \
849 PyErr_SetString(Incomplete, ""); \
850 goto error; \
851 } \
852 b = *in_data++; \
853 } while(0)
854
855 /*
856 ** Handle first byte separately (since we have to get angry
857 ** in case of an orphaned RLE code).
858 */
859 INBYTE(in_byte);
860
861 if (in_byte == RUNCHAR) {
862 INBYTE(in_repeat);
863 /* only 1 byte will be written, but 2 bytes were preallocated:
864 subtract 1 byte to prevent overallocation */
865 writer.min_size--;
866
867 if (in_repeat != 0) {
868 /* Note Error, not Incomplete (which is at the end
869 ** of the string only). This is a programmer error.
870 */
871 PyErr_SetString(Error, "Orphaned RLE code at start");
872 goto error;
873 }
874 *out_data++ = RUNCHAR;
875 } else {
876 *out_data++ = in_byte;
877 }
878
879 while( in_len > 0 ) {
880 INBYTE(in_byte);
881
882 if (in_byte == RUNCHAR) {
883 INBYTE(in_repeat);
884 /* only 1 byte will be written, but 2 bytes were preallocated:
885 subtract 1 byte to prevent overallocation */
886 writer.min_size--;
887
888 if ( in_repeat == 0 ) {
889 /* Just an escaped RUNCHAR value */
890 *out_data++ = RUNCHAR;
891 } else {
892 /* Pick up value and output a sequence of it */
893 in_byte = out_data[-1];
894
895 /* enlarge the buffer if needed */
896 if (in_repeat > 1) {
897 /* -1 because we already preallocated 1 byte */
898 out_data = _PyBytesWriter_Prepare(&writer, out_data,
899 in_repeat - 1);
900 if (out_data == NULL)
901 goto error;
902 }
903
904 while ( --in_repeat > 0 )
905 *out_data++ = in_byte;
906 }
907 } else {
908 /* Normal byte */
909 *out_data++ = in_byte;
910 }
911 }
912 return _PyBytesWriter_Finish(&writer, out_data);
913
914 error:
915 _PyBytesWriter_Dealloc(&writer);
916 return NULL;
917 }
918
919
920 /*[clinic input]
921 binascii.crc_hqx -> unsigned_int
922
923 data: Py_buffer
924 crc: unsigned_int(bitwise=True)
925 /
926
927 Compute CRC-CCITT incrementally.
928 [clinic start generated code]*/
929
930 static unsigned int
binascii_crc_hqx_impl(PyObject * module,Py_buffer * data,unsigned int crc)931 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
932 /*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/
933 {
934 const unsigned char *bin_data;
935 Py_ssize_t len;
936
937 crc &= 0xffff;
938 bin_data = data->buf;
939 len = data->len;
940
941 while(len-- > 0) {
942 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
943 }
944
945 return crc;
946 }
947
948 #ifndef USE_ZLIB_CRC32
949 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
950 Also known as: ISO 3307
951 **********************************************************************|
952 * *|
953 * Demonstration program to compute the 32-bit CRC used as the frame *|
954 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
955 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
956 * protocol). The 32-bit FCS was added via the Federal Register, *|
957 * 1 June 1982, p.23798. I presume but don't know for certain that *|
958 * this polynomial is or will be included in CCITT V.41, which *|
959 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
960 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
961 * errors by a factor of 10^-5 over 16-bit FCS. *|
962 * *|
963 **********************************************************************|
964
965 Copyright (C) 1986 Gary S. Brown. You may use this program, or
966 code or tables extracted from it, as desired without restriction.
967
968 First, the polynomial itself and its table of feedback terms. The
969 polynomial is
970 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
971 Note that we take it "backwards" and put the highest-order term in
972 the lowest-order bit. The X^32 term is "implied"; the LSB is the
973 X^31 term, etc. The X^0 term (usually shown as "+1") results in
974 the MSB being 1.
975
976 Note that the usual hardware shift register implementation, which
977 is what we're using (we're merely optimizing it by doing eight-bit
978 chunks at a time) shifts bits into the lowest-order term. In our
979 implementation, that means shifting towards the right. Why do we
980 do it this way? Because the calculated CRC must be transmitted in
981 order from highest-order term to lowest-order term. UARTs transmit
982 characters in order from LSB to MSB. By storing the CRC this way,
983 we hand it to the UART in the order low-byte to high-byte; the UART
984 sends each low-bit to high-bit; and the result is transmission bit
985 by bit from highest- to lowest-order term without requiring any bit
986 shuffling on our part. Reception works similarly.
987
988 The feedback terms table consists of 256, 32-bit entries. Notes:
989
990 1. The table can be generated at runtime if desired; code to do so
991 is shown later. It might not be obvious, but the feedback
992 terms simply represent the results of eight shift/xor opera-
993 tions for all combinations of data and CRC register values.
994
995 2. The CRC accumulation logic is the same for all CRC polynomials,
996 be they sixteen or thirty-two bits wide. You simply choose the
997 appropriate table. Alternatively, because the table can be
998 generated at runtime, you can start by generating the table for
999 the polynomial in question and use exactly the same "updcrc",
1000 if your application needn't simultaneously handle two CRC
1001 polynomials. (Note, however, that XMODEM is strange.)
1002
1003 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
1004 of course, 32-bit entries work OK if the high 16 bits are zero.
1005
1006 4. The values must be right-shifted by eight bits by the "updcrc"
1007 logic; the shift must be unsigned (bring in zeroes). On some
1008 hardware you could probably optimize the shift in assembler by
1009 using byte-swap instructions.
1010 ********************************************************************/
1011
/* CRC-32 feedback-term lookup table: 256 entries, one per possible value of
 * the low byte of (crc ^ data byte).  Laid out four entries per row; the
 * leading comment on each row is the index of its first entry. */
static const unsigned int crc_32_tab[256] = {
    /* 0x00 */ 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU,
    /* 0x04 */ 0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U,
    /* 0x08 */ 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,
    /* 0x0c */ 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U,
    /* 0x10 */ 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
    /* 0x14 */ 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,
    /* 0x18 */ 0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU,
    /* 0x1c */ 0x14015c4fU, 0x63066cd9U, 0xfa0f3d63U, 0x8d080df5U,
    /* 0x20 */ 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,
    /* 0x24 */ 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
    /* 0x28 */ 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U,
    /* 0x2c */ 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,
    /* 0x30 */ 0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U,
    /* 0x34 */ 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, 0xb8bda50fU,
    /* 0x38 */ 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
    /* 0x3c */ 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU,
    /* 0x40 */ 0x76dc4190U, 0x01db7106U, 0x98d220bcU, 0xefd5102aU,
    /* 0x44 */ 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,
    /* 0x48 */ 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U,
    /* 0x4c */ 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
    /* 0x50 */ 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,
    /* 0x54 */ 0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U,
    /* 0x58 */ 0x65b0d9c6U, 0x12b7e950U, 0x8bbeb8eaU, 0xfcb9887cU,
    /* 0x5c */ 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,
    /* 0x60 */ 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
    /* 0x64 */ 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU,
    /* 0x68 */ 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,
    /* 0x6c */ 0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U,
    /* 0x70 */ 0x5005713cU, 0x270241aaU, 0xbe0b1010U, 0xc90c2086U,
    /* 0x74 */ 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
    /* 0x78 */ 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U,
    /* 0x7c */ 0x59b33d17U, 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU,
    /* 0x80 */ 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,
    /* 0x84 */ 0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U,
    /* 0x88 */ 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
    /* 0x8c */ 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,
    /* 0x90 */ 0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU,
    /* 0x94 */ 0xf762575dU, 0x806567cbU, 0x196c3671U, 0x6e6b06e7U,
    /* 0x98 */ 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,
    /* 0x9c */ 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
    /* 0xa0 */ 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U,
    /* 0xa4 */ 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,
    /* 0xa8 */ 0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U,
    /* 0xac */ 0xdf60efc3U, 0xa867df55U, 0x316e8eefU, 0x4669be79U,
    /* 0xb0 */ 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
    /* 0xb4 */ 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU,
    /* 0xb8 */ 0xc5ba3bbeU, 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U,
    /* 0xbc */ 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,
    /* 0xc0 */ 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU,
    /* 0xc4 */ 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
    /* 0xc8 */ 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,
    /* 0xcc */ 0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U,
    /* 0xd0 */ 0x86d3d2d4U, 0xf1d4e242U, 0x68ddb3f8U, 0x1fda836eU,
    /* 0xd4 */ 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,
    /* 0xd8 */ 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
    /* 0xdc */ 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U,
    /* 0xe0 */ 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,
    /* 0xe4 */ 0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU,
    /* 0xe8 */ 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, 0x37d83bf0U,
    /* 0xec */ 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
    /* 0xf0 */ 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U,
    /* 0xf4 */ 0xbad03605U, 0xcdd70693U, 0x54de5729U, 0x23d967bfU,
    /* 0xf8 */ 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,
    /* 0xfc */ 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU
};
1066 #endif /* USE_ZLIB_CRC32 */
1067
1068 /*[clinic input]
1069 binascii.crc32 -> unsigned_int
1070
1071 data: Py_buffer
1072 crc: unsigned_int(bitwise=True) = 0
1073 /
1074
1075 Compute CRC-32 incrementally.
1076 [clinic start generated code]*/
1077
1078 static unsigned int
binascii_crc32_impl(PyObject * module,Py_buffer * data,unsigned int crc)1079 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1080 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
1081
1082 #ifdef USE_ZLIB_CRC32
1083 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1084 {
1085 const Byte *buf;
1086 Py_ssize_t len;
1087 int signed_val;
1088
1089 buf = (Byte*)data->buf;
1090 len = data->len;
1091 signed_val = crc32(crc, buf, len);
1092 return (unsigned int)signed_val & 0xffffffffU;
1093 }
1094 #else /* USE_ZLIB_CRC32 */
1095 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1096 const unsigned char *bin_data;
1097 Py_ssize_t len;
1098 unsigned int result;
1099
1100 bin_data = data->buf;
1101 len = data->len;
1102
1103 crc = ~ crc;
1104 while (len-- > 0) {
1105 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1106 /* Note: (crc >> 8) MUST zero fill on left */
1107 }
1108
1109 result = (crc ^ 0xFFFFFFFF);
1110 return result & 0xffffffff;
1111 }
1112 #endif /* USE_ZLIB_CRC32 */
1113
1114 /*[clinic input]
1115 binascii.b2a_hex
1116
1117 data: Py_buffer
1118 /
1119
1120 Hexadecimal representation of binary data.
1121
1122 The return value is a bytes object. This function is also
1123 available as "hexlify()".
1124 [clinic start generated code]*/
1125
1126 static PyObject *
binascii_b2a_hex_impl(PyObject * module,Py_buffer * data)1127 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data)
1128 /*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/
1129 {
1130 return _Py_strhex_bytes((const char *)data->buf, data->len);
1131 }
1132
1133 /*[clinic input]
1134 binascii.hexlify = binascii.b2a_hex
1135
1136 Hexadecimal representation of binary data.
1137
1138 The return value is a bytes object.
1139 [clinic start generated code]*/
1140
1141 static PyObject *
binascii_hexlify_impl(PyObject * module,Py_buffer * data)1142 binascii_hexlify_impl(PyObject *module, Py_buffer *data)
1143 /*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/
1144 {
1145 return _Py_strhex_bytes((const char *)data->buf, data->len);
1146 }
1147
/* Convert one hexadecimal digit character to its numeric value.
 *
 * c: character code of the digit ('0'-'9', 'a'-'f' or 'A'-'F').
 *
 * Returns 0..15 for a valid hex digit and -1 for anything else, including
 * values outside the range of a byte.  Plain range comparisons are used so
 * the classification and the arithmetic always look at the same value.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
1161
1162
1163 /*[clinic input]
1164 binascii.a2b_hex
1165
1166 hexstr: ascii_buffer
1167 /
1168
1169 Binary data of hexadecimal representation.
1170
1171 hexstr must contain an even number of hex digits (upper or lower case).
1172 This function is also available as "unhexlify()".
1173 [clinic start generated code]*/
1174
1175 static PyObject *
binascii_a2b_hex_impl(PyObject * module,Py_buffer * hexstr)1176 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1177 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
1178 {
1179 const char* argbuf;
1180 Py_ssize_t arglen;
1181 PyObject *retval;
1182 char* retbuf;
1183 Py_ssize_t i, j;
1184
1185 argbuf = hexstr->buf;
1186 arglen = hexstr->len;
1187
1188 assert(arglen >= 0);
1189
1190 /* XXX What should we do about strings with an odd length? Should
1191 * we add an implicit leading zero, or a trailing zero? For now,
1192 * raise an exception.
1193 */
1194 if (arglen % 2) {
1195 PyErr_SetString(Error, "Odd-length string");
1196 return NULL;
1197 }
1198
1199 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1200 if (!retval)
1201 return NULL;
1202 retbuf = PyBytes_AS_STRING(retval);
1203
1204 for (i=j=0; i < arglen; i += 2) {
1205 int top = to_int(Py_CHARMASK(argbuf[i]));
1206 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1207 if (top == -1 || bot == -1) {
1208 PyErr_SetString(Error,
1209 "Non-hexadecimal digit found");
1210 goto finally;
1211 }
1212 retbuf[j++] = (top << 4) + bot;
1213 }
1214 return retval;
1215
1216 finally:
1217 Py_DECREF(retval);
1218 return NULL;
1219 }
1220
1221 /*[clinic input]
1222 binascii.unhexlify = binascii.a2b_hex
1223
1224 Binary data of hexadecimal representation.
1225
1226 hexstr must contain an even number of hex digits (upper or lower case).
1227 [clinic start generated code]*/
1228
1229 static PyObject *
binascii_unhexlify_impl(PyObject * module,Py_buffer * hexstr)1230 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1231 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
1232 {
1233 return binascii_a2b_hex_impl(module, hexstr);
1234 }
1235
/* Hex digit values indexed by 7-bit character code: 0..15 for '0'-'9',
 * 'A'-'F' and 'a'-'f', and -1 for every other code.  One row per 16 codes;
 * the leading comment gives the code of the row's first entry. */
static const int table_hex[128] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/* Look up the hex value of character c.  The table has only 128 entries and
 * no range check is done here, so c must be below 128. */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Line-length limit used by the quoted-printable encoder. */
#define MAXLINESIZE 76
1250
1251
1252 /*[clinic input]
1253 binascii.a2b_qp
1254
1255 data: ascii_buffer
1256 header: bool(accept={int}) = False
1257
1258 Decode a string of qp-encoded data.
1259 [clinic start generated code]*/
1260
1261 static PyObject *
binascii_a2b_qp_impl(PyObject * module,Py_buffer * data,int header)1262 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
1263 /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
1264 {
1265 Py_ssize_t in, out;
1266 char ch;
1267 const unsigned char *ascii_data;
1268 unsigned char *odata;
1269 Py_ssize_t datalen = 0;
1270 PyObject *rv;
1271
1272 ascii_data = data->buf;
1273 datalen = data->len;
1274
1275 /* We allocate the output same size as input, this is overkill.
1276 * The previous implementation used calloc() so we'll zero out the
1277 * memory here too, since PyMem_Malloc() does not guarantee that.
1278 */
1279 odata = (unsigned char *) PyMem_Malloc(datalen);
1280 if (odata == NULL) {
1281 PyErr_NoMemory();
1282 return NULL;
1283 }
1284 memset(odata, 0, datalen);
1285
1286 in = out = 0;
1287 while (in < datalen) {
1288 if (ascii_data[in] == '=') {
1289 in++;
1290 if (in >= datalen) break;
1291 /* Soft line breaks */
1292 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1293 if (ascii_data[in] != '\n') {
1294 while (in < datalen && ascii_data[in] != '\n') in++;
1295 }
1296 if (in < datalen) in++;
1297 }
1298 else if (ascii_data[in] == '=') {
1299 /* broken case from broken python qp */
1300 odata[out++] = '=';
1301 in++;
1302 }
1303 else if ((in + 1 < datalen) &&
1304 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1305 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1306 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1307 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1308 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1309 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1310 /* hexval */
1311 ch = hexval(ascii_data[in]) << 4;
1312 in++;
1313 ch |= hexval(ascii_data[in]);
1314 in++;
1315 odata[out++] = ch;
1316 }
1317 else {
1318 odata[out++] = '=';
1319 }
1320 }
1321 else if (header && ascii_data[in] == '_') {
1322 odata[out++] = ' ';
1323 in++;
1324 }
1325 else {
1326 odata[out] = ascii_data[in];
1327 in++;
1328 out++;
1329 }
1330 }
1331 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1332 PyMem_Free(odata);
1333 return NULL;
1334 }
1335 PyMem_Free(odata);
1336 return rv;
1337 }
1338
/* Write the two upper-case hexadecimal digits of `ch` to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0f];
    s[1] = digits[ch & 0x0f];
    return 0;
}
1349
1350 /* XXX: This is ridiculously complicated to be backward compatible
1351 * (mostly) with the quopri module. It doesn't re-create the quopri
1352 * module bug where text ending in CRLF has the CR encoded */
1353
1354 /*[clinic input]
1355 binascii.b2a_qp
1356
1357 data: Py_buffer
1358 quotetabs: bool(accept={int}) = False
1359 istext: bool(accept={int}) = True
1360 header: bool(accept={int}) = False
1361
1362 Encode a string using quoted-printable encoding.
1363
1364 On encoding, when istext is set, newlines are not encoded, and white
1365 space at end of lines is. When istext is not set, \r and \n (CR/LF)
1366 are both encoded. When quotetabs is set, space and tabs are encoded.
1367 [clinic start generated code]*/
1368
1369 static PyObject *
binascii_b2a_qp_impl(PyObject * module,Py_buffer * data,int quotetabs,int istext,int header)1370 binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
1371 int istext, int header)
1372 /*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
1373 {
1374 Py_ssize_t in, out;
1375 const unsigned char *databuf;
1376 unsigned char *odata;
1377 Py_ssize_t datalen = 0, odatalen = 0;
1378 PyObject *rv;
1379 unsigned int linelen = 0;
1380 unsigned char ch;
1381 int crlf = 0;
1382 const unsigned char *p;
1383
1384 databuf = data->buf;
1385 datalen = data->len;
1386
1387 /* See if this string is using CRLF line ends */
1388 /* XXX: this function has the side effect of converting all of
1389 * the end of lines to be the same depending on this detection
1390 * here */
1391 p = (const unsigned char *) memchr(databuf, '\n', datalen);
1392 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
1393 crlf = 1;
1394
1395 /* First, scan to see how many characters need to be encoded */
1396 in = 0;
1397 while (in < datalen) {
1398 Py_ssize_t delta = 0;
1399 if ((databuf[in] > 126) ||
1400 (databuf[in] == '=') ||
1401 (header && databuf[in] == '_') ||
1402 ((databuf[in] == '.') && (linelen == 0) &&
1403 (in + 1 == datalen || databuf[in+1] == '\n' ||
1404 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1405 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1406 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1407 ((databuf[in] < 33) &&
1408 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1409 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1410 {
1411 if ((linelen + 3) >= MAXLINESIZE) {
1412 linelen = 0;
1413 if (crlf)
1414 delta += 3;
1415 else
1416 delta += 2;
1417 }
1418 linelen += 3;
1419 delta += 3;
1420 in++;
1421 }
1422 else {
1423 if (istext &&
1424 ((databuf[in] == '\n') ||
1425 ((in+1 < datalen) && (databuf[in] == '\r') &&
1426 (databuf[in+1] == '\n'))))
1427 {
1428 linelen = 0;
1429 /* Protect against whitespace on end of line */
1430 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
1431 delta += 2;
1432 if (crlf)
1433 delta += 2;
1434 else
1435 delta += 1;
1436 if (databuf[in] == '\r')
1437 in += 2;
1438 else
1439 in++;
1440 }
1441 else {
1442 if ((in + 1 != datalen) &&
1443 (databuf[in+1] != '\n') &&
1444 (linelen + 1) >= MAXLINESIZE) {
1445 linelen = 0;
1446 if (crlf)
1447 delta += 3;
1448 else
1449 delta += 2;
1450 }
1451 linelen++;
1452 delta++;
1453 in++;
1454 }
1455 }
1456 if (PY_SSIZE_T_MAX - delta < odatalen) {
1457 PyErr_NoMemory();
1458 return NULL;
1459 }
1460 odatalen += delta;
1461 }
1462
1463 /* We allocate the output same size as input, this is overkill.
1464 * The previous implementation used calloc() so we'll zero out the
1465 * memory here too, since PyMem_Malloc() does not guarantee that.
1466 */
1467 odata = (unsigned char *) PyMem_Malloc(odatalen);
1468 if (odata == NULL) {
1469 PyErr_NoMemory();
1470 return NULL;
1471 }
1472 memset(odata, 0, odatalen);
1473
1474 in = out = linelen = 0;
1475 while (in < datalen) {
1476 if ((databuf[in] > 126) ||
1477 (databuf[in] == '=') ||
1478 (header && databuf[in] == '_') ||
1479 ((databuf[in] == '.') && (linelen == 0) &&
1480 (in + 1 == datalen || databuf[in+1] == '\n' ||
1481 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1482 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1483 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1484 ((databuf[in] < 33) &&
1485 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1486 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1487 {
1488 if ((linelen + 3 )>= MAXLINESIZE) {
1489 odata[out++] = '=';
1490 if (crlf) odata[out++] = '\r';
1491 odata[out++] = '\n';
1492 linelen = 0;
1493 }
1494 odata[out++] = '=';
1495 to_hex(databuf[in], &odata[out]);
1496 out += 2;
1497 in++;
1498 linelen += 3;
1499 }
1500 else {
1501 if (istext &&
1502 ((databuf[in] == '\n') ||
1503 ((in+1 < datalen) && (databuf[in] == '\r') &&
1504 (databuf[in+1] == '\n'))))
1505 {
1506 linelen = 0;
1507 /* Protect against whitespace on end of line */
1508 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1509 ch = odata[out-1];
1510 odata[out-1] = '=';
1511 to_hex(ch, &odata[out]);
1512 out += 2;
1513 }
1514
1515 if (crlf) odata[out++] = '\r';
1516 odata[out++] = '\n';
1517 if (databuf[in] == '\r')
1518 in += 2;
1519 else
1520 in++;
1521 }
1522 else {
1523 if ((in + 1 != datalen) &&
1524 (databuf[in+1] != '\n') &&
1525 (linelen + 1) >= MAXLINESIZE) {
1526 odata[out++] = '=';
1527 if (crlf) odata[out++] = '\r';
1528 odata[out++] = '\n';
1529 linelen = 0;
1530 }
1531 linelen++;
1532 if (header && databuf[in] == ' ') {
1533 odata[out++] = '_';
1534 in++;
1535 }
1536 else {
1537 odata[out++] = databuf[in++];
1538 }
1539 }
1540 }
1541 }
1542 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1543 PyMem_Free(odata);
1544 return NULL;
1545 }
1546 PyMem_Free(odata);
1547 return rv;
1548 }
1549
1550 /* List of functions defined in the module */
1551
/* Method table handed to the module definition below.  Each
 * BINASCII_*_METHODDEF macro is defined outside this part of the file;
 * since no commas separate the entries, each one presumably expands to a
 * complete PyMethodDef initializer including its trailing comma. */
static struct PyMethodDef binascii_module_methods[] = {
    /* uuencode */
    BINASCII_A2B_UU_METHODDEF
    BINASCII_B2A_UU_METHODDEF
    /* base64 */
    BINASCII_A2B_BASE64_METHODDEF
    BINASCII_B2A_BASE64_METHODDEF
    /* hqx ascii conversion */
    BINASCII_A2B_HQX_METHODDEF
    BINASCII_B2A_HQX_METHODDEF
    /* hexadecimal, plus the hexlify/unhexlify aliases */
    BINASCII_A2B_HEX_METHODDEF
    BINASCII_B2A_HEX_METHODDEF
    BINASCII_HEXLIFY_METHODDEF
    BINASCII_UNHEXLIFY_METHODDEF
    /* hqx run-length coding */
    BINASCII_RLECODE_HQX_METHODDEF
    BINASCII_RLEDECODE_HQX_METHODDEF
    /* checksums */
    BINASCII_CRC_HQX_METHODDEF
    BINASCII_CRC32_METHODDEF
    /* quoted-printable */
    BINASCII_A2B_QP_METHODDEF
    BINASCII_B2A_QP_METHODDEF
    {NULL, NULL} /* sentinel */
};
1571
1572
1573 /* Initialization function for the module (*must* be called PyInit_binascii) */
1574 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1575
1576
1577 static struct PyModuleDef binasciimodule = {
1578 PyModuleDef_HEAD_INIT,
1579 "binascii",
1580 doc_binascii,
1581 -1,
1582 binascii_module_methods,
1583 NULL,
1584 NULL,
1585 NULL,
1586 NULL
1587 };
1588
1589 PyMODINIT_FUNC
PyInit_binascii(void)1590 PyInit_binascii(void)
1591 {
1592 PyObject *m, *d;
1593
1594 /* Create the module and add the functions */
1595 m = PyModule_Create(&binasciimodule);
1596 if (m == NULL)
1597 return NULL;
1598
1599 d = PyModule_GetDict(m);
1600
1601 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1602 PyDict_SetItemString(d, "Error", Error);
1603 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1604 PyDict_SetItemString(d, "Incomplete", Incomplete);
1605 if (PyErr_Occurred()) {
1606 Py_DECREF(m);
1607 m = NULL;
1608 }
1609 return m;
1610 }
1611