1 /* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
13 lookup(encoding) -> CodecInfo object
14
15 The builtin Unicode codecs use the following interface:
16
     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (bytes object, characters consumed)
19
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
22
23 These <encoding>s are available: utf_8, unicode_escape,
24 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
25 mbcs (on win32).
26
27
28 Written by Marc-Andre Lemburg (mal@lemburg.com).
29
30 Copyright (c) Corporation for National Research Initiatives.
31
32 ------------------------------------------------------------------------ */
33
34 #define PY_SSIZE_T_CLEAN
35 #include "Python.h"
36
37 #ifdef MS_WINDOWS
38 #include <windows.h>
39 #endif
40
41 /*[clinic input]
42 module _codecs
43 [clinic start generated code]*/
44 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
45
46 #include "clinic/_codecsmodule.c.h"
47
48 /* --- Registry ----------------------------------------------------------- */
49
50 /*[clinic input]
51 _codecs.register
52 search_function: object
53 /
54
55 Register a codec search function.
56
57 Search functions are expected to take one argument, the encoding name in
58 all lower case letters, and either return None, or a tuple of functions
59 (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
60 [clinic start generated code]*/
61
62 static PyObject *
_codecs_register(PyObject * module,PyObject * search_function)63 _codecs_register(PyObject *module, PyObject *search_function)
64 /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
65 {
66 if (PyCodec_Register(search_function))
67 return NULL;
68
69 Py_RETURN_NONE;
70 }
71
72 /*[clinic input]
73 _codecs.lookup
74 encoding: str
75 /
76
77 Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
78 [clinic start generated code]*/
79
80 static PyObject *
_codecs_lookup_impl(PyObject * module,const char * encoding)81 _codecs_lookup_impl(PyObject *module, const char *encoding)
82 /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
83 {
84 return _PyCodec_Lookup(encoding);
85 }
86
87 /*[clinic input]
88 _codecs.encode
89 obj: object
90 encoding: str(c_default="NULL") = "utf-8"
91 errors: str(c_default="NULL") = "strict"
92
93 Encodes obj using the codec registered for encoding.
94
95 The default encoding is 'utf-8'. errors may be given to set a
96 different error handling scheme. Default is 'strict' meaning that encoding
97 errors raise a ValueError. Other possible values are 'ignore', 'replace'
98 and 'backslashreplace' as well as any other name registered with
99 codecs.register_error that can handle ValueErrors.
100 [clinic start generated code]*/
101
102 static PyObject *
_codecs_encode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)103 _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
104 const char *errors)
105 /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
106 {
107 if (encoding == NULL)
108 encoding = PyUnicode_GetDefaultEncoding();
109
110 /* Encode via the codec registry */
111 return PyCodec_Encode(obj, encoding, errors);
112 }
113
114 /*[clinic input]
115 _codecs.decode
116 obj: object
117 encoding: str(c_default="NULL") = "utf-8"
118 errors: str(c_default="NULL") = "strict"
119
120 Decodes obj using the codec registered for encoding.
121
122 Default encoding is 'utf-8'. errors may be given to set a
123 different error handling scheme. Default is 'strict' meaning that encoding
124 errors raise a ValueError. Other possible values are 'ignore', 'replace'
125 and 'backslashreplace' as well as any other name registered with
126 codecs.register_error that can handle ValueErrors.
127 [clinic start generated code]*/
128
129 static PyObject *
_codecs_decode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)130 _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
131 const char *errors)
132 /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
133 {
134 if (encoding == NULL)
135 encoding = PyUnicode_GetDefaultEncoding();
136
137 /* Decode via the codec registry */
138 return PyCodec_Decode(obj, encoding, errors);
139 }
140
141 /* --- Helpers ------------------------------------------------------------ */
142
143 /*[clinic input]
144 _codecs._forget_codec
145
146 encoding: str
147 /
148
149 Purge the named codec from the internal codec lookup cache
150 [clinic start generated code]*/
151
152 static PyObject *
_codecs__forget_codec_impl(PyObject * module,const char * encoding)153 _codecs__forget_codec_impl(PyObject *module, const char *encoding)
154 /*[clinic end generated code: output=0bde9f0a5b084aa2 input=18d5d92d0e386c38]*/
155 {
156 if (_PyCodec_Forget(encoding) < 0) {
157 return NULL;
158 };
159 Py_RETURN_NONE;
160 }
161
162 static
codec_tuple(PyObject * decoded,Py_ssize_t len)163 PyObject *codec_tuple(PyObject *decoded,
164 Py_ssize_t len)
165 {
166 if (decoded == NULL)
167 return NULL;
168 return Py_BuildValue("Nn", decoded, len);
169 }
170
171 /* --- String codecs ------------------------------------------------------ */
172 /*[clinic input]
173 _codecs.escape_decode
174 data: Py_buffer(accept={str, buffer})
175 errors: str(accept={str, NoneType}) = NULL
176 /
177 [clinic start generated code]*/
178
179 static PyObject *
_codecs_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)180 _codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
181 const char *errors)
182 /*[clinic end generated code: output=505200ba8056979a input=0018edfd99db714d]*/
183 {
184 PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
185 errors, 0, NULL);
186 return codec_tuple(decoded, data->len);
187 }
188
189 /*[clinic input]
190 _codecs.escape_encode
191 data: object(subclass_of='&PyBytes_Type')
192 errors: str(accept={str, NoneType}) = NULL
193 /
194 [clinic start generated code]*/
195
196 static PyObject *
_codecs_escape_encode_impl(PyObject * module,PyObject * data,const char * errors)197 _codecs_escape_encode_impl(PyObject *module, PyObject *data,
198 const char *errors)
199 /*[clinic end generated code: output=4af1d477834bab34 input=da9ded00992f32f2]*/
200 {
201 Py_ssize_t size;
202 Py_ssize_t newsize;
203 PyObject *v;
204
205 size = PyBytes_GET_SIZE(data);
206 if (size > PY_SSIZE_T_MAX / 4) {
207 PyErr_SetString(PyExc_OverflowError,
208 "string is too large to encode");
209 return NULL;
210 }
211 newsize = 4*size;
212 v = PyBytes_FromStringAndSize(NULL, newsize);
213
214 if (v == NULL) {
215 return NULL;
216 }
217 else {
218 Py_ssize_t i;
219 char c;
220 char *p = PyBytes_AS_STRING(v);
221
222 for (i = 0; i < size; i++) {
223 /* There's at least enough room for a hex escape */
224 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
225 c = PyBytes_AS_STRING(data)[i];
226 if (c == '\'' || c == '\\')
227 *p++ = '\\', *p++ = c;
228 else if (c == '\t')
229 *p++ = '\\', *p++ = 't';
230 else if (c == '\n')
231 *p++ = '\\', *p++ = 'n';
232 else if (c == '\r')
233 *p++ = '\\', *p++ = 'r';
234 else if (c < ' ' || c >= 0x7f) {
235 *p++ = '\\';
236 *p++ = 'x';
237 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
238 *p++ = Py_hexdigits[c & 0xf];
239 }
240 else
241 *p++ = c;
242 }
243 *p = '\0';
244 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
245 return NULL;
246 }
247 }
248
249 return codec_tuple(v, size);
250 }
251
252 /* --- Decoder ------------------------------------------------------------ */
253 /*[clinic input]
254 _codecs.unicode_internal_decode
255 obj: object
256 errors: str(accept={str, NoneType}) = NULL
257 /
258 [clinic start generated code]*/
259
260 static PyObject *
_codecs_unicode_internal_decode_impl(PyObject * module,PyObject * obj,const char * errors)261 _codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
262 const char *errors)
263 /*[clinic end generated code: output=edbfe175e09eff9a input=8d57930aeda170c6]*/
264 {
265 if (PyUnicode_Check(obj)) {
266 if (PyUnicode_READY(obj) < 0)
267 return NULL;
268 Py_INCREF(obj);
269 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
270 }
271 else {
272 Py_buffer view;
273 PyObject *result;
274 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
275 return NULL;
276
277 result = codec_tuple(
278 _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
279 view.len);
280 PyBuffer_Release(&view);
281 return result;
282 }
283 }
284
285 /*[clinic input]
286 _codecs.utf_7_decode
287 data: Py_buffer
288 errors: str(accept={str, NoneType}) = NULL
289 final: bool(accept={int}) = False
290 /
291 [clinic start generated code]*/
292
293 static PyObject *
_codecs_utf_7_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)294 _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
295 const char *errors, int final)
296 /*[clinic end generated code: output=0cd3a944a32a4089 input=2d94a5a1f170c8ae]*/
297 {
298 Py_ssize_t consumed = data->len;
299 PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
300 errors,
301 final ? NULL : &consumed);
302 return codec_tuple(decoded, consumed);
303 }
304
305 /*[clinic input]
306 _codecs.utf_8_decode
307 data: Py_buffer
308 errors: str(accept={str, NoneType}) = NULL
309 final: bool(accept={int}) = False
310 /
311 [clinic start generated code]*/
312
313 static PyObject *
_codecs_utf_8_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)314 _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
315 const char *errors, int final)
316 /*[clinic end generated code: output=10f74dec8d9bb8bf input=1ea6c21492e8bcbe]*/
317 {
318 Py_ssize_t consumed = data->len;
319 PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
320 errors,
321 final ? NULL : &consumed);
322 return codec_tuple(decoded, consumed);
323 }
324
325 /*[clinic input]
326 _codecs.utf_16_decode
327 data: Py_buffer
328 errors: str(accept={str, NoneType}) = NULL
329 final: bool(accept={int}) = False
330 /
331 [clinic start generated code]*/
332
333 static PyObject *
_codecs_utf_16_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)334 _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
335 const char *errors, int final)
336 /*[clinic end generated code: output=783b442abcbcc2d0 input=2ba128c28ea0bb40]*/
337 {
338 int byteorder = 0;
339 /* This is overwritten unless final is true. */
340 Py_ssize_t consumed = data->len;
341 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
342 errors, &byteorder,
343 final ? NULL : &consumed);
344 return codec_tuple(decoded, consumed);
345 }
346
347 /*[clinic input]
348 _codecs.utf_16_le_decode
349 data: Py_buffer
350 errors: str(accept={str, NoneType}) = NULL
351 final: bool(accept={int}) = False
352 /
353 [clinic start generated code]*/
354
355 static PyObject *
_codecs_utf_16_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)356 _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
357 const char *errors, int final)
358 /*[clinic end generated code: output=899b9e6364379dcd input=43aeb8b0461cace5]*/
359 {
360 int byteorder = -1;
361 /* This is overwritten unless final is true. */
362 Py_ssize_t consumed = data->len;
363 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
364 errors, &byteorder,
365 final ? NULL : &consumed);
366 return codec_tuple(decoded, consumed);
367 }
368
369 /*[clinic input]
370 _codecs.utf_16_be_decode
371 data: Py_buffer
372 errors: str(accept={str, NoneType}) = NULL
373 final: bool(accept={int}) = False
374 /
375 [clinic start generated code]*/
376
377 static PyObject *
_codecs_utf_16_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)378 _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
379 const char *errors, int final)
380 /*[clinic end generated code: output=49f6465ea07669c8 input=339e554c804f34b2]*/
381 {
382 int byteorder = 1;
383 /* This is overwritten unless final is true. */
384 Py_ssize_t consumed = data->len;
385 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
386 errors, &byteorder,
387 final ? NULL : &consumed);
388 return codec_tuple(decoded, consumed);
389 }
390
391 /* This non-standard version also provides access to the byteorder
392 parameter of the builtin UTF-16 codec.
393
394 It returns a tuple (unicode, bytesread, byteorder) with byteorder
395 being the value in effect at the end of data.
396
397 */
398 /*[clinic input]
399 _codecs.utf_16_ex_decode
400 data: Py_buffer
401 errors: str(accept={str, NoneType}) = NULL
402 byteorder: int = 0
403 final: bool(accept={int}) = False
404 /
405 [clinic start generated code]*/
406
407 static PyObject *
_codecs_utf_16_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)408 _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
409 const char *errors, int byteorder, int final)
410 /*[clinic end generated code: output=0f385f251ecc1988 input=3201aeddb9636889]*/
411 {
412 /* This is overwritten unless final is true. */
413 Py_ssize_t consumed = data->len;
414
415 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
416 errors, &byteorder,
417 final ? NULL : &consumed);
418 if (decoded == NULL)
419 return NULL;
420 return Py_BuildValue("Nni", decoded, consumed, byteorder);
421 }
422
423 /*[clinic input]
424 _codecs.utf_32_decode
425 data: Py_buffer
426 errors: str(accept={str, NoneType}) = NULL
427 final: bool(accept={int}) = False
428 /
429 [clinic start generated code]*/
430
431 static PyObject *
_codecs_utf_32_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)432 _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
433 const char *errors, int final)
434 /*[clinic end generated code: output=2fc961807f7b145f input=155a5c673a4e2514]*/
435 {
436 int byteorder = 0;
437 /* This is overwritten unless final is true. */
438 Py_ssize_t consumed = data->len;
439 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
440 errors, &byteorder,
441 final ? NULL : &consumed);
442 return codec_tuple(decoded, consumed);
443 }
444
445 /*[clinic input]
446 _codecs.utf_32_le_decode
447 data: Py_buffer
448 errors: str(accept={str, NoneType}) = NULL
449 final: bool(accept={int}) = False
450 /
451 [clinic start generated code]*/
452
453 static PyObject *
_codecs_utf_32_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)454 _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
455 const char *errors, int final)
456 /*[clinic end generated code: output=ec8f46b67a94f3e6 input=7baf061069e92d3b]*/
457 {
458 int byteorder = -1;
459 /* This is overwritten unless final is true. */
460 Py_ssize_t consumed = data->len;
461 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
462 errors, &byteorder,
463 final ? NULL : &consumed);
464 return codec_tuple(decoded, consumed);
465 }
466
467 /*[clinic input]
468 _codecs.utf_32_be_decode
469 data: Py_buffer
470 errors: str(accept={str, NoneType}) = NULL
471 final: bool(accept={int}) = False
472 /
473 [clinic start generated code]*/
474
475 static PyObject *
_codecs_utf_32_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)476 _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
477 const char *errors, int final)
478 /*[clinic end generated code: output=ff82bae862c92c4e input=b182026300dae595]*/
479 {
480 int byteorder = 1;
481 /* This is overwritten unless final is true. */
482 Py_ssize_t consumed = data->len;
483 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
484 errors, &byteorder,
485 final ? NULL : &consumed);
486 return codec_tuple(decoded, consumed);
487 }
488
489 /* This non-standard version also provides access to the byteorder
490 parameter of the builtin UTF-32 codec.
491
492 It returns a tuple (unicode, bytesread, byteorder) with byteorder
493 being the value in effect at the end of data.
494
495 */
496 /*[clinic input]
497 _codecs.utf_32_ex_decode
498 data: Py_buffer
499 errors: str(accept={str, NoneType}) = NULL
500 byteorder: int = 0
501 final: bool(accept={int}) = False
502 /
503 [clinic start generated code]*/
504
505 static PyObject *
_codecs_utf_32_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)506 _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
507 const char *errors, int byteorder, int final)
508 /*[clinic end generated code: output=6bfb177dceaf4848 input=7b9c2cb819fb237a]*/
509 {
510 Py_ssize_t consumed = data->len;
511 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
512 errors, &byteorder,
513 final ? NULL : &consumed);
514 if (decoded == NULL)
515 return NULL;
516 return Py_BuildValue("Nni", decoded, consumed, byteorder);
517 }
518
519 /*[clinic input]
520 _codecs.unicode_escape_decode
521 data: Py_buffer(accept={str, buffer})
522 errors: str(accept={str, NoneType}) = NULL
523 /
524 [clinic start generated code]*/
525
526 static PyObject *
_codecs_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)527 _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
528 const char *errors)
529 /*[clinic end generated code: output=3ca3c917176b82ab input=49fd27d06813a7f5]*/
530 {
531 PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len,
532 errors);
533 return codec_tuple(decoded, data->len);
534 }
535
536 /*[clinic input]
537 _codecs.raw_unicode_escape_decode
538 data: Py_buffer(accept={str, buffer})
539 errors: str(accept={str, NoneType}) = NULL
540 /
541 [clinic start generated code]*/
542
543 static PyObject *
_codecs_raw_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)544 _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
545 const char *errors)
546 /*[clinic end generated code: output=c98eeb56028070a6 input=770903a211434ebc]*/
547 {
548 PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len,
549 errors);
550 return codec_tuple(decoded, data->len);
551 }
552
553 /*[clinic input]
554 _codecs.latin_1_decode
555 data: Py_buffer
556 errors: str(accept={str, NoneType}) = NULL
557 /
558 [clinic start generated code]*/
559
560 static PyObject *
_codecs_latin_1_decode_impl(PyObject * module,Py_buffer * data,const char * errors)561 _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
562 const char *errors)
563 /*[clinic end generated code: output=07f3dfa3f72c7d8f input=5cad0f1759c618ec]*/
564 {
565 PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
566 return codec_tuple(decoded, data->len);
567 }
568
569 /*[clinic input]
570 _codecs.ascii_decode
571 data: Py_buffer
572 errors: str(accept={str, NoneType}) = NULL
573 /
574 [clinic start generated code]*/
575
576 static PyObject *
_codecs_ascii_decode_impl(PyObject * module,Py_buffer * data,const char * errors)577 _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
578 const char *errors)
579 /*[clinic end generated code: output=2627d72058d42429 input=ad1106f64037bd16]*/
580 {
581 PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
582 return codec_tuple(decoded, data->len);
583 }
584
585 /*[clinic input]
586 _codecs.charmap_decode
587 data: Py_buffer
588 errors: str(accept={str, NoneType}) = NULL
589 mapping: object = NULL
590 /
591 [clinic start generated code]*/
592
593 static PyObject *
_codecs_charmap_decode_impl(PyObject * module,Py_buffer * data,const char * errors,PyObject * mapping)594 _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
595 const char *errors, PyObject *mapping)
596 /*[clinic end generated code: output=2c335b09778cf895 input=19712ca35c5a80e2]*/
597 {
598 PyObject *decoded;
599
600 if (mapping == Py_None)
601 mapping = NULL;
602
603 decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
604 return codec_tuple(decoded, data->len);
605 }
606
607 #ifdef MS_WINDOWS
608
609 /*[clinic input]
610 _codecs.mbcs_decode
611 data: Py_buffer
612 errors: str(accept={str, NoneType}) = NULL
613 final: bool(accept={int}) = False
614 /
615 [clinic start generated code]*/
616
617 static PyObject *
_codecs_mbcs_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)618 _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
619 const char *errors, int final)
620 /*[clinic end generated code: output=39b65b8598938c4b input=b5f2fe568f311297]*/
621 {
622 Py_ssize_t consumed = data->len;
623 PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
624 errors, final ? NULL : &consumed);
625 return codec_tuple(decoded, consumed);
626 }
627
628 /*[clinic input]
629 _codecs.oem_decode
630 data: Py_buffer
631 errors: str(accept={str, NoneType}) = NULL
632 final: bool(accept={int}) = False
633 /
634 [clinic start generated code]*/
635
636 static PyObject *
_codecs_oem_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)637 _codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
638 const char *errors, int final)
639 /*[clinic end generated code: output=da1617612f3fcad8 input=278709bcfd374a9c]*/
640 {
641 Py_ssize_t consumed = data->len;
642 PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
643 data->buf, data->len, errors, final ? NULL : &consumed);
644 return codec_tuple(decoded, consumed);
645 }
646
647 /*[clinic input]
648 _codecs.code_page_decode
649 codepage: int
650 data: Py_buffer
651 errors: str(accept={str, NoneType}) = NULL
652 final: bool(accept={int}) = False
653 /
654 [clinic start generated code]*/
655
656 static PyObject *
_codecs_code_page_decode_impl(PyObject * module,int codepage,Py_buffer * data,const char * errors,int final)657 _codecs_code_page_decode_impl(PyObject *module, int codepage,
658 Py_buffer *data, const char *errors, int final)
659 /*[clinic end generated code: output=53008ea967da3fff input=51f6169021c68dd5]*/
660 {
661 Py_ssize_t consumed = data->len;
662 PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
663 data->buf, data->len,
664 errors,
665 final ? NULL : &consumed);
666 return codec_tuple(decoded, consumed);
667 }
668
669 #endif /* MS_WINDOWS */
670
671 /* --- Encoder ------------------------------------------------------------ */
672
673 /*[clinic input]
674 _codecs.readbuffer_encode
675 data: Py_buffer(accept={str, buffer})
676 errors: str(accept={str, NoneType}) = NULL
677 /
678 [clinic start generated code]*/
679
680 static PyObject *
_codecs_readbuffer_encode_impl(PyObject * module,Py_buffer * data,const char * errors)681 _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
682 const char *errors)
683 /*[clinic end generated code: output=c645ea7cdb3d6e86 input=b7c322b89d4ab923]*/
684 {
685 PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
686 return codec_tuple(result, data->len);
687 }
688
689 /*[clinic input]
690 _codecs.unicode_internal_encode
691 obj: object
692 errors: str(accept={str, NoneType}) = NULL
693 /
694 [clinic start generated code]*/
695
696 static PyObject *
_codecs_unicode_internal_encode_impl(PyObject * module,PyObject * obj,const char * errors)697 _codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
698 const char *errors)
699 /*[clinic end generated code: output=a72507dde4ea558f input=8628f0280cf5ba61]*/
700 {
701 if (PyErr_WarnEx(PyExc_DeprecationWarning,
702 "unicode_internal codec has been deprecated",
703 1))
704 return NULL;
705
706 if (PyUnicode_Check(obj)) {
707 Py_UNICODE *u;
708 Py_ssize_t len, size;
709
710 if (PyUnicode_READY(obj) < 0)
711 return NULL;
712
713 u = PyUnicode_AsUnicodeAndSize(obj, &len);
714 if (u == NULL)
715 return NULL;
716 if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
717 return PyErr_NoMemory();
718 size = len * sizeof(Py_UNICODE);
719 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
720 PyUnicode_GET_LENGTH(obj));
721 }
722 else {
723 Py_buffer view;
724 PyObject *result;
725 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
726 return NULL;
727 result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len),
728 view.len);
729 PyBuffer_Release(&view);
730 return result;
731 }
732 }
733
734 /*[clinic input]
735 _codecs.utf_7_encode
736 str: unicode
737 errors: str(accept={str, NoneType}) = NULL
738 /
739 [clinic start generated code]*/
740
741 static PyObject *
_codecs_utf_7_encode_impl(PyObject * module,PyObject * str,const char * errors)742 _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
743 const char *errors)
744 /*[clinic end generated code: output=0feda21ffc921bc8 input=d1a47579e79cbe15]*/
745 {
746 return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
747 PyUnicode_GET_LENGTH(str));
748 }
749
750 /*[clinic input]
751 _codecs.utf_8_encode
752 str: unicode
753 errors: str(accept={str, NoneType}) = NULL
754 /
755 [clinic start generated code]*/
756
757 static PyObject *
_codecs_utf_8_encode_impl(PyObject * module,PyObject * str,const char * errors)758 _codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
759 const char *errors)
760 /*[clinic end generated code: output=02bf47332b9c796c input=42e3ba73c4392eef]*/
761 {
762 return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
763 PyUnicode_GET_LENGTH(str));
764 }
765
766 /* This version provides access to the byteorder parameter of the
767 builtin UTF-16 codecs as optional third argument. It defaults to 0
   which means: use the native byte order and prepend the data with a
   byte order mark (BOM).
770
771 */
772
773 /*[clinic input]
774 _codecs.utf_16_encode
775 str: unicode
776 errors: str(accept={str, NoneType}) = NULL
777 byteorder: int = 0
778 /
779 [clinic start generated code]*/
780
781 static PyObject *
_codecs_utf_16_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)782 _codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
783 const char *errors, int byteorder)
784 /*[clinic end generated code: output=c654e13efa2e64e4 input=ff46416b04edb944]*/
785 {
786 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
787 PyUnicode_GET_LENGTH(str));
788 }
789
790 /*[clinic input]
791 _codecs.utf_16_le_encode
792 str: unicode
793 errors: str(accept={str, NoneType}) = NULL
794 /
795 [clinic start generated code]*/
796
797 static PyObject *
_codecs_utf_16_le_encode_impl(PyObject * module,PyObject * str,const char * errors)798 _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
799 const char *errors)
800 /*[clinic end generated code: output=431b01e55f2d4995 input=cb385455ea8f2fe0]*/
801 {
802 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
803 PyUnicode_GET_LENGTH(str));
804 }
805
806 /*[clinic input]
807 _codecs.utf_16_be_encode
808 str: unicode
809 errors: str(accept={str, NoneType}) = NULL
810 /
811 [clinic start generated code]*/
812
813 static PyObject *
_codecs_utf_16_be_encode_impl(PyObject * module,PyObject * str,const char * errors)814 _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
815 const char *errors)
816 /*[clinic end generated code: output=96886a6fd54dcae3 input=9119997066bdaefd]*/
817 {
818 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
819 PyUnicode_GET_LENGTH(str));
820 }
821
822 /* This version provides access to the byteorder parameter of the
823 builtin UTF-32 codecs as optional third argument. It defaults to 0
   which means: use the native byte order and prepend the data with a
   byte order mark (BOM).
826
827 */
828
829 /*[clinic input]
830 _codecs.utf_32_encode
831 str: unicode
832 errors: str(accept={str, NoneType}) = NULL
833 byteorder: int = 0
834 /
835 [clinic start generated code]*/
836
837 static PyObject *
_codecs_utf_32_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)838 _codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
839 const char *errors, int byteorder)
840 /*[clinic end generated code: output=5c760da0c09a8b83 input=c5e77da82fbe5c2a]*/
841 {
842 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
843 PyUnicode_GET_LENGTH(str));
844 }
845
846 /*[clinic input]
847 _codecs.utf_32_le_encode
848 str: unicode
849 errors: str(accept={str, NoneType}) = NULL
850 /
851 [clinic start generated code]*/
852
853 static PyObject *
_codecs_utf_32_le_encode_impl(PyObject * module,PyObject * str,const char * errors)854 _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
855 const char *errors)
856 /*[clinic end generated code: output=b65cd176de8e36d6 input=9993b25fe0877848]*/
857 {
858 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
859 PyUnicode_GET_LENGTH(str));
860 }
861
862 /*[clinic input]
863 _codecs.utf_32_be_encode
864 str: unicode
865 errors: str(accept={str, NoneType}) = NULL
866 /
867 [clinic start generated code]*/
868
869 static PyObject *
_codecs_utf_32_be_encode_impl(PyObject * module,PyObject * str,const char * errors)870 _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
871 const char *errors)
872 /*[clinic end generated code: output=1d9e71a9358709e9 input=d3e0ccaa02920431]*/
873 {
874 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
875 PyUnicode_GET_LENGTH(str));
876 }
877
878 /*[clinic input]
879 _codecs.unicode_escape_encode
880 str: unicode
881 errors: str(accept={str, NoneType}) = NULL
882 /
883 [clinic start generated code]*/
884
885 static PyObject *
_codecs_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)886 _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
887 const char *errors)
888 /*[clinic end generated code: output=66271b30bc4f7a3c input=65d9eefca65b455a]*/
889 {
890 return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
891 PyUnicode_GET_LENGTH(str));
892 }
893
894 /*[clinic input]
895 _codecs.raw_unicode_escape_encode
896 str: unicode
897 errors: str(accept={str, NoneType}) = NULL
898 /
899 [clinic start generated code]*/
900
901 static PyObject *
_codecs_raw_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)902 _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
903 const char *errors)
904 /*[clinic end generated code: output=a66a806ed01c830a input=5aa33e4a133391ab]*/
905 {
906 return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
907 PyUnicode_GET_LENGTH(str));
908 }
909
910 /*[clinic input]
911 _codecs.latin_1_encode
912 str: unicode
913 errors: str(accept={str, NoneType}) = NULL
914 /
915 [clinic start generated code]*/
916
917 static PyObject *
_codecs_latin_1_encode_impl(PyObject * module,PyObject * str,const char * errors)918 _codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
919 const char *errors)
920 /*[clinic end generated code: output=2c28c83a27884e08 input=30b11c9e49a65150]*/
921 {
922 return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
923 PyUnicode_GET_LENGTH(str));
924 }
925
926 /*[clinic input]
927 _codecs.ascii_encode
928 str: unicode
929 errors: str(accept={str, NoneType}) = NULL
930 /
931 [clinic start generated code]*/
932
933 static PyObject *
_codecs_ascii_encode_impl(PyObject * module,PyObject * str,const char * errors)934 _codecs_ascii_encode_impl(PyObject *module, PyObject *str,
935 const char *errors)
936 /*[clinic end generated code: output=b5e035182d33befc input=843a1d268e6dfa8e]*/
937 {
938 return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
939 PyUnicode_GET_LENGTH(str));
940 }
941
942 /*[clinic input]
943 _codecs.charmap_encode
944 str: unicode
945 errors: str(accept={str, NoneType}) = NULL
946 mapping: object = NULL
947 /
948 [clinic start generated code]*/
949
950 static PyObject *
_codecs_charmap_encode_impl(PyObject * module,PyObject * str,const char * errors,PyObject * mapping)951 _codecs_charmap_encode_impl(PyObject *module, PyObject *str,
952 const char *errors, PyObject *mapping)
953 /*[clinic end generated code: output=047476f48495a9e9 input=0752cde07a6d6d00]*/
954 {
955 if (mapping == Py_None)
956 mapping = NULL;
957
958 return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
959 PyUnicode_GET_LENGTH(str));
960 }
961
962 /*[clinic input]
963 _codecs.charmap_build
964 map: unicode
965 /
966 [clinic start generated code]*/
967
968 static PyObject *
_codecs_charmap_build_impl(PyObject * module,PyObject * map)969 _codecs_charmap_build_impl(PyObject *module, PyObject *map)
970 /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
971 {
972 return PyUnicode_BuildEncodingMap(map);
973 }
974
975 #ifdef MS_WINDOWS
976
977 /*[clinic input]
978 _codecs.mbcs_encode
979 str: unicode
980 errors: str(accept={str, NoneType}) = NULL
981 /
982 [clinic start generated code]*/
983
984 static PyObject *
_codecs_mbcs_encode_impl(PyObject * module,PyObject * str,const char * errors)985 _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
986 /*[clinic end generated code: output=76e2e170c966c080 input=de471e0815947553]*/
987 {
988 return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
989 PyUnicode_GET_LENGTH(str));
990 }
991
992 /*[clinic input]
993 _codecs.oem_encode
994 str: unicode
995 errors: str(accept={str, NoneType}) = NULL
996 /
997 [clinic start generated code]*/
998
999 static PyObject *
_codecs_oem_encode_impl(PyObject * module,PyObject * str,const char * errors)1000 _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
1001 /*[clinic end generated code: output=65d5982c737de649 input=3fc5f0028aad3cda]*/
1002 {
1003 return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
1004 PyUnicode_GET_LENGTH(str));
1005 }
1006
1007 /*[clinic input]
1008 _codecs.code_page_encode
1009 code_page: int
1010 str: unicode
1011 errors: str(accept={str, NoneType}) = NULL
1012 /
1013 [clinic start generated code]*/
1014
1015 static PyObject *
_codecs_code_page_encode_impl(PyObject * module,int code_page,PyObject * str,const char * errors)1016 _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
1017 const char *errors)
1018 /*[clinic end generated code: output=45673f6085657a9e input=786421ae617d680b]*/
1019 {
1020 return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
1021 PyUnicode_GET_LENGTH(str));
1022 }
1023
1024 #endif /* MS_WINDOWS */
1025
1026 /* --- Error handler registry --------------------------------------------- */
1027
1028 /*[clinic input]
1029 _codecs.register_error
1030 errors: str
1031 handler: object
1032 /
1033
1034 Register the specified error handler under the name errors.
1035
1036 handler must be a callable object, that will be called with an exception
1037 instance containing information about the location of the encoding/decoding
1038 error and must return a (replacement, new position) tuple.
1039 [clinic start generated code]*/
1040
1041 static PyObject *
_codecs_register_error_impl(PyObject * module,const char * errors,PyObject * handler)1042 _codecs_register_error_impl(PyObject *module, const char *errors,
1043 PyObject *handler)
1044 /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
1045 {
1046 if (PyCodec_RegisterError(errors, handler))
1047 return NULL;
1048 Py_RETURN_NONE;
1049 }
1050
1051 /*[clinic input]
1052 _codecs.lookup_error
1053 name: str
1054 /
1055
1056 lookup_error(errors) -> handler
1057
1058 Return the error handler for the specified error handling name or raise a
1059 LookupError, if no handler exists under this name.
1060 [clinic start generated code]*/
1061
1062 static PyObject *
_codecs_lookup_error_impl(PyObject * module,const char * name)1063 _codecs_lookup_error_impl(PyObject *module, const char *name)
1064 /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
1065 {
1066 return PyCodec_LookupError(name);
1067 }
1068
1069 /* --- Module API --------------------------------------------------------- */
1070
/* Method table for the _codecs module, referenced from the codecsmodule
   definition below.  Each _CODECS_*_METHODDEF entry is a macro that is not
   defined in this section of the file; they presumably come from the
   Argument Clinic header "clinic/_codecsmodule.c.h" included near the top
   of the file (confirm there).  Note that the entries are not separated by
   commas here, so each macro must supply its own trailing comma. */
static PyMethodDef _codecs_functions[] = {
    /* Codec registry and generic encode/decode entry points. */
    _CODECS_REGISTER_METHODDEF
    _CODECS_LOOKUP_METHODDEF
    _CODECS_ENCODE_METHODDEF
    _CODECS_DECODE_METHODDEF
    /* escape codec. */
    _CODECS_ESCAPE_ENCODE_METHODDEF
    _CODECS_ESCAPE_DECODE_METHODDEF
    /* UTF-8 and UTF-7. */
    _CODECS_UTF_8_ENCODE_METHODDEF
    _CODECS_UTF_8_DECODE_METHODDEF
    _CODECS_UTF_7_ENCODE_METHODDEF
    _CODECS_UTF_7_DECODE_METHODDEF
    /* UTF-16 variants. */
    _CODECS_UTF_16_ENCODE_METHODDEF
    _CODECS_UTF_16_LE_ENCODE_METHODDEF
    _CODECS_UTF_16_BE_ENCODE_METHODDEF
    _CODECS_UTF_16_DECODE_METHODDEF
    _CODECS_UTF_16_LE_DECODE_METHODDEF
    _CODECS_UTF_16_BE_DECODE_METHODDEF
    _CODECS_UTF_16_EX_DECODE_METHODDEF
    /* UTF-32 variants. */
    _CODECS_UTF_32_ENCODE_METHODDEF
    _CODECS_UTF_32_LE_ENCODE_METHODDEF
    _CODECS_UTF_32_BE_ENCODE_METHODDEF
    _CODECS_UTF_32_DECODE_METHODDEF
    _CODECS_UTF_32_LE_DECODE_METHODDEF
    _CODECS_UTF_32_BE_DECODE_METHODDEF
    _CODECS_UTF_32_EX_DECODE_METHODDEF
    /* unicode_escape, unicode_internal and raw_unicode_escape. */
    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
    _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF
    _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
    /* latin_1 and ascii. */
    _CODECS_LATIN_1_ENCODE_METHODDEF
    _CODECS_LATIN_1_DECODE_METHODDEF
    _CODECS_ASCII_ENCODE_METHODDEF
    _CODECS_ASCII_DECODE_METHODDEF
    /* charmap codec and its encoding-map builder. */
    _CODECS_CHARMAP_ENCODE_METHODDEF
    _CODECS_CHARMAP_DECODE_METHODDEF
    _CODECS_CHARMAP_BUILD_METHODDEF
    _CODECS_READBUFFER_ENCODE_METHODDEF
    /* mbcs, oem and code_page codecs.  Their encode implementations above
       are compiled only under #ifdef MS_WINDOWS, yet these macros are listed
       unconditionally -- presumably the clinic header defines them as empty
       on other platforms (confirm there). */
    _CODECS_MBCS_ENCODE_METHODDEF
    _CODECS_MBCS_DECODE_METHODDEF
    _CODECS_OEM_ENCODE_METHODDEF
    _CODECS_OEM_DECODE_METHODDEF
    _CODECS_CODE_PAGE_ENCODE_METHODDEF
    _CODECS_CODE_PAGE_DECODE_METHODDEF
    /* Error handler registry. */
    _CODECS_REGISTER_ERROR_METHODDEF
    _CODECS_LOOKUP_ERROR_METHODDEF
    _CODECS__FORGET_CODEC_METHODDEF
    {NULL, NULL}                /* sentinel */
};
1121
/* Module definition passed to PyModule_Create() in PyInit__codecs().
   NOTE(review): the per-field labels below follow the positional layout of
   PyModuleDef as given in the CPython C API documentation; confirm them
   against the Python version this file targets. */
static struct PyModuleDef codecsmodule = {
        PyModuleDef_HEAD_INIT,
        "_codecs",              /* m_name */
        NULL,                   /* m_doc: no module docstring is supplied */
        -1,                     /* m_size */
        _codecs_functions,      /* m_methods: the table defined above */
        NULL,                   /* m_slots */
        NULL,                   /* m_traverse */
        NULL,                   /* m_clear */
        NULL                    /* m_free */
};
1133
1134 PyMODINIT_FUNC
PyInit__codecs(void)1135 PyInit__codecs(void)
1136 {
1137 return PyModule_Create(&codecsmodule);
1138 }
1139