1 /* csv module */
2
3 /*
4
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
7 module instead.
8
9 */
10
11 #define MODULE_VERSION "1.0"
12
13 #include "Python.h"
14 #include "structmember.h"
15
16
17 typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21 } _csvstate;
22
23 #define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25 static int
_csv_clear(PyObject * m)26 _csv_clear(PyObject *m)
27 {
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31 }
32
33 static int
_csv_traverse(PyObject * m,visitproc visit,void * arg)34 _csv_traverse(PyObject *m, visitproc visit, void *arg)
35 {
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39 }
40
41 static void
_csv_free(void * m)42 _csv_free(void *m)
43 {
44 _csv_clear((PyObject *)m);
45 }
46
47 static struct PyModuleDef _csvmodule;
48
49 #define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
50
/* States of the reader's character-at-a-time parser
 * (see parse_process_char()). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
56
/* Quoting policies a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of all known quoting policies; the entry with a NULL name ends it. */
static const StyleDesc quote_styles[] = {
    { .style = QUOTE_MINIMAL,    .name = "QUOTE_MINIMAL" },
    { .style = QUOTE_ALL,        .name = "QUOTE_ALL" },
    { .style = QUOTE_NONNUMERIC, .name = "QUOTE_NONNUMERIC" },
    { .style = QUOTE_NONE,       .name = "QUOTE_NONE" },
    { .name = NULL }
};
73
74 typedef struct {
75 PyObject_HEAD
76
77 int doublequote; /* is " represented by ""? */
78 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
81 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
84
85 int strict; /* raise exception on bad CSV */
86 } DialectObj;
87
88 static PyTypeObject Dialect_Type;
89
90 typedef struct {
91 PyObject_HEAD
92
93 PyObject *input_iter; /* iterate over this for input lines */
94
95 DialectObj *dialect; /* parsing dialect */
96
97 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
99 Py_UCS4 *field; /* temporary buffer */
100 Py_ssize_t field_size; /* size of allocated buffer */
101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
104 } ReaderObj;
105
106 static PyTypeObject Reader_Type;
107
108 #define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
109
110 typedef struct {
111 PyObject_HEAD
112
113 PyObject *writeline; /* write output lines to this file */
114
115 DialectObj *dialect; /* parsing dialect */
116
117 Py_UCS4 *rec; /* buffer for parser.join */
118 Py_ssize_t rec_size; /* size of allocated record */
119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
121 } WriterObj;
122
123 static PyTypeObject Writer_Type;
124
125 /*
126 * DIALECT class
127 */
128
129 static PyObject *
get_dialect_from_registry(PyObject * name_obj)130 get_dialect_from_registry(PyObject * name_obj)
131 {
132 PyObject *dialect_obj;
133
134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
142 }
143
144 static PyObject *
get_string(PyObject * str)145 get_string(PyObject *str)
146 {
147 Py_XINCREF(str);
148 return str;
149 }
150
151 static PyObject *
get_nullchar_as_None(Py_UCS4 c)152 get_nullchar_as_None(Py_UCS4 c)
153 {
154 if (c == '\0') {
155 Py_INCREF(Py_None);
156 return Py_None;
157 }
158 else
159 return PyUnicode_FromOrdinal(c);
160 }
161
162 static PyObject *
Dialect_get_lineterminator(DialectObj * self)163 Dialect_get_lineterminator(DialectObj *self)
164 {
165 return get_string(self->lineterminator);
166 }
167
168 static PyObject *
Dialect_get_delimiter(DialectObj * self)169 Dialect_get_delimiter(DialectObj *self)
170 {
171 return get_nullchar_as_None(self->delimiter);
172 }
173
174 static PyObject *
Dialect_get_escapechar(DialectObj * self)175 Dialect_get_escapechar(DialectObj *self)
176 {
177 return get_nullchar_as_None(self->escapechar);
178 }
179
180 static PyObject *
Dialect_get_quotechar(DialectObj * self)181 Dialect_get_quotechar(DialectObj *self)
182 {
183 return get_nullchar_as_None(self->quotechar);
184 }
185
186 static PyObject *
Dialect_get_quoting(DialectObj * self)187 Dialect_get_quoting(DialectObj *self)
188 {
189 return PyLong_FromLong(self->quoting);
190 }
191
192 static int
_set_bool(const char * name,int * target,PyObject * src,int dflt)193 _set_bool(const char *name, int *target, PyObject *src, int dflt)
194 {
195 if (src == NULL)
196 *target = dflt;
197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = b;
202 }
203 return 0;
204 }
205
206 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)207 _set_int(const char *name, int *target, PyObject *src, int dflt)
208 {
209 if (src == NULL)
210 *target = dflt;
211 else {
212 long value;
213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
218 value = PyLong_AsLong(src);
219 if (value == -1 && PyErr_Occurred())
220 return -1;
221 #if SIZEOF_LONG > SIZEOF_INT
222 if (value > INT_MAX || value < INT_MIN) {
223 PyErr_Format(PyExc_ValueError,
224 "integer out of range for \"%s\"", name);
225 return -1;
226 }
227 #endif
228 *target = (int)value;
229 }
230 return 0;
231 }
232
233 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)234 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
235 {
236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
241 Py_ssize_t len;
242 if (!PyUnicode_Check(src)) {
243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be string, not %.200s", name,
245 src->ob_type->tp_name);
246 return -1;
247 }
248 len = PyUnicode_GetLength(src);
249 if (len > 1) {
250 PyErr_Format(PyExc_TypeError,
251 "\"%s\" must be a 1-character string",
252 name);
253 return -1;
254 }
255 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
256 if (len > 0)
257 *target = PyUnicode_READ_CHAR(src, 0);
258 }
259 }
260 return 0;
261 }
262
263 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)264 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265 {
266 if (src == NULL)
267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268 else {
269 if (src == Py_None)
270 *target = NULL;
271 else if (!PyUnicode_Check(src)) {
272 PyErr_Format(PyExc_TypeError,
273 "\"%s\" must be a string", name);
274 return -1;
275 }
276 else {
277 if (PyUnicode_READY(src) == -1)
278 return -1;
279 Py_INCREF(src);
280 Py_XSETREF(*target, src);
281 }
282 }
283 return 0;
284 }
285
286 static int
dialect_check_quoting(int quoting)287 dialect_check_quoting(int quoting)
288 {
289 const StyleDesc *qs;
290
291 for (qs = quote_styles; qs->name; qs++) {
292 if ((int)qs->style == quoting)
293 return 0;
294 }
295 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296 return -1;
297 }
298
299 #define D_OFF(x) offsetof(DialectObj, x)
300
301 static struct PyMemberDef Dialect_memberlist[] = {
302 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
303 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
304 { "strict", T_INT, D_OFF(strict), READONLY },
305 { NULL }
306 };
307
308 static PyGetSetDef Dialect_getsetlist[] = {
309 { "delimiter", (getter)Dialect_get_delimiter},
310 { "escapechar", (getter)Dialect_get_escapechar},
311 { "lineterminator", (getter)Dialect_get_lineterminator},
312 { "quotechar", (getter)Dialect_get_quotechar},
313 { "quoting", (getter)Dialect_get_quoting},
314 {NULL},
315 };
316
317 static void
Dialect_dealloc(DialectObj * self)318 Dialect_dealloc(DialectObj *self)
319 {
320 Py_XDECREF(self->lineterminator);
321 Py_TYPE(self)->tp_free((PyObject *)self);
322 }
323
324 static char *dialect_kws[] = {
325 "dialect",
326 "delimiter",
327 "doublequote",
328 "escapechar",
329 "lineterminator",
330 "quotechar",
331 "quoting",
332 "skipinitialspace",
333 "strict",
334 NULL
335 };
336
337 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)338 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
339 {
340 DialectObj *self;
341 PyObject *ret = NULL;
342 PyObject *dialect = NULL;
343 PyObject *delimiter = NULL;
344 PyObject *doublequote = NULL;
345 PyObject *escapechar = NULL;
346 PyObject *lineterminator = NULL;
347 PyObject *quotechar = NULL;
348 PyObject *quoting = NULL;
349 PyObject *skipinitialspace = NULL;
350 PyObject *strict = NULL;
351
352 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353 "|OOOOOOOOO", dialect_kws,
354 &dialect,
355 &delimiter,
356 &doublequote,
357 &escapechar,
358 &lineterminator,
359 "echar,
360 "ing,
361 &skipinitialspace,
362 &strict))
363 return NULL;
364
365 if (dialect != NULL) {
366 if (PyUnicode_Check(dialect)) {
367 dialect = get_dialect_from_registry(dialect);
368 if (dialect == NULL)
369 return NULL;
370 }
371 else
372 Py_INCREF(dialect);
373 /* Can we reuse this instance? */
374 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
375 delimiter == 0 &&
376 doublequote == 0 &&
377 escapechar == 0 &&
378 lineterminator == 0 &&
379 quotechar == 0 &&
380 quoting == 0 &&
381 skipinitialspace == 0 &&
382 strict == 0)
383 return dialect;
384 }
385
386 self = (DialectObj *)type->tp_alloc(type, 0);
387 if (self == NULL) {
388 Py_XDECREF(dialect);
389 return NULL;
390 }
391 self->lineterminator = NULL;
392
393 Py_XINCREF(delimiter);
394 Py_XINCREF(doublequote);
395 Py_XINCREF(escapechar);
396 Py_XINCREF(lineterminator);
397 Py_XINCREF(quotechar);
398 Py_XINCREF(quoting);
399 Py_XINCREF(skipinitialspace);
400 Py_XINCREF(strict);
401 if (dialect != NULL) {
402 #define DIALECT_GETATTR(v, n) \
403 if (v == NULL) \
404 v = PyObject_GetAttrString(dialect, n)
405 DIALECT_GETATTR(delimiter, "delimiter");
406 DIALECT_GETATTR(doublequote, "doublequote");
407 DIALECT_GETATTR(escapechar, "escapechar");
408 DIALECT_GETATTR(lineterminator, "lineterminator");
409 DIALECT_GETATTR(quotechar, "quotechar");
410 DIALECT_GETATTR(quoting, "quoting");
411 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412 DIALECT_GETATTR(strict, "strict");
413 PyErr_Clear();
414 }
415
416 /* check types and convert to C values */
417 #define DIASET(meth, name, target, src, dflt) \
418 if (meth(name, target, src, dflt)) \
419 goto err
420 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
421 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
422 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
426 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
427 DIASET(_set_bool, "strict", &self->strict, strict, 0);
428
429 /* validate options */
430 if (dialect_check_quoting(self->quoting))
431 goto err;
432 if (self->delimiter == 0) {
433 PyErr_SetString(PyExc_TypeError,
434 "\"delimiter\" must be a 1-character string");
435 goto err;
436 }
437 if (quotechar == Py_None && quoting == NULL)
438 self->quoting = QUOTE_NONE;
439 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440 PyErr_SetString(PyExc_TypeError,
441 "quotechar must be set if quoting enabled");
442 goto err;
443 }
444 if (self->lineterminator == 0) {
445 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446 goto err;
447 }
448
449 ret = (PyObject *)self;
450 Py_INCREF(self);
451 err:
452 Py_XDECREF(self);
453 Py_XDECREF(dialect);
454 Py_XDECREF(delimiter);
455 Py_XDECREF(doublequote);
456 Py_XDECREF(escapechar);
457 Py_XDECREF(lineterminator);
458 Py_XDECREF(quotechar);
459 Py_XDECREF(quoting);
460 Py_XDECREF(skipinitialspace);
461 Py_XDECREF(strict);
462 return ret;
463 }
464
465
466 PyDoc_STRVAR(Dialect_Type_doc,
467 "CSV dialect\n"
468 "\n"
469 "The Dialect type records CSV parsing and generation options.\n");
470
471 static PyTypeObject Dialect_Type = {
472 PyVarObject_HEAD_INIT(NULL, 0)
473 "_csv.Dialect", /* tp_name */
474 sizeof(DialectObj), /* tp_basicsize */
475 0, /* tp_itemsize */
476 /* methods */
477 (destructor)Dialect_dealloc, /* tp_dealloc */
478 (printfunc)0, /* tp_print */
479 (getattrfunc)0, /* tp_getattr */
480 (setattrfunc)0, /* tp_setattr */
481 0, /* tp_reserved */
482 (reprfunc)0, /* tp_repr */
483 0, /* tp_as_number */
484 0, /* tp_as_sequence */
485 0, /* tp_as_mapping */
486 (hashfunc)0, /* tp_hash */
487 (ternaryfunc)0, /* tp_call */
488 (reprfunc)0, /* tp_str */
489 0, /* tp_getattro */
490 0, /* tp_setattro */
491 0, /* tp_as_buffer */
492 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493 Dialect_Type_doc, /* tp_doc */
494 0, /* tp_traverse */
495 0, /* tp_clear */
496 0, /* tp_richcompare */
497 0, /* tp_weaklistoffset */
498 0, /* tp_iter */
499 0, /* tp_iternext */
500 0, /* tp_methods */
501 Dialect_memberlist, /* tp_members */
502 Dialect_getsetlist, /* tp_getset */
503 0, /* tp_base */
504 0, /* tp_dict */
505 0, /* tp_descr_get */
506 0, /* tp_descr_set */
507 0, /* tp_dictoffset */
508 0, /* tp_init */
509 0, /* tp_alloc */
510 dialect_new, /* tp_new */
511 0, /* tp_free */
512 };
513
514 /*
515 * Return an instance of the dialect type, given a Python instance or kwarg
516 * description of the dialect
517 */
518 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)519 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520 {
521 PyObject *type = (PyObject *)&Dialect_Type;
522 if (dialect_inst) {
523 return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
524 }
525 else {
526 return _PyObject_FastCallDict(type, NULL, 0, kwargs);
527 }
528 }
529
530 /*
531 * READER
532 */
533 static int
parse_save_field(ReaderObj * self)534 parse_save_field(ReaderObj *self)
535 {
536 PyObject *field;
537
538 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
539 (void *) self->field, self->field_len);
540 if (field == NULL)
541 return -1;
542 self->field_len = 0;
543 if (self->numeric_field) {
544 PyObject *tmp;
545
546 self->numeric_field = 0;
547 tmp = PyNumber_Float(field);
548 Py_DECREF(field);
549 if (tmp == NULL)
550 return -1;
551 field = tmp;
552 }
553 if (PyList_Append(self->fields, field) < 0) {
554 Py_DECREF(field);
555 return -1;
556 }
557 Py_DECREF(field);
558 return 0;
559 }
560
561 static int
parse_grow_buff(ReaderObj * self)562 parse_grow_buff(ReaderObj *self)
563 {
564 if (self->field_size == 0) {
565 self->field_size = 4096;
566 if (self->field != NULL)
567 PyMem_Free(self->field);
568 self->field = PyMem_New(Py_UCS4, self->field_size);
569 }
570 else {
571 Py_UCS4 *field = self->field;
572 if (self->field_size > PY_SSIZE_T_MAX / 2) {
573 PyErr_NoMemory();
574 return 0;
575 }
576 self->field_size *= 2;
577 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
578 }
579 if (self->field == NULL) {
580 PyErr_NoMemory();
581 return 0;
582 }
583 return 1;
584 }
585
586 static int
parse_add_char(ReaderObj * self,Py_UCS4 c)587 parse_add_char(ReaderObj *self, Py_UCS4 c)
588 {
589 if (self->field_len >= _csvstate_global->field_limit) {
590 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
591 _csvstate_global->field_limit);
592 return -1;
593 }
594 if (self->field_len == self->field_size && !parse_grow_buff(self))
595 return -1;
596 self->field[self->field_len++] = c;
597 return 0;
598 }
599
600 static int
parse_process_char(ReaderObj * self,Py_UCS4 c)601 parse_process_char(ReaderObj *self, Py_UCS4 c)
602 {
603 DialectObj *dialect = self->dialect;
604
605 switch (self->state) {
606 case START_RECORD:
607 /* start of record */
608 if (c == '\0')
609 /* empty line - return [] */
610 break;
611 else if (c == '\n' || c == '\r') {
612 self->state = EAT_CRNL;
613 break;
614 }
615 /* normal character - handle as START_FIELD */
616 self->state = START_FIELD;
617 /* fallthru */
618 case START_FIELD:
619 /* expecting field */
620 if (c == '\n' || c == '\r' || c == '\0') {
621 /* save empty field - return [fields] */
622 if (parse_save_field(self) < 0)
623 return -1;
624 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
625 }
626 else if (c == dialect->quotechar &&
627 dialect->quoting != QUOTE_NONE) {
628 /* start quoted field */
629 self->state = IN_QUOTED_FIELD;
630 }
631 else if (c == dialect->escapechar) {
632 /* possible escaped character */
633 self->state = ESCAPED_CHAR;
634 }
635 else if (c == ' ' && dialect->skipinitialspace)
636 /* ignore space at start of field */
637 ;
638 else if (c == dialect->delimiter) {
639 /* save empty field */
640 if (parse_save_field(self) < 0)
641 return -1;
642 }
643 else {
644 /* begin new unquoted field */
645 if (dialect->quoting == QUOTE_NONNUMERIC)
646 self->numeric_field = 1;
647 if (parse_add_char(self, c) < 0)
648 return -1;
649 self->state = IN_FIELD;
650 }
651 break;
652
653 case ESCAPED_CHAR:
654 if (c == '\n' || c=='\r') {
655 if (parse_add_char(self, c) < 0)
656 return -1;
657 self->state = AFTER_ESCAPED_CRNL;
658 break;
659 }
660 if (c == '\0')
661 c = '\n';
662 if (parse_add_char(self, c) < 0)
663 return -1;
664 self->state = IN_FIELD;
665 break;
666
667 case AFTER_ESCAPED_CRNL:
668 if (c == '\0')
669 break;
670 /*fallthru*/
671
672 case IN_FIELD:
673 /* in unquoted field */
674 if (c == '\n' || c == '\r' || c == '\0') {
675 /* end of line - return [fields] */
676 if (parse_save_field(self) < 0)
677 return -1;
678 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
679 }
680 else if (c == dialect->escapechar) {
681 /* possible escaped character */
682 self->state = ESCAPED_CHAR;
683 }
684 else if (c == dialect->delimiter) {
685 /* save field - wait for new field */
686 if (parse_save_field(self) < 0)
687 return -1;
688 self->state = START_FIELD;
689 }
690 else {
691 /* normal character - save in field */
692 if (parse_add_char(self, c) < 0)
693 return -1;
694 }
695 break;
696
697 case IN_QUOTED_FIELD:
698 /* in quoted field */
699 if (c == '\0')
700 ;
701 else if (c == dialect->escapechar) {
702 /* Possible escape character */
703 self->state = ESCAPE_IN_QUOTED_FIELD;
704 }
705 else if (c == dialect->quotechar &&
706 dialect->quoting != QUOTE_NONE) {
707 if (dialect->doublequote) {
708 /* doublequote; " represented by "" */
709 self->state = QUOTE_IN_QUOTED_FIELD;
710 }
711 else {
712 /* end of quote part of field */
713 self->state = IN_FIELD;
714 }
715 }
716 else {
717 /* normal character - save in field */
718 if (parse_add_char(self, c) < 0)
719 return -1;
720 }
721 break;
722
723 case ESCAPE_IN_QUOTED_FIELD:
724 if (c == '\0')
725 c = '\n';
726 if (parse_add_char(self, c) < 0)
727 return -1;
728 self->state = IN_QUOTED_FIELD;
729 break;
730
731 case QUOTE_IN_QUOTED_FIELD:
732 /* doublequote - seen a quote in a quoted field */
733 if (dialect->quoting != QUOTE_NONE &&
734 c == dialect->quotechar) {
735 /* save "" as " */
736 if (parse_add_char(self, c) < 0)
737 return -1;
738 self->state = IN_QUOTED_FIELD;
739 }
740 else if (c == dialect->delimiter) {
741 /* save field - wait for new field */
742 if (parse_save_field(self) < 0)
743 return -1;
744 self->state = START_FIELD;
745 }
746 else if (c == '\n' || c == '\r' || c == '\0') {
747 /* end of line - return [fields] */
748 if (parse_save_field(self) < 0)
749 return -1;
750 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
751 }
752 else if (!dialect->strict) {
753 if (parse_add_char(self, c) < 0)
754 return -1;
755 self->state = IN_FIELD;
756 }
757 else {
758 /* illegal */
759 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
760 dialect->delimiter,
761 dialect->quotechar);
762 return -1;
763 }
764 break;
765
766 case EAT_CRNL:
767 if (c == '\n' || c == '\r')
768 ;
769 else if (c == '\0')
770 self->state = START_RECORD;
771 else {
772 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
773 return -1;
774 }
775 break;
776
777 }
778 return 0;
779 }
780
781 static int
parse_reset(ReaderObj * self)782 parse_reset(ReaderObj *self)
783 {
784 Py_XSETREF(self->fields, PyList_New(0));
785 if (self->fields == NULL)
786 return -1;
787 self->field_len = 0;
788 self->state = START_RECORD;
789 self->numeric_field = 0;
790 return 0;
791 }
792
793 static PyObject *
Reader_iternext(ReaderObj * self)794 Reader_iternext(ReaderObj *self)
795 {
796 PyObject *fields = NULL;
797 Py_UCS4 c;
798 Py_ssize_t pos, linelen;
799 unsigned int kind;
800 void *data;
801 PyObject *lineobj;
802
803 if (parse_reset(self) < 0)
804 return NULL;
805 do {
806 lineobj = PyIter_Next(self->input_iter);
807 if (lineobj == NULL) {
808 /* End of input OR exception */
809 if (!PyErr_Occurred() && (self->field_len != 0 ||
810 self->state == IN_QUOTED_FIELD)) {
811 if (self->dialect->strict)
812 PyErr_SetString(_csvstate_global->error_obj,
813 "unexpected end of data");
814 else if (parse_save_field(self) >= 0)
815 break;
816 }
817 return NULL;
818 }
819 if (!PyUnicode_Check(lineobj)) {
820 PyErr_Format(_csvstate_global->error_obj,
821 "iterator should return strings, "
822 "not %.200s "
823 "(did you open the file in text mode?)",
824 lineobj->ob_type->tp_name
825 );
826 Py_DECREF(lineobj);
827 return NULL;
828 }
829 if (PyUnicode_READY(lineobj) == -1) {
830 Py_DECREF(lineobj);
831 return NULL;
832 }
833 ++self->line_num;
834 kind = PyUnicode_KIND(lineobj);
835 data = PyUnicode_DATA(lineobj);
836 pos = 0;
837 linelen = PyUnicode_GET_LENGTH(lineobj);
838 while (linelen--) {
839 c = PyUnicode_READ(kind, data, pos);
840 if (c == '\0') {
841 Py_DECREF(lineobj);
842 PyErr_Format(_csvstate_global->error_obj,
843 "line contains NULL byte");
844 goto err;
845 }
846 if (parse_process_char(self, c) < 0) {
847 Py_DECREF(lineobj);
848 goto err;
849 }
850 pos++;
851 }
852 Py_DECREF(lineobj);
853 if (parse_process_char(self, 0) < 0)
854 goto err;
855 } while (self->state != START_RECORD);
856
857 fields = self->fields;
858 self->fields = NULL;
859 err:
860 return fields;
861 }
862
863 static void
Reader_dealloc(ReaderObj * self)864 Reader_dealloc(ReaderObj *self)
865 {
866 PyObject_GC_UnTrack(self);
867 Py_XDECREF(self->dialect);
868 Py_XDECREF(self->input_iter);
869 Py_XDECREF(self->fields);
870 if (self->field != NULL)
871 PyMem_Free(self->field);
872 PyObject_GC_Del(self);
873 }
874
875 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)876 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
877 {
878 Py_VISIT(self->dialect);
879 Py_VISIT(self->input_iter);
880 Py_VISIT(self->fields);
881 return 0;
882 }
883
884 static int
Reader_clear(ReaderObj * self)885 Reader_clear(ReaderObj *self)
886 {
887 Py_CLEAR(self->dialect);
888 Py_CLEAR(self->input_iter);
889 Py_CLEAR(self->fields);
890 return 0;
891 }
892
893 PyDoc_STRVAR(Reader_Type_doc,
894 "CSV reader\n"
895 "\n"
896 "Reader objects are responsible for reading and parsing tabular data\n"
897 "in CSV format.\n"
898 );
899
900 static struct PyMethodDef Reader_methods[] = {
901 { NULL, NULL }
902 };
903 #define R_OFF(x) offsetof(ReaderObj, x)
904
905 static struct PyMemberDef Reader_memberlist[] = {
906 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
907 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
908 { NULL }
909 };
910
911
912 static PyTypeObject Reader_Type = {
913 PyVarObject_HEAD_INIT(NULL, 0)
914 "_csv.reader", /*tp_name*/
915 sizeof(ReaderObj), /*tp_basicsize*/
916 0, /*tp_itemsize*/
917 /* methods */
918 (destructor)Reader_dealloc, /*tp_dealloc*/
919 (printfunc)0, /*tp_print*/
920 (getattrfunc)0, /*tp_getattr*/
921 (setattrfunc)0, /*tp_setattr*/
922 0, /*tp_reserved*/
923 (reprfunc)0, /*tp_repr*/
924 0, /*tp_as_number*/
925 0, /*tp_as_sequence*/
926 0, /*tp_as_mapping*/
927 (hashfunc)0, /*tp_hash*/
928 (ternaryfunc)0, /*tp_call*/
929 (reprfunc)0, /*tp_str*/
930 0, /*tp_getattro*/
931 0, /*tp_setattro*/
932 0, /*tp_as_buffer*/
933 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
934 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
935 Reader_Type_doc, /*tp_doc*/
936 (traverseproc)Reader_traverse, /*tp_traverse*/
937 (inquiry)Reader_clear, /*tp_clear*/
938 0, /*tp_richcompare*/
939 0, /*tp_weaklistoffset*/
940 PyObject_SelfIter, /*tp_iter*/
941 (getiterfunc)Reader_iternext, /*tp_iternext*/
942 Reader_methods, /*tp_methods*/
943 Reader_memberlist, /*tp_members*/
944 0, /*tp_getset*/
945
946 };
947
948 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)949 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
950 {
951 PyObject * iterator, * dialect = NULL;
952 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
953
954 if (!self)
955 return NULL;
956
957 self->dialect = NULL;
958 self->fields = NULL;
959 self->input_iter = NULL;
960 self->field = NULL;
961 self->field_size = 0;
962 self->line_num = 0;
963
964 if (parse_reset(self) < 0) {
965 Py_DECREF(self);
966 return NULL;
967 }
968
969 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
970 Py_DECREF(self);
971 return NULL;
972 }
973 self->input_iter = PyObject_GetIter(iterator);
974 if (self->input_iter == NULL) {
975 PyErr_SetString(PyExc_TypeError,
976 "argument 1 must be an iterator");
977 Py_DECREF(self);
978 return NULL;
979 }
980 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
981 if (self->dialect == NULL) {
982 Py_DECREF(self);
983 return NULL;
984 }
985
986 PyObject_GC_Track(self);
987 return (PyObject *)self;
988 }
989
990 /*
991 * WRITER
992 */
993 /* ---------------------------------------------------------------- */
994 static void
join_reset(WriterObj * self)995 join_reset(WriterObj *self)
996 {
997 self->rec_len = 0;
998 self->num_fields = 0;
999 }
1000
1001 #define MEM_INCR 32768
1002
1003 /* Calculate new record length or append field to record. Return new
1004 * record length.
1005 */
1006 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)1007 join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
1008 Py_ssize_t field_len, int *quoted,
1009 int copy_phase)
1010 {
1011 DialectObj *dialect = self->dialect;
1012 int i;
1013 Py_ssize_t rec_len;
1014
1015 #define INCLEN \
1016 do {\
1017 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1018 goto overflow; \
1019 } \
1020 rec_len++; \
1021 } while(0)
1022
1023 #define ADDCH(c) \
1024 do {\
1025 if (copy_phase) \
1026 self->rec[rec_len] = c;\
1027 INCLEN;\
1028 } while(0)
1029
1030 rec_len = self->rec_len;
1031
1032 /* If this is not the first field we need a field separator */
1033 if (self->num_fields > 0)
1034 ADDCH(dialect->delimiter);
1035
1036 /* Handle preceding quote */
1037 if (copy_phase && *quoted)
1038 ADDCH(dialect->quotechar);
1039
1040 /* Copy/count field data */
1041 /* If field is null just pass over */
1042 for (i = 0; field_data && (i < field_len); i++) {
1043 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1044 int want_escape = 0;
1045
1046 if (c == dialect->delimiter ||
1047 c == dialect->escapechar ||
1048 c == dialect->quotechar ||
1049 PyUnicode_FindChar(
1050 dialect->lineterminator, c, 0,
1051 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1052 if (dialect->quoting == QUOTE_NONE)
1053 want_escape = 1;
1054 else {
1055 if (c == dialect->quotechar) {
1056 if (dialect->doublequote)
1057 ADDCH(dialect->quotechar);
1058 else
1059 want_escape = 1;
1060 }
1061 if (!want_escape)
1062 *quoted = 1;
1063 }
1064 if (want_escape) {
1065 if (!dialect->escapechar) {
1066 PyErr_Format(_csvstate_global->error_obj,
1067 "need to escape, but no escapechar set");
1068 return -1;
1069 }
1070 ADDCH(dialect->escapechar);
1071 }
1072 }
1073 /* Copy field character into record buffer.
1074 */
1075 ADDCH(c);
1076 }
1077
1078 if (*quoted) {
1079 if (copy_phase)
1080 ADDCH(dialect->quotechar);
1081 else {
1082 INCLEN; /* starting quote */
1083 INCLEN; /* ending quote */
1084 }
1085 }
1086 return rec_len;
1087
1088 overflow:
1089 PyErr_NoMemory();
1090 return -1;
1091 #undef ADDCH
1092 #undef INCLEN
1093 }
1094
1095 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1096 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1097 {
1098
1099 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
1100 PyErr_NoMemory();
1101 return 0;
1102 }
1103
1104 if (rec_len > self->rec_size) {
1105 if (self->rec_size == 0) {
1106 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1107 if (self->rec != NULL)
1108 PyMem_Free(self->rec);
1109 self->rec = PyMem_New(Py_UCS4, self->rec_size);
1110 }
1111 else {
1112 Py_UCS4* old_rec = self->rec;
1113
1114 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1115 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
1116 if (self->rec == NULL)
1117 PyMem_Free(old_rec);
1118 }
1119 if (self->rec == NULL) {
1120 PyErr_NoMemory();
1121 return 0;
1122 }
1123 }
1124 return 1;
1125 }
1126
1127 static int
join_append(WriterObj * self,PyObject * field,int quoted)1128 join_append(WriterObj *self, PyObject *field, int quoted)
1129 {
1130 unsigned int field_kind = -1;
1131 void *field_data = NULL;
1132 Py_ssize_t field_len = 0;
1133 Py_ssize_t rec_len;
1134
1135 if (field != NULL) {
1136 if (PyUnicode_READY(field) == -1)
1137 return 0;
1138 field_kind = PyUnicode_KIND(field);
1139 field_data = PyUnicode_DATA(field);
1140 field_len = PyUnicode_GET_LENGTH(field);
1141 }
1142 rec_len = join_append_data(self, field_kind, field_data, field_len,
1143 "ed, 0);
1144 if (rec_len < 0)
1145 return 0;
1146
1147 /* grow record buffer if necessary */
1148 if (!join_check_rec_size(self, rec_len))
1149 return 0;
1150
1151 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1152 "ed, 1);
1153 self->num_fields++;
1154
1155 return 1;
1156 }
1157
1158 static int
join_append_lineterminator(WriterObj * self)1159 join_append_lineterminator(WriterObj *self)
1160 {
1161 Py_ssize_t terminator_len, i;
1162 unsigned int term_kind;
1163 void *term_data;
1164
1165 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1166 if (terminator_len == -1)
1167 return 0;
1168
1169 /* grow record buffer if necessary */
1170 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1171 return 0;
1172
1173 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1174 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1175 for (i = 0; i < terminator_len; i++)
1176 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1177 self->rec_len += terminator_len;
1178
1179 return 1;
1180 }
1181
1182 PyDoc_STRVAR(csv_writerow_doc,
1183 "writerow(iterable)\n"
1184 "\n"
1185 "Construct and write a CSV record from an iterable of fields. Non-string\n"
1186 "elements will be converted to string.");
1187
1188 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1189 csv_writerow(WriterObj *self, PyObject *seq)
1190 {
1191 DialectObj *dialect = self->dialect;
1192 PyObject *iter, *field, *line, *result;
1193
1194 iter = PyObject_GetIter(seq);
1195 if (iter == NULL)
1196 return PyErr_Format(_csvstate_global->error_obj,
1197 "iterable expected, not %.200s",
1198 seq->ob_type->tp_name);
1199
1200 /* Join all fields in internal buffer.
1201 */
1202 join_reset(self);
1203 while ((field = PyIter_Next(iter))) {
1204 int append_ok;
1205 int quoted;
1206
1207 switch (dialect->quoting) {
1208 case QUOTE_NONNUMERIC:
1209 quoted = !PyNumber_Check(field);
1210 break;
1211 case QUOTE_ALL:
1212 quoted = 1;
1213 break;
1214 default:
1215 quoted = 0;
1216 break;
1217 }
1218
1219 if (PyUnicode_Check(field)) {
1220 append_ok = join_append(self, field, quoted);
1221 Py_DECREF(field);
1222 }
1223 else if (field == Py_None) {
1224 append_ok = join_append(self, NULL, quoted);
1225 Py_DECREF(field);
1226 }
1227 else {
1228 PyObject *str;
1229
1230 str = PyObject_Str(field);
1231 Py_DECREF(field);
1232 if (str == NULL) {
1233 Py_DECREF(iter);
1234 return NULL;
1235 }
1236 append_ok = join_append(self, str, quoted);
1237 Py_DECREF(str);
1238 }
1239 if (!append_ok) {
1240 Py_DECREF(iter);
1241 return NULL;
1242 }
1243 }
1244 Py_DECREF(iter);
1245 if (PyErr_Occurred())
1246 return NULL;
1247
1248 if (self->num_fields > 0 && self->rec_size == 0) {
1249 if (dialect->quoting == QUOTE_NONE) {
1250 PyErr_Format(_csvstate_global->error_obj,
1251 "single empty field record must be quoted");
1252 return NULL;
1253 }
1254 self->num_fields--;
1255 if (!join_append(self, NULL, 1))
1256 return NULL;
1257 }
1258
1259 /* Add line terminator.
1260 */
1261 if (!join_append_lineterminator(self))
1262 return NULL;
1263
1264 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1265 (void *) self->rec, self->rec_len);
1266 if (line == NULL)
1267 return NULL;
1268 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1269 Py_DECREF(line);
1270 return result;
1271 }
1272
/* Docstring attached to the writer's "writerows" method in Writer_methods. */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(iterable of iterables)\n"
"\n"
"Construct and write a series of iterables to a csv file. Non-string\n"
"elements will be converted to string.");
1278
1279 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1280 csv_writerows(WriterObj *self, PyObject *seqseq)
1281 {
1282 PyObject *row_iter, *row_obj, *result;
1283
1284 row_iter = PyObject_GetIter(seqseq);
1285 if (row_iter == NULL) {
1286 PyErr_SetString(PyExc_TypeError,
1287 "writerows() argument must be iterable");
1288 return NULL;
1289 }
1290 while ((row_obj = PyIter_Next(row_iter))) {
1291 result = csv_writerow(self, row_obj);
1292 Py_DECREF(row_obj);
1293 if (!result) {
1294 Py_DECREF(row_iter);
1295 return NULL;
1296 }
1297 else
1298 Py_DECREF(result);
1299 }
1300 Py_DECREF(row_iter);
1301 if (PyErr_Occurred())
1302 return NULL;
1303 Py_INCREF(Py_None);
1304 return Py_None;
1305 }
1306
/* Method table of the writer type (installed as tp_methods of Writer_Type).
   Both methods are METH_O, i.e. they receive exactly one argument. */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }      /* sentinel */
};
1312
/* Byte offset of member x inside WriterObj. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Member table of the writer type: exposes the writer's dialect object as
   the read-only attribute "dialect". */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
    { NULL }            /* sentinel */
};
1319
1320 static void
Writer_dealloc(WriterObj * self)1321 Writer_dealloc(WriterObj *self)
1322 {
1323 PyObject_GC_UnTrack(self);
1324 Py_XDECREF(self->dialect);
1325 Py_XDECREF(self->writeline);
1326 if (self->rec != NULL)
1327 PyMem_Free(self->rec);
1328 PyObject_GC_Del(self);
1329 }
1330
/* tp_traverse of the writer type: report the two object references a writer
 * holds (its dialect and its write callable) to the cyclic GC.
 * Returns 0 unless a visit call returns non-zero (Py_VISIT returns that
 * value immediately).
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1338
/* tp_clear of the writer type: release the dialect and the write callable
 * and set both fields to NULL.  The record buffer (self->rec) is not
 * touched here; Writer_dealloc() frees it.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1346
/* Docstring of the writer type (used as the tp_doc slot of Writer_Type). */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1353
/* Type object of CSV writer instances ("_csv.writer").
 *
 * The slots are filled positionally, so their order must not change.  The
 * type takes part in cyclic garbage collection (Py_TPFLAGS_HAVE_GC together
 * with the traverse/clear hooks) and may be subclassed
 * (Py_TPFLAGS_BASETYPE).  Slots after tp_getset are left to the implicit
 * zero initialisation.
 */
static PyTypeObject Writer_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.writer",                          /*tp_name*/
    sizeof(WriterObj),                      /*tp_basicsize*/
    0,                                      /*tp_itemsize*/
    /* methods */
    (destructor)Writer_dealloc,             /*tp_dealloc*/
    (printfunc)0,                           /*tp_print*/
    (getattrfunc)0,                         /*tp_getattr*/
    (setattrfunc)0,                         /*tp_setattr*/
    0,                                      /*tp_reserved*/
    (reprfunc)0,                            /*tp_repr*/
    0,                                      /*tp_as_number*/
    0,                                      /*tp_as_sequence*/
    0,                                      /*tp_as_mapping*/
    (hashfunc)0,                            /*tp_hash*/
    (ternaryfunc)0,                         /*tp_call*/
    (reprfunc)0,                            /*tp_str*/
    0,                                      /*tp_getattro*/
    0,                                      /*tp_setattro*/
    0,                                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                 /*tp_flags*/
    Writer_Type_doc,                        /*tp_doc*/
    (traverseproc)Writer_traverse,          /*tp_traverse*/
    (inquiry)Writer_clear,                  /*tp_clear*/
    0,                                      /*tp_richcompare*/
    0,                                      /*tp_weaklistoffset*/
    (getiterfunc)0,                         /*tp_iter*/
    /* NOTE(review): the cast below names getiterfunc although the slot is
       tp_iternext; harmless because the value is 0. */
    (getiterfunc)0,                         /*tp_iternext*/
    Writer_methods,                         /*tp_methods*/
    Writer_memberlist,                      /*tp_members*/
    0,                                      /*tp_getset*/
};
1388
1389 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1390 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1391 {
1392 PyObject * output_file, * dialect = NULL;
1393 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1394 _Py_IDENTIFIER(write);
1395
1396 if (!self)
1397 return NULL;
1398
1399 self->dialect = NULL;
1400 self->writeline = NULL;
1401
1402 self->rec = NULL;
1403 self->rec_size = 0;
1404 self->rec_len = 0;
1405 self->num_fields = 0;
1406
1407 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1408 Py_DECREF(self);
1409 return NULL;
1410 }
1411 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
1412 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1413 PyErr_SetString(PyExc_TypeError,
1414 "argument 1 must have a \"write\" method");
1415 Py_DECREF(self);
1416 return NULL;
1417 }
1418 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1419 if (self->dialect == NULL) {
1420 Py_DECREF(self);
1421 return NULL;
1422 }
1423 PyObject_GC_Track(self);
1424 return (PyObject *)self;
1425 }
1426
1427 /*
1428 * DIALECT REGISTRY
1429 */
1430 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1431 csv_list_dialects(PyObject *module, PyObject *args)
1432 {
1433 return PyDict_Keys(_csvstate_global->dialects);
1434 }
1435
1436 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1437 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1438 {
1439 PyObject *name_obj, *dialect_obj = NULL;
1440 PyObject *dialect;
1441
1442 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1443 return NULL;
1444 if (!PyUnicode_Check(name_obj)) {
1445 PyErr_SetString(PyExc_TypeError,
1446 "dialect name must be a string");
1447 return NULL;
1448 }
1449 if (PyUnicode_READY(name_obj) == -1)
1450 return NULL;
1451 dialect = _call_dialect(dialect_obj, kwargs);
1452 if (dialect == NULL)
1453 return NULL;
1454 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
1455 Py_DECREF(dialect);
1456 return NULL;
1457 }
1458 Py_DECREF(dialect);
1459 Py_INCREF(Py_None);
1460 return Py_None;
1461 }
1462
1463 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1464 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1465 {
1466 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1467 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1468 Py_INCREF(Py_None);
1469 return Py_None;
1470 }
1471
/* _csv.get_dialect(name): thin wrapper that delegates the lookup to
 * get_dialect_from_registry() and returns its result unchanged.
 */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
    return get_dialect_from_registry(name_obj);
}
1477
1478 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1479 csv_field_size_limit(PyObject *module, PyObject *args)
1480 {
1481 PyObject *new_limit = NULL;
1482 long old_limit = _csvstate_global->field_limit;
1483
1484 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1485 return NULL;
1486 if (new_limit != NULL) {
1487 if (!PyLong_CheckExact(new_limit)) {
1488 PyErr_Format(PyExc_TypeError,
1489 "limit must be an integer");
1490 return NULL;
1491 }
1492 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1493 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1494 _csvstate_global->field_limit = old_limit;
1495 return NULL;
1496 }
1497 }
1498 return PyLong_FromLong(old_limit);
1499 }
1500
1501 /*
1502 * MODULE
1503 */
1504
/* Module docstring; referenced from the _csvmodule definition. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1562
/* Docstring of the module-level "reader" function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1577
/* Docstring of the module-level "writer" function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1591
1592 PyDoc_STRVAR(csv_list_dialects_doc,
1593 "Return a list of all know dialect names.\n"
1594 " names = csv.list_dialects()");
1595
/* Docstring of the module-level "get_dialect" function. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1599
1600 PyDoc_STRVAR(csv_register_dialect_doc,
1601 "Create a mapping from a string name to a dialect class.\n"
1602 " dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1603
/* Docstring of the module-level "unregister_dialect" function. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1607
/* Docstring of the module-level "field_size_limit" function. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1614
/* Module-level function table, referenced from the _csvmodule definition.
   Each entry gives the Python-visible name, the C implementation, its
   calling convention flags and its docstring. */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)csv_register_dialect,
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { "field_size_limit", (PyCFunction)csv_field_size_limit,
        METH_VARARGS, csv_field_size_limit_doc},
    { NULL, NULL }      /* sentinel */
};
1632
/* Definition of the _csv module.  The per-module state is a _csvstate
   struct; the last three entries are the hooks that visit, clear and free
   the object references kept in that state. */
static struct PyModuleDef _csvmodule = {
    PyModuleDef_HEAD_INIT,
    "_csv",                 /* module name */
    csv_module_doc,         /* module docstring */
    sizeof(_csvstate),      /* size of the per-module state */
    csv_methods,            /* module-level functions */
    NULL,                   /* unused slot */
    _csv_traverse,
    _csv_clear,
    _csv_free
};
1644
1645 PyMODINIT_FUNC
PyInit__csv(void)1646 PyInit__csv(void)
1647 {
1648 PyObject *module;
1649 const StyleDesc *style;
1650
1651 if (PyType_Ready(&Dialect_Type) < 0)
1652 return NULL;
1653
1654 if (PyType_Ready(&Reader_Type) < 0)
1655 return NULL;
1656
1657 if (PyType_Ready(&Writer_Type) < 0)
1658 return NULL;
1659
1660 /* Create the module and add the functions */
1661 module = PyModule_Create(&_csvmodule);
1662 if (module == NULL)
1663 return NULL;
1664
1665 /* Add version to the module. */
1666 if (PyModule_AddStringConstant(module, "__version__",
1667 MODULE_VERSION) == -1)
1668 return NULL;
1669
1670 /* Set the field limit */
1671 _csvstate(module)->field_limit = 128 * 1024;
1672 /* Do I still need to add this var to the Module Dict? */
1673
1674 /* Add _dialects dictionary */
1675 _csvstate(module)->dialects = PyDict_New();
1676 if (_csvstate(module)->dialects == NULL)
1677 return NULL;
1678 Py_INCREF(_csvstate(module)->dialects);
1679 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
1680 return NULL;
1681
1682 /* Add quote styles into dictionary */
1683 for (style = quote_styles; style->name; style++) {
1684 if (PyModule_AddIntConstant(module, style->name,
1685 style->style) == -1)
1686 return NULL;
1687 }
1688
1689 /* Add the Dialect type */
1690 Py_INCREF(&Dialect_Type);
1691 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1692 return NULL;
1693
1694 /* Add the CSV exception object to the module. */
1695 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1696 if (_csvstate(module)->error_obj == NULL)
1697 return NULL;
1698 Py_INCREF(_csvstate(module)->error_obj);
1699 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
1700 return module;
1701 }
1702