1 /* csv module */
2 
3 /*
4 
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module.  Users should not use this module directly, but import the csv.py
7 module instead.
8 
9 */
10 
11 #define MODULE_VERSION "1.0"
12 
13 #include "Python.h"
14 #include "structmember.h"
15 
16 
17 typedef struct {
18     PyObject *error_obj;   /* CSV exception */
19     PyObject *dialects;   /* Dialect registry */
20     long field_limit;   /* max parsed field size */
21 } _csvstate;
22 
23 #define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24 
25 static int
_csv_clear(PyObject * m)26 _csv_clear(PyObject *m)
27 {
28     Py_CLEAR(_csvstate(m)->error_obj);
29     Py_CLEAR(_csvstate(m)->dialects);
30     return 0;
31 }
32 
33 static int
_csv_traverse(PyObject * m,visitproc visit,void * arg)34 _csv_traverse(PyObject *m, visitproc visit, void *arg)
35 {
36     Py_VISIT(_csvstate(m)->error_obj);
37     Py_VISIT(_csvstate(m)->dialects);
38     return 0;
39 }
40 
41 static void
_csv_free(void * m)42 _csv_free(void *m)
43 {
44    _csv_clear((PyObject *)m);
45 }
46 
47 static struct PyModuleDef _csvmodule;
48 
49 #define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
50 
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
56 
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of all quoting styles; the entry with a NULL name ends it. */
static const StyleDesc quote_styles[] = {
    { .style = QUOTE_MINIMAL,    .name = "QUOTE_MINIMAL" },
    { .style = QUOTE_ALL,        .name = "QUOTE_ALL" },
    { .style = QUOTE_NONNUMERIC, .name = "QUOTE_NONNUMERIC" },
    { .style = QUOTE_NONE,       .name = "QUOTE_NONE" },
    { .style = QUOTE_MINIMAL,    .name = NULL }
};
73 
74 typedef struct {
75     PyObject_HEAD
76 
77     int doublequote;            /* is " represented by ""? */
78     Py_UCS4 delimiter;       /* field separator */
79     Py_UCS4 quotechar;       /* quote character */
80     Py_UCS4 escapechar;      /* escape character */
81     int skipinitialspace;       /* ignore spaces following delimiter? */
82     PyObject *lineterminator; /* string to write between records */
83     int quoting;                /* style of quoting to write */
84 
85     int strict;                 /* raise exception on bad CSV */
86 } DialectObj;
87 
88 static PyTypeObject Dialect_Type;
89 
90 typedef struct {
91     PyObject_HEAD
92 
93     PyObject *input_iter;   /* iterate over this for input lines */
94 
95     DialectObj *dialect;    /* parsing dialect */
96 
97     PyObject *fields;           /* field list for current record */
98     ParserState state;          /* current CSV parse state */
99     Py_UCS4 *field;             /* temporary buffer */
100     Py_ssize_t field_size;      /* size of allocated buffer */
101     Py_ssize_t field_len;       /* length of current field */
102     int numeric_field;          /* treat field as numeric */
103     unsigned long line_num;     /* Source-file line number */
104 } ReaderObj;
105 
106 static PyTypeObject Reader_Type;
107 
108 #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
109 
110 typedef struct {
111     PyObject_HEAD
112 
113     PyObject *writeline;    /* write output lines to this file */
114 
115     DialectObj *dialect;    /* parsing dialect */
116 
117     Py_UCS4 *rec;            /* buffer for parser.join */
118     Py_ssize_t rec_size;        /* size of allocated record */
119     Py_ssize_t rec_len;         /* length of record */
120     int num_fields;             /* number of fields in record */
121 } WriterObj;
122 
123 static PyTypeObject Writer_Type;
124 
125 /*
126  * DIALECT class
127  */
128 
129 static PyObject *
get_dialect_from_registry(PyObject * name_obj)130 get_dialect_from_registry(PyObject * name_obj)
131 {
132     PyObject *dialect_obj;
133 
134     dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
135     if (dialect_obj == NULL) {
136         if (!PyErr_Occurred())
137             PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
138     }
139     else
140         Py_INCREF(dialect_obj);
141     return dialect_obj;
142 }
143 
144 static PyObject *
get_string(PyObject * str)145 get_string(PyObject *str)
146 {
147     Py_XINCREF(str);
148     return str;
149 }
150 
151 static PyObject *
get_nullchar_as_None(Py_UCS4 c)152 get_nullchar_as_None(Py_UCS4 c)
153 {
154     if (c == '\0') {
155         Py_INCREF(Py_None);
156         return Py_None;
157     }
158     else
159         return PyUnicode_FromOrdinal(c);
160 }
161 
162 static PyObject *
Dialect_get_lineterminator(DialectObj * self)163 Dialect_get_lineterminator(DialectObj *self)
164 {
165     return get_string(self->lineterminator);
166 }
167 
168 static PyObject *
Dialect_get_delimiter(DialectObj * self)169 Dialect_get_delimiter(DialectObj *self)
170 {
171     return get_nullchar_as_None(self->delimiter);
172 }
173 
174 static PyObject *
Dialect_get_escapechar(DialectObj * self)175 Dialect_get_escapechar(DialectObj *self)
176 {
177     return get_nullchar_as_None(self->escapechar);
178 }
179 
180 static PyObject *
Dialect_get_quotechar(DialectObj * self)181 Dialect_get_quotechar(DialectObj *self)
182 {
183     return get_nullchar_as_None(self->quotechar);
184 }
185 
186 static PyObject *
Dialect_get_quoting(DialectObj * self)187 Dialect_get_quoting(DialectObj *self)
188 {
189     return PyLong_FromLong(self->quoting);
190 }
191 
192 static int
_set_bool(const char * name,int * target,PyObject * src,int dflt)193 _set_bool(const char *name, int *target, PyObject *src, int dflt)
194 {
195     if (src == NULL)
196         *target = dflt;
197     else {
198         int b = PyObject_IsTrue(src);
199         if (b < 0)
200             return -1;
201         *target = b;
202     }
203     return 0;
204 }
205 
206 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)207 _set_int(const char *name, int *target, PyObject *src, int dflt)
208 {
209     if (src == NULL)
210         *target = dflt;
211     else {
212         long value;
213         if (!PyLong_CheckExact(src)) {
214             PyErr_Format(PyExc_TypeError,
215                          "\"%s\" must be an integer", name);
216             return -1;
217         }
218         value = PyLong_AsLong(src);
219         if (value == -1 && PyErr_Occurred())
220             return -1;
221 #if SIZEOF_LONG > SIZEOF_INT
222         if (value > INT_MAX || value < INT_MIN) {
223             PyErr_Format(PyExc_ValueError,
224                          "integer out of range for \"%s\"", name);
225             return -1;
226         }
227 #endif
228         *target = (int)value;
229     }
230     return 0;
231 }
232 
233 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)234 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
235 {
236     if (src == NULL)
237         *target = dflt;
238     else {
239         *target = '\0';
240         if (src != Py_None) {
241             Py_ssize_t len;
242             if (!PyUnicode_Check(src)) {
243                 PyErr_Format(PyExc_TypeError,
244                     "\"%s\" must be string, not %.200s", name,
245                     src->ob_type->tp_name);
246                 return -1;
247             }
248             len = PyUnicode_GetLength(src);
249             if (len > 1) {
250                 PyErr_Format(PyExc_TypeError,
251                     "\"%s\" must be a 1-character string",
252                     name);
253                 return -1;
254             }
255             /* PyUnicode_READY() is called in PyUnicode_GetLength() */
256             if (len > 0)
257                 *target = PyUnicode_READ_CHAR(src, 0);
258         }
259     }
260     return 0;
261 }
262 
263 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)264 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265 {
266     if (src == NULL)
267         *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268     else {
269         if (src == Py_None)
270             *target = NULL;
271         else if (!PyUnicode_Check(src)) {
272             PyErr_Format(PyExc_TypeError,
273                          "\"%s\" must be a string", name);
274             return -1;
275         }
276         else {
277             if (PyUnicode_READY(src) == -1)
278                 return -1;
279             Py_INCREF(src);
280             Py_XSETREF(*target, src);
281         }
282     }
283     return 0;
284 }
285 
286 static int
dialect_check_quoting(int quoting)287 dialect_check_quoting(int quoting)
288 {
289     const StyleDesc *qs;
290 
291     for (qs = quote_styles; qs->name; qs++) {
292         if ((int)qs->style == quoting)
293             return 0;
294     }
295     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296     return -1;
297 }
298 
299 #define D_OFF(x) offsetof(DialectObj, x)
300 
301 static struct PyMemberDef Dialect_memberlist[] = {
302     { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
303     { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
304     { "strict",             T_INT, D_OFF(strict), READONLY },
305     { NULL }
306 };
307 
308 static PyGetSetDef Dialect_getsetlist[] = {
309     { "delimiter",          (getter)Dialect_get_delimiter},
310     { "escapechar",             (getter)Dialect_get_escapechar},
311     { "lineterminator",         (getter)Dialect_get_lineterminator},
312     { "quotechar",              (getter)Dialect_get_quotechar},
313     { "quoting",                (getter)Dialect_get_quoting},
314     {NULL},
315 };
316 
317 static void
Dialect_dealloc(DialectObj * self)318 Dialect_dealloc(DialectObj *self)
319 {
320     Py_XDECREF(self->lineterminator);
321     Py_TYPE(self)->tp_free((PyObject *)self);
322 }
323 
/*
 * Keyword names accepted by dialect_new(), in the order its argument
 * parser fills the output variables.  NULL-terminated.
 */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL
};
336 
337 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)338 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
339 {
340     DialectObj *self;
341     PyObject *ret = NULL;
342     PyObject *dialect = NULL;
343     PyObject *delimiter = NULL;
344     PyObject *doublequote = NULL;
345     PyObject *escapechar = NULL;
346     PyObject *lineterminator = NULL;
347     PyObject *quotechar = NULL;
348     PyObject *quoting = NULL;
349     PyObject *skipinitialspace = NULL;
350     PyObject *strict = NULL;
351 
352     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353                                      "|OOOOOOOOO", dialect_kws,
354                                      &dialect,
355                                      &delimiter,
356                                      &doublequote,
357                                      &escapechar,
358                                      &lineterminator,
359                                      &quotechar,
360                                      &quoting,
361                                      &skipinitialspace,
362                                      &strict))
363         return NULL;
364 
365     if (dialect != NULL) {
366         if (PyUnicode_Check(dialect)) {
367             dialect = get_dialect_from_registry(dialect);
368             if (dialect == NULL)
369                 return NULL;
370         }
371         else
372             Py_INCREF(dialect);
373         /* Can we reuse this instance? */
374         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
375             delimiter == 0 &&
376             doublequote == 0 &&
377             escapechar == 0 &&
378             lineterminator == 0 &&
379             quotechar == 0 &&
380             quoting == 0 &&
381             skipinitialspace == 0 &&
382             strict == 0)
383             return dialect;
384     }
385 
386     self = (DialectObj *)type->tp_alloc(type, 0);
387     if (self == NULL) {
388         Py_XDECREF(dialect);
389         return NULL;
390     }
391     self->lineterminator = NULL;
392 
393     Py_XINCREF(delimiter);
394     Py_XINCREF(doublequote);
395     Py_XINCREF(escapechar);
396     Py_XINCREF(lineterminator);
397     Py_XINCREF(quotechar);
398     Py_XINCREF(quoting);
399     Py_XINCREF(skipinitialspace);
400     Py_XINCREF(strict);
401     if (dialect != NULL) {
402 #define DIALECT_GETATTR(v, n) \
403         if (v == NULL) \
404             v = PyObject_GetAttrString(dialect, n)
405         DIALECT_GETATTR(delimiter, "delimiter");
406         DIALECT_GETATTR(doublequote, "doublequote");
407         DIALECT_GETATTR(escapechar, "escapechar");
408         DIALECT_GETATTR(lineterminator, "lineterminator");
409         DIALECT_GETATTR(quotechar, "quotechar");
410         DIALECT_GETATTR(quoting, "quoting");
411         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412         DIALECT_GETATTR(strict, "strict");
413         PyErr_Clear();
414     }
415 
416     /* check types and convert to C values */
417 #define DIASET(meth, name, target, src, dflt) \
418     if (meth(name, target, src, dflt)) \
419         goto err
420     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
421     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
422     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
426     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
427     DIASET(_set_bool, "strict", &self->strict, strict, 0);
428 
429     /* validate options */
430     if (dialect_check_quoting(self->quoting))
431         goto err;
432     if (self->delimiter == 0) {
433         PyErr_SetString(PyExc_TypeError,
434                         "\"delimiter\" must be a 1-character string");
435         goto err;
436     }
437     if (quotechar == Py_None && quoting == NULL)
438         self->quoting = QUOTE_NONE;
439     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440         PyErr_SetString(PyExc_TypeError,
441                         "quotechar must be set if quoting enabled");
442         goto err;
443     }
444     if (self->lineterminator == 0) {
445         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446         goto err;
447     }
448 
449     ret = (PyObject *)self;
450     Py_INCREF(self);
451 err:
452     Py_XDECREF(self);
453     Py_XDECREF(dialect);
454     Py_XDECREF(delimiter);
455     Py_XDECREF(doublequote);
456     Py_XDECREF(escapechar);
457     Py_XDECREF(lineterminator);
458     Py_XDECREF(quotechar);
459     Py_XDECREF(quoting);
460     Py_XDECREF(skipinitialspace);
461     Py_XDECREF(strict);
462     return ret;
463 }
464 
465 
466 PyDoc_STRVAR(Dialect_Type_doc,
467 "CSV dialect\n"
468 "\n"
469 "The Dialect type records CSV parsing and generation options.\n");
470 
471 static PyTypeObject Dialect_Type = {
472     PyVarObject_HEAD_INIT(NULL, 0)
473     "_csv.Dialect",                         /* tp_name */
474     sizeof(DialectObj),                     /* tp_basicsize */
475     0,                                      /* tp_itemsize */
476     /*  methods  */
477     (destructor)Dialect_dealloc,            /* tp_dealloc */
478     (printfunc)0,                           /* tp_print */
479     (getattrfunc)0,                         /* tp_getattr */
480     (setattrfunc)0,                         /* tp_setattr */
481     0,                                      /* tp_reserved */
482     (reprfunc)0,                            /* tp_repr */
483     0,                                      /* tp_as_number */
484     0,                                      /* tp_as_sequence */
485     0,                                      /* tp_as_mapping */
486     (hashfunc)0,                            /* tp_hash */
487     (ternaryfunc)0,                         /* tp_call */
488     (reprfunc)0,                                /* tp_str */
489     0,                                      /* tp_getattro */
490     0,                                      /* tp_setattro */
491     0,                                      /* tp_as_buffer */
492     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493     Dialect_Type_doc,                       /* tp_doc */
494     0,                                      /* tp_traverse */
495     0,                                      /* tp_clear */
496     0,                                      /* tp_richcompare */
497     0,                                      /* tp_weaklistoffset */
498     0,                                      /* tp_iter */
499     0,                                      /* tp_iternext */
500     0,                                          /* tp_methods */
501     Dialect_memberlist,                     /* tp_members */
502     Dialect_getsetlist,                     /* tp_getset */
503     0,                                          /* tp_base */
504     0,                                          /* tp_dict */
505     0,                                          /* tp_descr_get */
506     0,                                          /* tp_descr_set */
507     0,                                          /* tp_dictoffset */
508     0,                                          /* tp_init */
509     0,                                          /* tp_alloc */
510     dialect_new,                                /* tp_new */
511     0,                                          /* tp_free */
512 };
513 
514 /*
515  * Return an instance of the dialect type, given a Python instance or kwarg
516  * description of the dialect
517  */
518 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)519 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520 {
521     PyObject *type = (PyObject *)&Dialect_Type;
522     if (dialect_inst) {
523         return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
524     }
525     else {
526         return _PyObject_FastCallDict(type, NULL, 0, kwargs);
527     }
528 }
529 
530 /*
531  * READER
532  */
533 static int
parse_save_field(ReaderObj * self)534 parse_save_field(ReaderObj *self)
535 {
536     PyObject *field;
537 
538     field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
539                                       (void *) self->field, self->field_len);
540     if (field == NULL)
541         return -1;
542     self->field_len = 0;
543     if (self->numeric_field) {
544         PyObject *tmp;
545 
546         self->numeric_field = 0;
547         tmp = PyNumber_Float(field);
548         Py_DECREF(field);
549         if (tmp == NULL)
550             return -1;
551         field = tmp;
552     }
553     if (PyList_Append(self->fields, field) < 0) {
554         Py_DECREF(field);
555         return -1;
556     }
557     Py_DECREF(field);
558     return 0;
559 }
560 
561 static int
parse_grow_buff(ReaderObj * self)562 parse_grow_buff(ReaderObj *self)
563 {
564     if (self->field_size == 0) {
565         self->field_size = 4096;
566         if (self->field != NULL)
567             PyMem_Free(self->field);
568         self->field = PyMem_New(Py_UCS4, self->field_size);
569     }
570     else {
571         Py_UCS4 *field = self->field;
572         if (self->field_size > PY_SSIZE_T_MAX / 2) {
573             PyErr_NoMemory();
574             return 0;
575         }
576         self->field_size *= 2;
577         self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
578     }
579     if (self->field == NULL) {
580         PyErr_NoMemory();
581         return 0;
582     }
583     return 1;
584 }
585 
586 static int
parse_add_char(ReaderObj * self,Py_UCS4 c)587 parse_add_char(ReaderObj *self, Py_UCS4 c)
588 {
589     if (self->field_len >= _csvstate_global->field_limit) {
590         PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
591                      _csvstate_global->field_limit);
592         return -1;
593     }
594     if (self->field_len == self->field_size && !parse_grow_buff(self))
595         return -1;
596     self->field[self->field_len++] = c;
597     return 0;
598 }
599 
600 static int
parse_process_char(ReaderObj * self,Py_UCS4 c)601 parse_process_char(ReaderObj *self, Py_UCS4 c)
602 {
603     DialectObj *dialect = self->dialect;
604 
605     switch (self->state) {
606     case START_RECORD:
607         /* start of record */
608         if (c == '\0')
609             /* empty line - return [] */
610             break;
611         else if (c == '\n' || c == '\r') {
612             self->state = EAT_CRNL;
613             break;
614         }
615         /* normal character - handle as START_FIELD */
616         self->state = START_FIELD;
617         /* fallthru */
618     case START_FIELD:
619         /* expecting field */
620         if (c == '\n' || c == '\r' || c == '\0') {
621             /* save empty field - return [fields] */
622             if (parse_save_field(self) < 0)
623                 return -1;
624             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
625         }
626         else if (c == dialect->quotechar &&
627                  dialect->quoting != QUOTE_NONE) {
628             /* start quoted field */
629             self->state = IN_QUOTED_FIELD;
630         }
631         else if (c == dialect->escapechar) {
632             /* possible escaped character */
633             self->state = ESCAPED_CHAR;
634         }
635         else if (c == ' ' && dialect->skipinitialspace)
636             /* ignore space at start of field */
637             ;
638         else if (c == dialect->delimiter) {
639             /* save empty field */
640             if (parse_save_field(self) < 0)
641                 return -1;
642         }
643         else {
644             /* begin new unquoted field */
645             if (dialect->quoting == QUOTE_NONNUMERIC)
646                 self->numeric_field = 1;
647             if (parse_add_char(self, c) < 0)
648                 return -1;
649             self->state = IN_FIELD;
650         }
651         break;
652 
653     case ESCAPED_CHAR:
654         if (c == '\n' || c=='\r') {
655             if (parse_add_char(self, c) < 0)
656                 return -1;
657             self->state = AFTER_ESCAPED_CRNL;
658             break;
659         }
660         if (c == '\0')
661             c = '\n';
662         if (parse_add_char(self, c) < 0)
663             return -1;
664         self->state = IN_FIELD;
665         break;
666 
667     case AFTER_ESCAPED_CRNL:
668         if (c == '\0')
669             break;
670         /*fallthru*/
671 
672     case IN_FIELD:
673         /* in unquoted field */
674         if (c == '\n' || c == '\r' || c == '\0') {
675             /* end of line - return [fields] */
676             if (parse_save_field(self) < 0)
677                 return -1;
678             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
679         }
680         else if (c == dialect->escapechar) {
681             /* possible escaped character */
682             self->state = ESCAPED_CHAR;
683         }
684         else if (c == dialect->delimiter) {
685             /* save field - wait for new field */
686             if (parse_save_field(self) < 0)
687                 return -1;
688             self->state = START_FIELD;
689         }
690         else {
691             /* normal character - save in field */
692             if (parse_add_char(self, c) < 0)
693                 return -1;
694         }
695         break;
696 
697     case IN_QUOTED_FIELD:
698         /* in quoted field */
699         if (c == '\0')
700             ;
701         else if (c == dialect->escapechar) {
702             /* Possible escape character */
703             self->state = ESCAPE_IN_QUOTED_FIELD;
704         }
705         else if (c == dialect->quotechar &&
706                  dialect->quoting != QUOTE_NONE) {
707             if (dialect->doublequote) {
708                 /* doublequote; " represented by "" */
709                 self->state = QUOTE_IN_QUOTED_FIELD;
710             }
711             else {
712                 /* end of quote part of field */
713                 self->state = IN_FIELD;
714             }
715         }
716         else {
717             /* normal character - save in field */
718             if (parse_add_char(self, c) < 0)
719                 return -1;
720         }
721         break;
722 
723     case ESCAPE_IN_QUOTED_FIELD:
724         if (c == '\0')
725             c = '\n';
726         if (parse_add_char(self, c) < 0)
727             return -1;
728         self->state = IN_QUOTED_FIELD;
729         break;
730 
731     case QUOTE_IN_QUOTED_FIELD:
732         /* doublequote - seen a quote in a quoted field */
733         if (dialect->quoting != QUOTE_NONE &&
734             c == dialect->quotechar) {
735             /* save "" as " */
736             if (parse_add_char(self, c) < 0)
737                 return -1;
738             self->state = IN_QUOTED_FIELD;
739         }
740         else if (c == dialect->delimiter) {
741             /* save field - wait for new field */
742             if (parse_save_field(self) < 0)
743                 return -1;
744             self->state = START_FIELD;
745         }
746         else if (c == '\n' || c == '\r' || c == '\0') {
747             /* end of line - return [fields] */
748             if (parse_save_field(self) < 0)
749                 return -1;
750             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
751         }
752         else if (!dialect->strict) {
753             if (parse_add_char(self, c) < 0)
754                 return -1;
755             self->state = IN_FIELD;
756         }
757         else {
758             /* illegal */
759             PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
760                             dialect->delimiter,
761                             dialect->quotechar);
762             return -1;
763         }
764         break;
765 
766     case EAT_CRNL:
767         if (c == '\n' || c == '\r')
768             ;
769         else if (c == '\0')
770             self->state = START_RECORD;
771         else {
772             PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
773             return -1;
774         }
775         break;
776 
777     }
778     return 0;
779 }
780 
781 static int
parse_reset(ReaderObj * self)782 parse_reset(ReaderObj *self)
783 {
784     Py_XSETREF(self->fields, PyList_New(0));
785     if (self->fields == NULL)
786         return -1;
787     self->field_len = 0;
788     self->state = START_RECORD;
789     self->numeric_field = 0;
790     return 0;
791 }
792 
793 static PyObject *
Reader_iternext(ReaderObj * self)794 Reader_iternext(ReaderObj *self)
795 {
796     PyObject *fields = NULL;
797     Py_UCS4 c;
798     Py_ssize_t pos, linelen;
799     unsigned int kind;
800     void *data;
801     PyObject *lineobj;
802 
803     if (parse_reset(self) < 0)
804         return NULL;
805     do {
806         lineobj = PyIter_Next(self->input_iter);
807         if (lineobj == NULL) {
808             /* End of input OR exception */
809             if (!PyErr_Occurred() && (self->field_len != 0 ||
810                                       self->state == IN_QUOTED_FIELD)) {
811                 if (self->dialect->strict)
812                     PyErr_SetString(_csvstate_global->error_obj,
813                                     "unexpected end of data");
814                 else if (parse_save_field(self) >= 0)
815                     break;
816             }
817             return NULL;
818         }
819         if (!PyUnicode_Check(lineobj)) {
820             PyErr_Format(_csvstate_global->error_obj,
821                          "iterator should return strings, "
822                          "not %.200s "
823                          "(did you open the file in text mode?)",
824                          lineobj->ob_type->tp_name
825                 );
826             Py_DECREF(lineobj);
827             return NULL;
828         }
829         if (PyUnicode_READY(lineobj) == -1) {
830             Py_DECREF(lineobj);
831             return NULL;
832         }
833         ++self->line_num;
834         kind = PyUnicode_KIND(lineobj);
835         data = PyUnicode_DATA(lineobj);
836         pos = 0;
837         linelen = PyUnicode_GET_LENGTH(lineobj);
838         while (linelen--) {
839             c = PyUnicode_READ(kind, data, pos);
840             if (c == '\0') {
841                 Py_DECREF(lineobj);
842                 PyErr_Format(_csvstate_global->error_obj,
843                              "line contains NULL byte");
844                 goto err;
845             }
846             if (parse_process_char(self, c) < 0) {
847                 Py_DECREF(lineobj);
848                 goto err;
849             }
850             pos++;
851         }
852         Py_DECREF(lineobj);
853         if (parse_process_char(self, 0) < 0)
854             goto err;
855     } while (self->state != START_RECORD);
856 
857     fields = self->fields;
858     self->fields = NULL;
859 err:
860     return fields;
861 }
862 
863 static void
Reader_dealloc(ReaderObj * self)864 Reader_dealloc(ReaderObj *self)
865 {
866     PyObject_GC_UnTrack(self);
867     Py_XDECREF(self->dialect);
868     Py_XDECREF(self->input_iter);
869     Py_XDECREF(self->fields);
870     if (self->field != NULL)
871         PyMem_Free(self->field);
872     PyObject_GC_Del(self);
873 }
874 
875 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)876 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
877 {
878     Py_VISIT(self->dialect);
879     Py_VISIT(self->input_iter);
880     Py_VISIT(self->fields);
881     return 0;
882 }
883 
884 static int
Reader_clear(ReaderObj * self)885 Reader_clear(ReaderObj *self)
886 {
887     Py_CLEAR(self->dialect);
888     Py_CLEAR(self->input_iter);
889     Py_CLEAR(self->fields);
890     return 0;
891 }
892 
893 PyDoc_STRVAR(Reader_Type_doc,
894 "CSV reader\n"
895 "\n"
896 "Reader objects are responsible for reading and parsing tabular data\n"
897 "in CSV format.\n"
898 );
899 
900 static struct PyMethodDef Reader_methods[] = {
901     { NULL, NULL }
902 };
903 #define R_OFF(x) offsetof(ReaderObj, x)
904 
905 static struct PyMemberDef Reader_memberlist[] = {
906     { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
907     { "line_num", T_ULONG, R_OFF(line_num), READONLY },
908     { NULL }
909 };
910 
911 
912 static PyTypeObject Reader_Type = {
913     PyVarObject_HEAD_INIT(NULL, 0)
914     "_csv.reader",                          /*tp_name*/
915     sizeof(ReaderObj),                      /*tp_basicsize*/
916     0,                                      /*tp_itemsize*/
917     /* methods */
918     (destructor)Reader_dealloc,             /*tp_dealloc*/
919     (printfunc)0,                           /*tp_print*/
920     (getattrfunc)0,                         /*tp_getattr*/
921     (setattrfunc)0,                         /*tp_setattr*/
922     0,                                     /*tp_reserved*/
923     (reprfunc)0,                            /*tp_repr*/
924     0,                                      /*tp_as_number*/
925     0,                                      /*tp_as_sequence*/
926     0,                                      /*tp_as_mapping*/
927     (hashfunc)0,                            /*tp_hash*/
928     (ternaryfunc)0,                         /*tp_call*/
929     (reprfunc)0,                                /*tp_str*/
930     0,                                      /*tp_getattro*/
931     0,                                      /*tp_setattro*/
932     0,                                      /*tp_as_buffer*/
933     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
934         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
935     Reader_Type_doc,                        /*tp_doc*/
936     (traverseproc)Reader_traverse,          /*tp_traverse*/
937     (inquiry)Reader_clear,                  /*tp_clear*/
938     0,                                      /*tp_richcompare*/
939     0,                                      /*tp_weaklistoffset*/
940     PyObject_SelfIter,                          /*tp_iter*/
941     (getiterfunc)Reader_iternext,           /*tp_iternext*/
942     Reader_methods,                         /*tp_methods*/
943     Reader_memberlist,                      /*tp_members*/
944     0,                                      /*tp_getset*/
945 
946 };
947 
948 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)949 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
950 {
951     PyObject * iterator, * dialect = NULL;
952     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
953 
954     if (!self)
955         return NULL;
956 
957     self->dialect = NULL;
958     self->fields = NULL;
959     self->input_iter = NULL;
960     self->field = NULL;
961     self->field_size = 0;
962     self->line_num = 0;
963 
964     if (parse_reset(self) < 0) {
965         Py_DECREF(self);
966         return NULL;
967     }
968 
969     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
970         Py_DECREF(self);
971         return NULL;
972     }
973     self->input_iter = PyObject_GetIter(iterator);
974     if (self->input_iter == NULL) {
975         PyErr_SetString(PyExc_TypeError,
976                         "argument 1 must be an iterator");
977         Py_DECREF(self);
978         return NULL;
979     }
980     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
981     if (self->dialect == NULL) {
982         Py_DECREF(self);
983         return NULL;
984     }
985 
986     PyObject_GC_Track(self);
987     return (PyObject *)self;
988 }
989 
990 /*
991  * WRITER
992  */
993 /* ---------------------------------------------------------------- */
994 static void
join_reset(WriterObj * self)995 join_reset(WriterObj *self)
996 {
997     self->rec_len = 0;
998     self->num_fields = 0;
999 }
1000 
1001 #define MEM_INCR 32768
1002 
1003 /* Calculate new record length or append field to record.  Return new
1004  * record length.
1005  */
1006 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)1007 join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
1008                  Py_ssize_t field_len, int *quoted,
1009                  int copy_phase)
1010 {
1011     DialectObj *dialect = self->dialect;
1012     int i;
1013     Py_ssize_t rec_len;
1014 
1015 #define INCLEN \
1016     do {\
1017         if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
1018             goto overflow; \
1019         } \
1020         rec_len++; \
1021     } while(0)
1022 
1023 #define ADDCH(c)                                \
1024     do {\
1025         if (copy_phase) \
1026             self->rec[rec_len] = c;\
1027         INCLEN;\
1028     } while(0)
1029 
1030     rec_len = self->rec_len;
1031 
1032     /* If this is not the first field we need a field separator */
1033     if (self->num_fields > 0)
1034         ADDCH(dialect->delimiter);
1035 
1036     /* Handle preceding quote */
1037     if (copy_phase && *quoted)
1038         ADDCH(dialect->quotechar);
1039 
1040     /* Copy/count field data */
1041     /* If field is null just pass over */
1042     for (i = 0; field_data && (i < field_len); i++) {
1043         Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1044         int want_escape = 0;
1045 
1046         if (c == dialect->delimiter ||
1047             c == dialect->escapechar ||
1048             c == dialect->quotechar  ||
1049             PyUnicode_FindChar(
1050                 dialect->lineterminator, c, 0,
1051                 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1052             if (dialect->quoting == QUOTE_NONE)
1053                 want_escape = 1;
1054             else {
1055                 if (c == dialect->quotechar) {
1056                     if (dialect->doublequote)
1057                         ADDCH(dialect->quotechar);
1058                     else
1059                         want_escape = 1;
1060                 }
1061                 if (!want_escape)
1062                     *quoted = 1;
1063             }
1064             if (want_escape) {
1065                 if (!dialect->escapechar) {
1066                     PyErr_Format(_csvstate_global->error_obj,
1067                                  "need to escape, but no escapechar set");
1068                     return -1;
1069                 }
1070                 ADDCH(dialect->escapechar);
1071             }
1072         }
1073         /* Copy field character into record buffer.
1074          */
1075         ADDCH(c);
1076     }
1077 
1078     if (*quoted) {
1079         if (copy_phase)
1080             ADDCH(dialect->quotechar);
1081         else {
1082             INCLEN; /* starting quote */
1083             INCLEN; /* ending quote */
1084         }
1085     }
1086     return rec_len;
1087 
1088   overflow:
1089     PyErr_NoMemory();
1090     return -1;
1091 #undef ADDCH
1092 #undef INCLEN
1093 }
1094 
1095 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1096 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1097 {
1098 
1099     if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
1100         PyErr_NoMemory();
1101         return 0;
1102     }
1103 
1104     if (rec_len > self->rec_size) {
1105         if (self->rec_size == 0) {
1106             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1107             if (self->rec != NULL)
1108                 PyMem_Free(self->rec);
1109             self->rec = PyMem_New(Py_UCS4, self->rec_size);
1110         }
1111         else {
1112             Py_UCS4* old_rec = self->rec;
1113 
1114             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1115             self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
1116             if (self->rec == NULL)
1117                 PyMem_Free(old_rec);
1118         }
1119         if (self->rec == NULL) {
1120             PyErr_NoMemory();
1121             return 0;
1122         }
1123     }
1124     return 1;
1125 }
1126 
1127 static int
join_append(WriterObj * self,PyObject * field,int quoted)1128 join_append(WriterObj *self, PyObject *field, int quoted)
1129 {
1130     unsigned int field_kind = -1;
1131     void *field_data = NULL;
1132     Py_ssize_t field_len = 0;
1133     Py_ssize_t rec_len;
1134 
1135     if (field != NULL) {
1136         if (PyUnicode_READY(field) == -1)
1137             return 0;
1138         field_kind = PyUnicode_KIND(field);
1139         field_data = PyUnicode_DATA(field);
1140         field_len = PyUnicode_GET_LENGTH(field);
1141     }
1142     rec_len = join_append_data(self, field_kind, field_data, field_len,
1143                                &quoted, 0);
1144     if (rec_len < 0)
1145         return 0;
1146 
1147     /* grow record buffer if necessary */
1148     if (!join_check_rec_size(self, rec_len))
1149         return 0;
1150 
1151     self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1152                                      &quoted, 1);
1153     self->num_fields++;
1154 
1155     return 1;
1156 }
1157 
1158 static int
join_append_lineterminator(WriterObj * self)1159 join_append_lineterminator(WriterObj *self)
1160 {
1161     Py_ssize_t terminator_len, i;
1162     unsigned int term_kind;
1163     void *term_data;
1164 
1165     terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1166     if (terminator_len == -1)
1167         return 0;
1168 
1169     /* grow record buffer if necessary */
1170     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1171         return 0;
1172 
1173     term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1174     term_data = PyUnicode_DATA(self->dialect->lineterminator);
1175     for (i = 0; i < terminator_len; i++)
1176         self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1177     self->rec_len += terminator_len;
1178 
1179     return 1;
1180 }
1181 
1182 PyDoc_STRVAR(csv_writerow_doc,
1183 "writerow(iterable)\n"
1184 "\n"
1185 "Construct and write a CSV record from an iterable of fields.  Non-string\n"
1186 "elements will be converted to string.");
1187 
1188 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1189 csv_writerow(WriterObj *self, PyObject *seq)
1190 {
1191     DialectObj *dialect = self->dialect;
1192     PyObject *iter, *field, *line, *result;
1193 
1194     iter = PyObject_GetIter(seq);
1195     if (iter == NULL)
1196         return PyErr_Format(_csvstate_global->error_obj,
1197                             "iterable expected, not %.200s",
1198                             seq->ob_type->tp_name);
1199 
1200     /* Join all fields in internal buffer.
1201      */
1202     join_reset(self);
1203     while ((field = PyIter_Next(iter))) {
1204         int append_ok;
1205         int quoted;
1206 
1207         switch (dialect->quoting) {
1208         case QUOTE_NONNUMERIC:
1209             quoted = !PyNumber_Check(field);
1210             break;
1211         case QUOTE_ALL:
1212             quoted = 1;
1213             break;
1214         default:
1215             quoted = 0;
1216             break;
1217         }
1218 
1219         if (PyUnicode_Check(field)) {
1220             append_ok = join_append(self, field, quoted);
1221             Py_DECREF(field);
1222         }
1223         else if (field == Py_None) {
1224             append_ok = join_append(self, NULL, quoted);
1225             Py_DECREF(field);
1226         }
1227         else {
1228             PyObject *str;
1229 
1230             str = PyObject_Str(field);
1231             Py_DECREF(field);
1232             if (str == NULL) {
1233                 Py_DECREF(iter);
1234                 return NULL;
1235             }
1236             append_ok = join_append(self, str, quoted);
1237             Py_DECREF(str);
1238         }
1239         if (!append_ok) {
1240             Py_DECREF(iter);
1241             return NULL;
1242         }
1243     }
1244     Py_DECREF(iter);
1245     if (PyErr_Occurred())
1246         return NULL;
1247 
1248     if (self->num_fields > 0 && self->rec_size == 0) {
1249         if (dialect->quoting == QUOTE_NONE) {
1250             PyErr_Format(_csvstate_global->error_obj,
1251                 "single empty field record must be quoted");
1252             return NULL;
1253         }
1254         self->num_fields--;
1255         if (!join_append(self, NULL, 1))
1256             return NULL;
1257     }
1258 
1259     /* Add line terminator.
1260      */
1261     if (!join_append_lineterminator(self))
1262         return NULL;
1263 
1264     line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1265                                      (void *) self->rec, self->rec_len);
1266     if (line == NULL)
1267         return NULL;
1268     result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1269     Py_DECREF(line);
1270     return result;
1271 }
1272 
1273 PyDoc_STRVAR(csv_writerows_doc,
1274 "writerows(iterable of iterables)\n"
1275 "\n"
1276 "Construct and write a series of iterables to a csv file.  Non-string\n"
1277 "elements will be converted to string.");
1278 
1279 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1280 csv_writerows(WriterObj *self, PyObject *seqseq)
1281 {
1282     PyObject *row_iter, *row_obj, *result;
1283 
1284     row_iter = PyObject_GetIter(seqseq);
1285     if (row_iter == NULL) {
1286         PyErr_SetString(PyExc_TypeError,
1287                         "writerows() argument must be iterable");
1288         return NULL;
1289     }
1290     while ((row_obj = PyIter_Next(row_iter))) {
1291         result = csv_writerow(self, row_obj);
1292         Py_DECREF(row_obj);
1293         if (!result) {
1294             Py_DECREF(row_iter);
1295             return NULL;
1296         }
1297         else
1298              Py_DECREF(result);
1299     }
1300     Py_DECREF(row_iter);
1301     if (PyErr_Occurred())
1302         return NULL;
1303     Py_INCREF(Py_None);
1304     return Py_None;
1305 }
1306 
1307 static struct PyMethodDef Writer_methods[] = {
1308     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1309     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1310     { NULL, NULL }
1311 };
1312 
1313 #define W_OFF(x) offsetof(WriterObj, x)
1314 
1315 static struct PyMemberDef Writer_memberlist[] = {
1316     { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1317     { NULL }
1318 };
1319 
1320 static void
Writer_dealloc(WriterObj * self)1321 Writer_dealloc(WriterObj *self)
1322 {
1323     PyObject_GC_UnTrack(self);
1324     Py_XDECREF(self->dialect);
1325     Py_XDECREF(self->writeline);
1326     if (self->rec != NULL)
1327         PyMem_Free(self->rec);
1328     PyObject_GC_Del(self);
1329 }
1330 
1331 static int
Writer_traverse(WriterObj * self,visitproc visit,void * arg)1332 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1333 {
1334     Py_VISIT(self->dialect);
1335     Py_VISIT(self->writeline);
1336     return 0;
1337 }
1338 
1339 static int
Writer_clear(WriterObj * self)1340 Writer_clear(WriterObj *self)
1341 {
1342     Py_CLEAR(self->dialect);
1343     Py_CLEAR(self->writeline);
1344     return 0;
1345 }
1346 
1347 PyDoc_STRVAR(Writer_Type_doc,
1348 "CSV writer\n"
1349 "\n"
1350 "Writer objects are responsible for generating tabular data\n"
1351 "in CSV format from sequence input.\n"
1352 );
1353 
1354 static PyTypeObject Writer_Type = {
1355     PyVarObject_HEAD_INIT(NULL, 0)
1356     "_csv.writer",                          /*tp_name*/
1357     sizeof(WriterObj),                      /*tp_basicsize*/
1358     0,                                      /*tp_itemsize*/
1359     /* methods */
1360     (destructor)Writer_dealloc,             /*tp_dealloc*/
1361     (printfunc)0,                           /*tp_print*/
1362     (getattrfunc)0,                         /*tp_getattr*/
1363     (setattrfunc)0,                         /*tp_setattr*/
1364     0,                                      /*tp_reserved*/
1365     (reprfunc)0,                            /*tp_repr*/
1366     0,                                      /*tp_as_number*/
1367     0,                                      /*tp_as_sequence*/
1368     0,                                      /*tp_as_mapping*/
1369     (hashfunc)0,                            /*tp_hash*/
1370     (ternaryfunc)0,                         /*tp_call*/
1371     (reprfunc)0,                            /*tp_str*/
1372     0,                                      /*tp_getattro*/
1373     0,                                      /*tp_setattro*/
1374     0,                                      /*tp_as_buffer*/
1375     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1376         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
1377     Writer_Type_doc,
1378     (traverseproc)Writer_traverse,          /*tp_traverse*/
1379     (inquiry)Writer_clear,                  /*tp_clear*/
1380     0,                                      /*tp_richcompare*/
1381     0,                                      /*tp_weaklistoffset*/
1382     (getiterfunc)0,                         /*tp_iter*/
1383     (getiterfunc)0,                         /*tp_iternext*/
1384     Writer_methods,                         /*tp_methods*/
1385     Writer_memberlist,                      /*tp_members*/
1386     0,                                      /*tp_getset*/
1387 };
1388 
1389 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1390 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1391 {
1392     PyObject * output_file, * dialect = NULL;
1393     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1394     _Py_IDENTIFIER(write);
1395 
1396     if (!self)
1397         return NULL;
1398 
1399     self->dialect = NULL;
1400     self->writeline = NULL;
1401 
1402     self->rec = NULL;
1403     self->rec_size = 0;
1404     self->rec_len = 0;
1405     self->num_fields = 0;
1406 
1407     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1408         Py_DECREF(self);
1409         return NULL;
1410     }
1411     self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
1412     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1413         PyErr_SetString(PyExc_TypeError,
1414                         "argument 1 must have a \"write\" method");
1415         Py_DECREF(self);
1416         return NULL;
1417     }
1418     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1419     if (self->dialect == NULL) {
1420         Py_DECREF(self);
1421         return NULL;
1422     }
1423     PyObject_GC_Track(self);
1424     return (PyObject *)self;
1425 }
1426 
1427 /*
1428  * DIALECT REGISTRY
1429  */
1430 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1431 csv_list_dialects(PyObject *module, PyObject *args)
1432 {
1433     return PyDict_Keys(_csvstate_global->dialects);
1434 }
1435 
1436 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1437 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1438 {
1439     PyObject *name_obj, *dialect_obj = NULL;
1440     PyObject *dialect;
1441 
1442     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1443         return NULL;
1444     if (!PyUnicode_Check(name_obj)) {
1445         PyErr_SetString(PyExc_TypeError,
1446                         "dialect name must be a string");
1447         return NULL;
1448     }
1449     if (PyUnicode_READY(name_obj) == -1)
1450         return NULL;
1451     dialect = _call_dialect(dialect_obj, kwargs);
1452     if (dialect == NULL)
1453         return NULL;
1454     if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
1455         Py_DECREF(dialect);
1456         return NULL;
1457     }
1458     Py_DECREF(dialect);
1459     Py_INCREF(Py_None);
1460     return Py_None;
1461 }
1462 
1463 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1464 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1465 {
1466     if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1467         return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1468     Py_INCREF(Py_None);
1469     return Py_None;
1470 }
1471 
1472 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1473 csv_get_dialect(PyObject *module, PyObject *name_obj)
1474 {
1475     return get_dialect_from_registry(name_obj);
1476 }
1477 
1478 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1479 csv_field_size_limit(PyObject *module, PyObject *args)
1480 {
1481     PyObject *new_limit = NULL;
1482     long old_limit = _csvstate_global->field_limit;
1483 
1484     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1485         return NULL;
1486     if (new_limit != NULL) {
1487         if (!PyLong_CheckExact(new_limit)) {
1488             PyErr_Format(PyExc_TypeError,
1489                          "limit must be an integer");
1490             return NULL;
1491         }
1492         _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1493         if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1494             _csvstate_global->field_limit = old_limit;
1495             return NULL;
1496         }
1497     }
1498     return PyLong_FromLong(old_limit);
1499 }
1500 
1501 /*
1502  * MODULE
1503  */
1504 
1505 PyDoc_STRVAR(csv_module_doc,
1506 "CSV parsing and writing.\n"
1507 "\n"
1508 "This module provides classes that assist in the reading and writing\n"
1509 "of Comma Separated Value (CSV) files, and implements the interface\n"
1510 "described by PEP 305.  Although many CSV files are simple to parse,\n"
1511 "the format is not formally defined by a stable specification and\n"
1512 "is subtle enough that parsing lines of a CSV file with something\n"
1513 "like line.split(\",\") is bound to fail.  The module supports three\n"
1514 "basic APIs: reading, writing, and registration of dialects.\n"
1515 "\n"
1516 "\n"
1517 "DIALECT REGISTRATION:\n"
1518 "\n"
1519 "Readers and writers support a dialect argument, which is a convenient\n"
1520 "handle on a group of settings.  When the dialect argument is a string,\n"
1521 "it identifies one of the dialects previously registered with the module.\n"
1522 "If it is a class or instance, the attributes of the argument are used as\n"
1523 "the settings for the reader or writer:\n"
1524 "\n"
1525 "    class excel:\n"
1526 "        delimiter = ','\n"
1527 "        quotechar = '\"'\n"
1528 "        escapechar = None\n"
1529 "        doublequote = True\n"
1530 "        skipinitialspace = False\n"
1531 "        lineterminator = '\\r\\n'\n"
1532 "        quoting = QUOTE_MINIMAL\n"
1533 "\n"
1534 "SETTINGS:\n"
1535 "\n"
1536 "    * quotechar - specifies a one-character string to use as the \n"
1537 "        quoting character.  It defaults to '\"'.\n"
1538 "    * delimiter - specifies a one-character string to use as the \n"
1539 "        field separator.  It defaults to ','.\n"
1540 "    * skipinitialspace - specifies how to interpret whitespace which\n"
1541 "        immediately follows a delimiter.  It defaults to False, which\n"
1542 "        means that whitespace immediately following a delimiter is part\n"
1543 "        of the following field.\n"
1544 "    * lineterminator -  specifies the character sequence which should \n"
1545 "        terminate rows.\n"
1546 "    * quoting - controls when quotes should be generated by the writer.\n"
1547 "        It can take on any of the following module constants:\n"
1548 "\n"
1549 "        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1550 "            field contains either the quotechar or the delimiter\n"
1551 "        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1552 "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1553 "            fields which do not parse as integers or floating point\n"
1554 "            numbers.\n"
1555 "        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1556 "    * escapechar - specifies a one-character string used to escape \n"
1557 "        the delimiter when quoting is set to QUOTE_NONE.\n"
1558 "    * doublequote - controls the handling of quotes inside fields.  When\n"
1559 "        True, two consecutive quotes are interpreted as one during read,\n"
1560 "        and when writing, each quote character embedded in the data is\n"
1561 "        written as two quotes\n");
1562 
1563 PyDoc_STRVAR(csv_reader_doc,
1564 "    csv_reader = reader(iterable [, dialect='excel']\n"
1565 "                        [optional keyword args])\n"
1566 "    for row in csv_reader:\n"
1567 "        process(row)\n"
1568 "\n"
1569 "The \"iterable\" argument can be any object that returns a line\n"
1570 "of input for each iteration, such as a file object or a list.  The\n"
1571 "optional \"dialect\" parameter is discussed below.  The function\n"
1572 "also accepts optional keyword arguments which override settings\n"
1573 "provided by the dialect.\n"
1574 "\n"
1575 "The returned object is an iterator.  Each iteration returns a row\n"
1576 "of the CSV file (which can span multiple input lines).\n");
1577 
1578 PyDoc_STRVAR(csv_writer_doc,
1579 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1580 "                            [optional keyword args])\n"
1581 "    for row in sequence:\n"
1582 "        csv_writer.writerow(row)\n"
1583 "\n"
1584 "    [or]\n"
1585 "\n"
1586 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1587 "                            [optional keyword args])\n"
1588 "    csv_writer.writerows(rows)\n"
1589 "\n"
1590 "The \"fileobj\" argument can be any object that supports the file API.\n");
1591 
1592 PyDoc_STRVAR(csv_list_dialects_doc,
1593 "Return a list of all know dialect names.\n"
1594 "    names = csv.list_dialects()");
1595 
1596 PyDoc_STRVAR(csv_get_dialect_doc,
1597 "Return the dialect instance associated with name.\n"
1598 "    dialect = csv.get_dialect(name)");
1599 
1600 PyDoc_STRVAR(csv_register_dialect_doc,
1601 "Create a mapping from a string name to a dialect class.\n"
1602 "    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1603 
1604 PyDoc_STRVAR(csv_unregister_dialect_doc,
1605 "Delete the name/dialect mapping associated with a string name.\n"
1606 "    csv.unregister_dialect(name)");
1607 
1608 PyDoc_STRVAR(csv_field_size_limit_doc,
1609 "Sets an upper limit on parsed fields.\n"
1610 "    csv.field_size_limit([limit])\n"
1611 "\n"
1612 "Returns old limit. If limit is not given, no new limit is set and\n"
1613 "the old limit is returned");
1614 
1615 static struct PyMethodDef csv_methods[] = {
1616     { "reader", (PyCFunction)csv_reader,
1617         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1618     { "writer", (PyCFunction)csv_writer,
1619         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1620     { "list_dialects", (PyCFunction)csv_list_dialects,
1621         METH_NOARGS, csv_list_dialects_doc},
1622     { "register_dialect", (PyCFunction)csv_register_dialect,
1623         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1624     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1625         METH_O, csv_unregister_dialect_doc},
1626     { "get_dialect", (PyCFunction)csv_get_dialect,
1627         METH_O, csv_get_dialect_doc},
1628     { "field_size_limit", (PyCFunction)csv_field_size_limit,
1629         METH_VARARGS, csv_field_size_limit_doc},
1630     { NULL, NULL }
1631 };
1632 
1633 static struct PyModuleDef _csvmodule = {
1634     PyModuleDef_HEAD_INIT,
1635     "_csv",
1636     csv_module_doc,
1637     sizeof(_csvstate),
1638     csv_methods,
1639     NULL,
1640     _csv_traverse,
1641     _csv_clear,
1642     _csv_free
1643 };
1644 
1645 PyMODINIT_FUNC
PyInit__csv(void)1646 PyInit__csv(void)
1647 {
1648     PyObject *module;
1649     const StyleDesc *style;
1650 
1651     if (PyType_Ready(&Dialect_Type) < 0)
1652         return NULL;
1653 
1654     if (PyType_Ready(&Reader_Type) < 0)
1655         return NULL;
1656 
1657     if (PyType_Ready(&Writer_Type) < 0)
1658         return NULL;
1659 
1660     /* Create the module and add the functions */
1661     module = PyModule_Create(&_csvmodule);
1662     if (module == NULL)
1663         return NULL;
1664 
1665     /* Add version to the module. */
1666     if (PyModule_AddStringConstant(module, "__version__",
1667                                    MODULE_VERSION) == -1)
1668         return NULL;
1669 
1670     /* Set the field limit */
1671     _csvstate(module)->field_limit = 128 * 1024;
1672     /* Do I still need to add this var to the Module Dict? */
1673 
1674     /* Add _dialects dictionary */
1675     _csvstate(module)->dialects = PyDict_New();
1676     if (_csvstate(module)->dialects == NULL)
1677         return NULL;
1678     Py_INCREF(_csvstate(module)->dialects);
1679     if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
1680         return NULL;
1681 
1682     /* Add quote styles into dictionary */
1683     for (style = quote_styles; style->name; style++) {
1684         if (PyModule_AddIntConstant(module, style->name,
1685                                     style->style) == -1)
1686             return NULL;
1687     }
1688 
1689     /* Add the Dialect type */
1690     Py_INCREF(&Dialect_Type);
1691     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1692         return NULL;
1693 
1694     /* Add the CSV exception object to the module. */
1695     _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1696     if (_csvstate(module)->error_obj == NULL)
1697         return NULL;
1698     Py_INCREF(_csvstate(module)->error_obj);
1699     PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
1700     return module;
1701 }
1702