1 /* csv module */
2 
3 /*
4 
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module.  Users should not use this module directly, but import the csv.py
7 module instead.
8 
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
11 **** 2.2.
12 
13 */
14 
15 #define MODULE_VERSION "1.0"
16 
17 #include "Python.h"
18 #include "structmember.h"
19 
20 
21 /* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Docstring helpers, defined here only when Python.h does not supply them.
 * PyDoc_STR() collapses to an empty string when docstrings are compiled out. */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
/* Return type of the module init function; C++ builds need C linkage. */
#  ifdef __cplusplus
#    define PyMODINIT_FUNC extern "C" void
#  else
#    define PyMODINIT_FUNC void
#  endif
#endif
40 
#ifndef Py_CLEAR
/* Release the reference stored in `op` and leave `op` set to NULL.
 * The slot is nulled before the decref is performed. */
#define Py_CLEAR(op)                                    \
    do {                                                \
        if (op) {                                       \
            PyObject *_py_tmp = (PyObject *)(op);       \
            (op) = NULL;                                \
            Py_DECREF(_py_tmp);                         \
        }                                               \
    } while (0)
#endif
#ifndef Py_VISIT
/* For use inside a tp_traverse function: expects `visit` and `arg` to be
 * in scope and returns from the enclosing function on a non-zero result. */
#define Py_VISIT(op)                                            \
    do {                                                        \
        if (op) {                                               \
            int _py_vret = visit((PyObject *)(op), arg);        \
            if (_py_vret)                                       \
                return _py_vret;                                \
        }                                                       \
    } while (0)
#endif
61 
62 /* end 2.2 compatibility macros */
63 
64 #define IS_BASESTRING(o) \
65     PyObject_TypeCheck(o, &PyBaseString_Type)
66 
67 static PyObject *error_obj;     /* CSV exception */
68 static PyObject *dialects;      /* Dialect registry */
69 static long field_limit = 128 * 1024;   /* max parsed field size */
70 
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,               /* nothing of the record consumed yet */
    START_FIELD,                /* expecting the first character of a field */
    ESCAPED_CHAR,               /* previous character was the escape char */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape char seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD,      /* quote char seen inside a quoted field */
    EAT_CRNL                    /* consuming the record's line terminator */
} ParserState;

/* Quoting styles selectable through a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
80 
81 typedef struct {
82     QuoteStyle style;
83     char *name;
84 } StyleDesc;
85 
86 static StyleDesc quote_styles[] = {
87     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
88     { QUOTE_ALL,        "QUOTE_ALL" },
89     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
90     { QUOTE_NONE,       "QUOTE_NONE" },
91     { 0 }
92 };
93 
94 typedef struct {
95     PyObject_HEAD
96 
97     int doublequote;            /* is " represented by ""? */
98     char delimiter;             /* field separator */
99     char quotechar;             /* quote character */
100     char escapechar;            /* escape character */
101     int skipinitialspace;       /* ignore spaces following delimiter? */
102     PyObject *lineterminator; /* string to write between records */
103     int quoting;                /* style of quoting to write */
104 
105     int strict;                 /* raise exception on bad CSV */
106 } DialectObj;
107 
108 staticforward PyTypeObject Dialect_Type;
109 
110 typedef struct {
111     PyObject_HEAD
112 
113     PyObject *input_iter;   /* iterate over this for input lines */
114 
115     DialectObj *dialect;    /* parsing dialect */
116 
117     PyObject *fields;           /* field list for current record */
118     ParserState state;          /* current CSV parse state */
119     char *field;                /* build current field in here */
120     int field_size;             /* size of allocated buffer */
121     int field_len;              /* length of current field */
122     int numeric_field;          /* treat field as numeric */
123     unsigned long line_num;     /* Source-file line number */
124 } ReaderObj;
125 
126 staticforward PyTypeObject Reader_Type;
127 
128 #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
129 
130 typedef struct {
131     PyObject_HEAD
132 
133     PyObject *writeline;    /* write output lines to this file */
134 
135     DialectObj *dialect;    /* parsing dialect */
136 
137     char *rec;                  /* buffer for parser.join */
138     int rec_size;               /* size of allocated record */
139     int rec_len;                /* length of record */
140     int num_fields;             /* number of fields in record */
141 } WriterObj;
142 
143 staticforward PyTypeObject Writer_Type;
144 
145 /*
146  * DIALECT class
147  */
148 
149 static PyObject *
get_dialect_from_registry(PyObject * name_obj)150 get_dialect_from_registry(PyObject * name_obj)
151 {
152     PyObject *dialect_obj;
153 
154     dialect_obj = PyDict_GetItem(dialects, name_obj);
155     if (dialect_obj == NULL) {
156         if (!PyErr_Occurred())
157             PyErr_Format(error_obj, "unknown dialect");
158     }
159     else
160         Py_INCREF(dialect_obj);
161     return dialect_obj;
162 }
163 
164 static PyObject *
get_string(PyObject * str)165 get_string(PyObject *str)
166 {
167     Py_XINCREF(str);
168     return str;
169 }
170 
171 static PyObject *
get_nullchar_as_None(char c)172 get_nullchar_as_None(char c)
173 {
174     if (c == '\0') {
175         Py_INCREF(Py_None);
176         return Py_None;
177     }
178     else
179         return PyString_FromStringAndSize((char*)&c, 1);
180 }
181 
182 static PyObject *
Dialect_get_lineterminator(DialectObj * self)183 Dialect_get_lineterminator(DialectObj *self)
184 {
185     return get_string(self->lineterminator);
186 }
187 
188 static PyObject *
Dialect_get_escapechar(DialectObj * self)189 Dialect_get_escapechar(DialectObj *self)
190 {
191     return get_nullchar_as_None(self->escapechar);
192 }
193 
194 static PyObject *
Dialect_get_quotechar(DialectObj * self)195 Dialect_get_quotechar(DialectObj *self)
196 {
197     return get_nullchar_as_None(self->quotechar);
198 }
199 
200 static PyObject *
Dialect_get_quoting(DialectObj * self)201 Dialect_get_quoting(DialectObj *self)
202 {
203     return PyInt_FromLong(self->quoting);
204 }
205 
206 static int
_set_bool(const char * name,int * target,PyObject * src,int dflt)207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
208 {
209     if (src == NULL)
210         *target = dflt;
211     else
212         *target = PyObject_IsTrue(src);
213     return 0;
214 }
215 
216 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)217 _set_int(const char *name, int *target, PyObject *src, int dflt)
218 {
219     if (src == NULL)
220         *target = dflt;
221     else {
222         if (!PyInt_Check(src)) {
223             PyErr_Format(PyExc_TypeError,
224                          "\"%s\" must be an integer", name);
225             return -1;
226         }
227         *target = PyInt_AsLong(src);
228     }
229     return 0;
230 }
231 
232 static int
_set_char(const char * name,char * target,PyObject * src,char dflt)233 _set_char(const char *name, char *target, PyObject *src, char dflt)
234 {
235     if (src == NULL)
236         *target = dflt;
237     else {
238         if (src == Py_None || PyString_Size(src) == 0)
239             *target = '\0';
240         else if (!PyString_Check(src) || PyString_Size(src) != 1) {
241             PyErr_Format(PyExc_TypeError,
242                          "\"%s\" must be an 1-character string",
243                          name);
244             return -1;
245         }
246         else {
247             char *s = PyString_AsString(src);
248             if (s == NULL)
249                 return -1;
250             *target = s[0];
251         }
252     }
253     return 0;
254 }
255 
256 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)257 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
258 {
259     if (src == NULL)
260         *target = PyString_FromString(dflt);
261     else {
262         if (src == Py_None)
263             *target = NULL;
264         else if (!IS_BASESTRING(src)) {
265             PyErr_Format(PyExc_TypeError,
266                          "\"%s\" must be an string", name);
267             return -1;
268         }
269         else {
270             Py_XDECREF(*target);
271             Py_INCREF(src);
272             *target = src;
273         }
274     }
275     return 0;
276 }
277 
278 static int
dialect_check_quoting(int quoting)279 dialect_check_quoting(int quoting)
280 {
281     StyleDesc *qs = quote_styles;
282 
283     for (qs = quote_styles; qs->name; qs++) {
284         if (qs->style == quoting)
285             return 0;
286     }
287     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
288     return -1;
289 }
290 
291 #define D_OFF(x) offsetof(DialectObj, x)
292 
293 static struct PyMemberDef Dialect_memberlist[] = {
294     { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
295     { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
296     { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
297     { "strict",             T_INT, D_OFF(strict), READONLY },
298     { NULL }
299 };
300 
301 static PyGetSetDef Dialect_getsetlist[] = {
302     { "escapechar",             (getter)Dialect_get_escapechar},
303     { "lineterminator",         (getter)Dialect_get_lineterminator},
304     { "quotechar",              (getter)Dialect_get_quotechar},
305     { "quoting",                (getter)Dialect_get_quoting},
306     {NULL},
307 };
308 
309 static void
Dialect_dealloc(DialectObj * self)310 Dialect_dealloc(DialectObj *self)
311 {
312     Py_XDECREF(self->lineterminator);
313     Py_TYPE(self)->tp_free((PyObject *)self);
314 }
315 
316 static char *dialect_kws[] = {
317     "dialect",
318     "delimiter",
319     "doublequote",
320     "escapechar",
321     "lineterminator",
322     "quotechar",
323     "quoting",
324     "skipinitialspace",
325     "strict",
326     NULL
327 };
328 
329 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)330 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
331 {
332     DialectObj *self;
333     PyObject *ret = NULL;
334     PyObject *dialect = NULL;
335     PyObject *delimiter = NULL;
336     PyObject *doublequote = NULL;
337     PyObject *escapechar = NULL;
338     PyObject *lineterminator = NULL;
339     PyObject *quotechar = NULL;
340     PyObject *quoting = NULL;
341     PyObject *skipinitialspace = NULL;
342     PyObject *strict = NULL;
343 
344     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
345                                      "|OOOOOOOOO", dialect_kws,
346                                      &dialect,
347                                      &delimiter,
348                                      &doublequote,
349                                      &escapechar,
350                                      &lineterminator,
351                                      &quotechar,
352                                      &quoting,
353                                      &skipinitialspace,
354                                      &strict))
355         return NULL;
356 
357     if (dialect != NULL) {
358         if (IS_BASESTRING(dialect)) {
359             dialect = get_dialect_from_registry(dialect);
360             if (dialect == NULL)
361                 return NULL;
362         }
363         else
364             Py_INCREF(dialect);
365         /* Can we reuse this instance? */
366         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
367             delimiter == 0 &&
368             doublequote == 0 &&
369             escapechar == 0 &&
370             lineterminator == 0 &&
371             quotechar == 0 &&
372             quoting == 0 &&
373             skipinitialspace == 0 &&
374             strict == 0)
375             return dialect;
376     }
377 
378     self = (DialectObj *)type->tp_alloc(type, 0);
379     if (self == NULL) {
380         Py_XDECREF(dialect);
381         return NULL;
382     }
383     self->lineterminator = NULL;
384 
385     Py_XINCREF(delimiter);
386     Py_XINCREF(doublequote);
387     Py_XINCREF(escapechar);
388     Py_XINCREF(lineterminator);
389     Py_XINCREF(quotechar);
390     Py_XINCREF(quoting);
391     Py_XINCREF(skipinitialspace);
392     Py_XINCREF(strict);
393     if (dialect != NULL) {
394 #define DIALECT_GETATTR(v, n) \
395         if (v == NULL) \
396             v = PyObject_GetAttrString(dialect, n)
397         DIALECT_GETATTR(delimiter, "delimiter");
398         DIALECT_GETATTR(doublequote, "doublequote");
399         DIALECT_GETATTR(escapechar, "escapechar");
400         DIALECT_GETATTR(lineterminator, "lineterminator");
401         DIALECT_GETATTR(quotechar, "quotechar");
402         DIALECT_GETATTR(quoting, "quoting");
403         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
404         DIALECT_GETATTR(strict, "strict");
405         PyErr_Clear();
406     }
407 
408     /* check types and convert to C values */
409 #define DIASET(meth, name, target, src, dflt) \
410     if (meth(name, target, src, dflt)) \
411         goto err
412     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
413     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
414     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
415     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
416     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
417     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
418     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
419     DIASET(_set_bool, "strict", &self->strict, strict, 0);
420 
421     /* validate options */
422     if (dialect_check_quoting(self->quoting))
423         goto err;
424     if (self->delimiter == 0) {
425         PyErr_SetString(PyExc_TypeError, "delimiter must be set");
426         goto err;
427     }
428     if (quotechar == Py_None && quoting == NULL)
429         self->quoting = QUOTE_NONE;
430     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
431         PyErr_SetString(PyExc_TypeError,
432                         "quotechar must be set if quoting enabled");
433         goto err;
434     }
435     if (self->lineterminator == 0) {
436         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
437         goto err;
438     }
439 
440     ret = (PyObject *)self;
441     Py_INCREF(self);
442 err:
443     Py_XDECREF(self);
444     Py_XDECREF(dialect);
445     Py_XDECREF(delimiter);
446     Py_XDECREF(doublequote);
447     Py_XDECREF(escapechar);
448     Py_XDECREF(lineterminator);
449     Py_XDECREF(quotechar);
450     Py_XDECREF(quoting);
451     Py_XDECREF(skipinitialspace);
452     Py_XDECREF(strict);
453     return ret;
454 }
455 
456 
457 PyDoc_STRVAR(Dialect_Type_doc,
458 "CSV dialect\n"
459 "\n"
460 "The Dialect type records CSV parsing and generation options.\n");
461 
462 static PyTypeObject Dialect_Type = {
463     PyVarObject_HEAD_INIT(NULL, 0)
464     "_csv.Dialect",                         /* tp_name */
465     sizeof(DialectObj),                     /* tp_basicsize */
466     0,                                      /* tp_itemsize */
467     /*  methods  */
468     (destructor)Dialect_dealloc,            /* tp_dealloc */
469     (printfunc)0,                           /* tp_print */
470     (getattrfunc)0,                         /* tp_getattr */
471     (setattrfunc)0,                         /* tp_setattr */
472     (cmpfunc)0,                             /* tp_compare */
473     (reprfunc)0,                            /* tp_repr */
474     0,                                      /* tp_as_number */
475     0,                                      /* tp_as_sequence */
476     0,                                      /* tp_as_mapping */
477     (hashfunc)0,                            /* tp_hash */
478     (ternaryfunc)0,                         /* tp_call */
479     (reprfunc)0,                                /* tp_str */
480     0,                                      /* tp_getattro */
481     0,                                      /* tp_setattro */
482     0,                                      /* tp_as_buffer */
483     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
484     Dialect_Type_doc,                       /* tp_doc */
485     0,                                      /* tp_traverse */
486     0,                                      /* tp_clear */
487     0,                                      /* tp_richcompare */
488     0,                                      /* tp_weaklistoffset */
489     0,                                      /* tp_iter */
490     0,                                      /* tp_iternext */
491     0,                                          /* tp_methods */
492     Dialect_memberlist,                     /* tp_members */
493     Dialect_getsetlist,                     /* tp_getset */
494     0,                                          /* tp_base */
495     0,                                          /* tp_dict */
496     0,                                          /* tp_descr_get */
497     0,                                          /* tp_descr_set */
498     0,                                          /* tp_dictoffset */
499     0,                                          /* tp_init */
500     0,                                          /* tp_alloc */
501     dialect_new,                                /* tp_new */
502     0,                                          /* tp_free */
503 };
504 
505 /*
506  * Return an instance of the dialect type, given a Python instance or kwarg
507  * description of the dialect
508  */
509 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)510 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
511 {
512     PyObject *ctor_args;
513     PyObject *dialect;
514 
515     ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
516     if (ctor_args == NULL)
517         return NULL;
518     dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
519     Py_DECREF(ctor_args);
520     return dialect;
521 }
522 
523 /*
524  * READER
525  */
526 static int
parse_save_field(ReaderObj * self)527 parse_save_field(ReaderObj *self)
528 {
529     PyObject *field;
530 
531     field = PyString_FromStringAndSize(self->field, self->field_len);
532     if (field == NULL)
533         return -1;
534     self->field_len = 0;
535     if (self->numeric_field) {
536         PyObject *tmp;
537 
538         self->numeric_field = 0;
539         tmp = PyNumber_Float(field);
540         if (tmp == NULL) {
541             Py_DECREF(field);
542             return -1;
543         }
544         Py_DECREF(field);
545         field = tmp;
546     }
547     PyList_Append(self->fields, field);
548     Py_DECREF(field);
549     return 0;
550 }
551 
552 static int
parse_grow_buff(ReaderObj * self)553 parse_grow_buff(ReaderObj *self)
554 {
555     if (self->field_size == 0) {
556         self->field_size = 4096;
557         if (self->field != NULL)
558             PyMem_Free(self->field);
559         self->field = PyMem_Malloc(self->field_size);
560     }
561     else {
562         if (self->field_size > INT_MAX / 2) {
563             PyErr_NoMemory();
564             return 0;
565         }
566         self->field_size *= 2;
567         self->field = PyMem_Realloc(self->field, self->field_size);
568     }
569     if (self->field == NULL) {
570         PyErr_NoMemory();
571         return 0;
572     }
573     return 1;
574 }
575 
576 static int
parse_add_char(ReaderObj * self,char c)577 parse_add_char(ReaderObj *self, char c)
578 {
579     if (self->field_len >= field_limit) {
580         PyErr_Format(error_obj, "field larger than field limit (%ld)",
581                      field_limit);
582         return -1;
583     }
584     if (self->field_len == self->field_size && !parse_grow_buff(self))
585         return -1;
586     self->field[self->field_len++] = c;
587     return 0;
588 }
589 
590 static int
parse_process_char(ReaderObj * self,char c)591 parse_process_char(ReaderObj *self, char c)
592 {
593     DialectObj *dialect = self->dialect;
594 
595     switch (self->state) {
596     case START_RECORD:
597         /* start of record */
598         if (c == '\0')
599             /* empty line - return [] */
600             break;
601         else if (c == '\n' || c == '\r') {
602             self->state = EAT_CRNL;
603             break;
604         }
605         /* normal character - handle as START_FIELD */
606         self->state = START_FIELD;
607         /* fallthru */
608     case START_FIELD:
609         /* expecting field */
610         if (c == '\n' || c == '\r' || c == '\0') {
611             /* save empty field - return [fields] */
612             if (parse_save_field(self) < 0)
613                 return -1;
614             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
615         }
616         else if (c == dialect->quotechar &&
617                  dialect->quoting != QUOTE_NONE) {
618             /* start quoted field */
619             self->state = IN_QUOTED_FIELD;
620         }
621         else if (c == dialect->escapechar) {
622             /* possible escaped character */
623             self->state = ESCAPED_CHAR;
624         }
625         else if (c == ' ' && dialect->skipinitialspace)
626             /* ignore space at start of field */
627             ;
628         else if (c == dialect->delimiter) {
629             /* save empty field */
630             if (parse_save_field(self) < 0)
631                 return -1;
632         }
633         else {
634             /* begin new unquoted field */
635             if (dialect->quoting == QUOTE_NONNUMERIC)
636                 self->numeric_field = 1;
637             if (parse_add_char(self, c) < 0)
638                 return -1;
639             self->state = IN_FIELD;
640         }
641         break;
642 
643     case ESCAPED_CHAR:
644         if (c == '\0')
645             c = '\n';
646         if (parse_add_char(self, c) < 0)
647             return -1;
648         self->state = IN_FIELD;
649         break;
650 
651     case IN_FIELD:
652         /* in unquoted field */
653         if (c == '\n' || c == '\r' || c == '\0') {
654             /* end of line - return [fields] */
655             if (parse_save_field(self) < 0)
656                 return -1;
657             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
658         }
659         else if (c == dialect->escapechar) {
660             /* possible escaped character */
661             self->state = ESCAPED_CHAR;
662         }
663         else if (c == dialect->delimiter) {
664             /* save field - wait for new field */
665             if (parse_save_field(self) < 0)
666                 return -1;
667             self->state = START_FIELD;
668         }
669         else {
670             /* normal character - save in field */
671             if (parse_add_char(self, c) < 0)
672                 return -1;
673         }
674         break;
675 
676     case IN_QUOTED_FIELD:
677         /* in quoted field */
678         if (c == '\0')
679             ;
680         else if (c == dialect->escapechar) {
681             /* Possible escape character */
682             self->state = ESCAPE_IN_QUOTED_FIELD;
683         }
684         else if (c == dialect->quotechar &&
685                  dialect->quoting != QUOTE_NONE) {
686             if (dialect->doublequote) {
687                 /* doublequote; " represented by "" */
688                 self->state = QUOTE_IN_QUOTED_FIELD;
689             }
690             else {
691                 /* end of quote part of field */
692                 self->state = IN_FIELD;
693             }
694         }
695         else {
696             /* normal character - save in field */
697             if (parse_add_char(self, c) < 0)
698                 return -1;
699         }
700         break;
701 
702     case ESCAPE_IN_QUOTED_FIELD:
703         if (c == '\0')
704             c = '\n';
705         if (parse_add_char(self, c) < 0)
706             return -1;
707         self->state = IN_QUOTED_FIELD;
708         break;
709 
710     case QUOTE_IN_QUOTED_FIELD:
711         /* doublequote - seen a quote in an quoted field */
712         if (dialect->quoting != QUOTE_NONE &&
713             c == dialect->quotechar) {
714             /* save "" as " */
715             if (parse_add_char(self, c) < 0)
716                 return -1;
717             self->state = IN_QUOTED_FIELD;
718         }
719         else if (c == dialect->delimiter) {
720             /* save field - wait for new field */
721             if (parse_save_field(self) < 0)
722                 return -1;
723             self->state = START_FIELD;
724         }
725         else if (c == '\n' || c == '\r' || c == '\0') {
726             /* end of line - return [fields] */
727             if (parse_save_field(self) < 0)
728                 return -1;
729             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
730         }
731         else if (!dialect->strict) {
732             if (parse_add_char(self, c) < 0)
733                 return -1;
734             self->state = IN_FIELD;
735         }
736         else {
737             /* illegal */
738             PyErr_Format(error_obj, "'%c' expected after '%c'",
739                             dialect->delimiter,
740                             dialect->quotechar);
741             return -1;
742         }
743         break;
744 
745     case EAT_CRNL:
746         if (c == '\n' || c == '\r')
747             ;
748         else if (c == '\0')
749             self->state = START_RECORD;
750         else {
751             PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
752             return -1;
753         }
754         break;
755 
756     }
757     return 0;
758 }
759 
760 static int
parse_reset(ReaderObj * self)761 parse_reset(ReaderObj *self)
762 {
763     Py_XDECREF(self->fields);
764     self->fields = PyList_New(0);
765     if (self->fields == NULL)
766         return -1;
767     self->field_len = 0;
768     self->state = START_RECORD;
769     self->numeric_field = 0;
770     return 0;
771 }
772 
773 static PyObject *
Reader_iternext(ReaderObj * self)774 Reader_iternext(ReaderObj *self)
775 {
776     PyObject *lineobj;
777     PyObject *fields = NULL;
778     char *line, c;
779     int linelen;
780 
781     if (parse_reset(self) < 0)
782         return NULL;
783     do {
784         lineobj = PyIter_Next(self->input_iter);
785         if (lineobj == NULL) {
786             /* End of input OR exception */
787             if (!PyErr_Occurred() && self->field_len != 0)
788                 PyErr_Format(error_obj,
789                              "newline inside string");
790             return NULL;
791         }
792         ++self->line_num;
793 
794         line = PyString_AsString(lineobj);
795         linelen = PyString_Size(lineobj);
796 
797         if (line == NULL || linelen < 0) {
798             Py_DECREF(lineobj);
799             return NULL;
800         }
801         while (linelen--) {
802             c = *line++;
803             if (c == '\0') {
804                 Py_DECREF(lineobj);
805                 PyErr_Format(error_obj,
806                              "line contains NULL byte");
807                 goto err;
808             }
809             if (parse_process_char(self, c) < 0) {
810                 Py_DECREF(lineobj);
811                 goto err;
812             }
813         }
814         Py_DECREF(lineobj);
815         if (parse_process_char(self, 0) < 0)
816             goto err;
817     } while (self->state != START_RECORD);
818 
819     fields = self->fields;
820     self->fields = NULL;
821 err:
822     return fields;
823 }
824 
825 static void
Reader_dealloc(ReaderObj * self)826 Reader_dealloc(ReaderObj *self)
827 {
828     PyObject_GC_UnTrack(self);
829     Py_XDECREF(self->dialect);
830     Py_XDECREF(self->input_iter);
831     Py_XDECREF(self->fields);
832     if (self->field != NULL)
833         PyMem_Free(self->field);
834     PyObject_GC_Del(self);
835 }
836 
837 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)838 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
839 {
840     Py_VISIT(self->dialect);
841     Py_VISIT(self->input_iter);
842     Py_VISIT(self->fields);
843     return 0;
844 }
845 
846 static int
Reader_clear(ReaderObj * self)847 Reader_clear(ReaderObj *self)
848 {
849     Py_CLEAR(self->dialect);
850     Py_CLEAR(self->input_iter);
851     Py_CLEAR(self->fields);
852     return 0;
853 }
854 
855 PyDoc_STRVAR(Reader_Type_doc,
856 "CSV reader\n"
857 "\n"
858 "Reader objects are responsible for reading and parsing tabular data\n"
859 "in CSV format.\n"
860 );
861 
862 static struct PyMethodDef Reader_methods[] = {
863     { NULL, NULL }
864 };
865 #define R_OFF(x) offsetof(ReaderObj, x)
866 
867 static struct PyMemberDef Reader_memberlist[] = {
868     { "dialect", T_OBJECT, R_OFF(dialect), RO },
869     { "line_num", T_ULONG, R_OFF(line_num), RO },
870     { NULL }
871 };
872 
873 
874 static PyTypeObject Reader_Type = {
875     PyVarObject_HEAD_INIT(NULL, 0)
876     "_csv.reader",                          /*tp_name*/
877     sizeof(ReaderObj),                      /*tp_basicsize*/
878     0,                                      /*tp_itemsize*/
879     /* methods */
880     (destructor)Reader_dealloc,             /*tp_dealloc*/
881     (printfunc)0,                           /*tp_print*/
882     (getattrfunc)0,                         /*tp_getattr*/
883     (setattrfunc)0,                         /*tp_setattr*/
884     (cmpfunc)0,                             /*tp_compare*/
885     (reprfunc)0,                            /*tp_repr*/
886     0,                                      /*tp_as_number*/
887     0,                                      /*tp_as_sequence*/
888     0,                                      /*tp_as_mapping*/
889     (hashfunc)0,                            /*tp_hash*/
890     (ternaryfunc)0,                         /*tp_call*/
891     (reprfunc)0,                                /*tp_str*/
892     0,                                      /*tp_getattro*/
893     0,                                      /*tp_setattro*/
894     0,                                      /*tp_as_buffer*/
895     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
896         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
897     Reader_Type_doc,                        /*tp_doc*/
898     (traverseproc)Reader_traverse,          /*tp_traverse*/
899     (inquiry)Reader_clear,                  /*tp_clear*/
900     0,                                      /*tp_richcompare*/
901     0,                                      /*tp_weaklistoffset*/
902     PyObject_SelfIter,                          /*tp_iter*/
903     (getiterfunc)Reader_iternext,           /*tp_iternext*/
904     Reader_methods,                         /*tp_methods*/
905     Reader_memberlist,                      /*tp_members*/
906     0,                                      /*tp_getset*/
907 
908 };
909 
910 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)911 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
912 {
913     PyObject * iterator, * dialect = NULL;
914     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
915 
916     if (!self)
917         return NULL;
918 
919     self->dialect = NULL;
920     self->fields = NULL;
921     self->input_iter = NULL;
922     self->field = NULL;
923     self->field_size = 0;
924     self->line_num = 0;
925 
926     if (parse_reset(self) < 0) {
927         Py_DECREF(self);
928         return NULL;
929     }
930 
931     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
932         Py_DECREF(self);
933         return NULL;
934     }
935     self->input_iter = PyObject_GetIter(iterator);
936     if (self->input_iter == NULL) {
937         PyErr_SetString(PyExc_TypeError,
938                         "argument 1 must be an iterator");
939         Py_DECREF(self);
940         return NULL;
941     }
942     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
943     if (self->dialect == NULL) {
944         Py_DECREF(self);
945         return NULL;
946     }
947 
948     PyObject_GC_Track(self);
949     return (PyObject *)self;
950 }
951 
952 /*
953  * WRITER
954  */
955 /* ---------------------------------------------------------------- */
956 static void
join_reset(WriterObj * self)957 join_reset(WriterObj *self)
958 {
959     self->rec_len = 0;
960     self->num_fields = 0;
961 }
962 
/* Step, in bytes, used when sizing the writer's record buffer. */
#define MEM_INCR (32 * 1024)
964 
965 /* Calculate new record length or append field to record.  Return new
966  * record length.
967  */
968 static int
join_append_data(WriterObj * self,char * field,int quote_empty,int * quoted,int copy_phase)969 join_append_data(WriterObj *self, char *field, int quote_empty,
970                  int *quoted, int copy_phase)
971 {
972     DialectObj *dialect = self->dialect;
973     int i, rec_len;
974     char *lineterm;
975 
976 #define ADDCH(c) \
977     do {\
978         if (copy_phase) \
979             self->rec[rec_len] = c;\
980         rec_len++;\
981     } while(0)
982 
983     lineterm = PyString_AsString(dialect->lineterminator);
984     if (lineterm == NULL)
985         return -1;
986 
987     rec_len = self->rec_len;
988 
989     /* If this is not the first field we need a field separator */
990     if (self->num_fields > 0)
991         ADDCH(dialect->delimiter);
992 
993     /* Handle preceding quote */
994     if (copy_phase && *quoted)
995         ADDCH(dialect->quotechar);
996 
997     /* Copy/count field data */
998     for (i = 0;; i++) {
999         char c = field[i];
1000         int want_escape = 0;
1001 
1002         if (c == '\0')
1003             break;
1004 
1005         if (c == dialect->delimiter ||
1006             c == dialect->escapechar ||
1007             c == dialect->quotechar ||
1008             strchr(lineterm, c)) {
1009             if (dialect->quoting == QUOTE_NONE)
1010                 want_escape = 1;
1011             else {
1012                 if (c == dialect->quotechar) {
1013                     if (dialect->doublequote)
1014                         ADDCH(dialect->quotechar);
1015                     else
1016                         want_escape = 1;
1017                 }
1018                 if (!want_escape)
1019                     *quoted = 1;
1020             }
1021             if (want_escape) {
1022                 if (!dialect->escapechar) {
1023                     PyErr_Format(error_obj,
1024                                  "need to escape, but no escapechar set");
1025                     return -1;
1026                 }
1027                 ADDCH(dialect->escapechar);
1028             }
1029         }
1030         /* Copy field character into record buffer.
1031          */
1032         ADDCH(c);
1033     }
1034 
1035     /* If field is empty check if it needs to be quoted.
1036      */
1037     if (i == 0 && quote_empty) {
1038         if (dialect->quoting == QUOTE_NONE) {
1039             PyErr_Format(error_obj,
1040                          "single empty field record must be quoted");
1041             return -1;
1042         }
1043         else
1044             *quoted = 1;
1045     }
1046 
1047     if (*quoted) {
1048         if (copy_phase)
1049             ADDCH(dialect->quotechar);
1050         else
1051             rec_len += 2;
1052     }
1053     return rec_len;
1054 #undef ADDCH
1055 }
1056 
1057 static int
join_check_rec_size(WriterObj * self,int rec_len)1058 join_check_rec_size(WriterObj *self, int rec_len)
1059 {
1060 
1061     if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1062         PyErr_NoMemory();
1063         return 0;
1064     }
1065 
1066     if (rec_len > self->rec_size) {
1067         if (self->rec_size == 0) {
1068             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1069             if (self->rec != NULL)
1070                 PyMem_Free(self->rec);
1071             self->rec = PyMem_Malloc(self->rec_size);
1072         }
1073         else {
1074             char *old_rec = self->rec;
1075 
1076             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1077             self->rec = PyMem_Realloc(self->rec, self->rec_size);
1078             if (self->rec == NULL)
1079                 PyMem_Free(old_rec);
1080         }
1081         if (self->rec == NULL) {
1082             PyErr_NoMemory();
1083             return 0;
1084         }
1085     }
1086     return 1;
1087 }
1088 
1089 static int
join_append(WriterObj * self,char * field,int * quoted,int quote_empty)1090 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1091 {
1092     int rec_len;
1093 
1094     rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1095     if (rec_len < 0)
1096         return 0;
1097 
1098     /* grow record buffer if necessary */
1099     if (!join_check_rec_size(self, rec_len))
1100         return 0;
1101 
1102     self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1103     self->num_fields++;
1104 
1105     return 1;
1106 }
1107 
1108 static int
join_append_lineterminator(WriterObj * self)1109 join_append_lineterminator(WriterObj *self)
1110 {
1111     int terminator_len;
1112     char *terminator;
1113 
1114     terminator_len = PyString_Size(self->dialect->lineterminator);
1115     if (terminator_len == -1)
1116         return 0;
1117 
1118     /* grow record buffer if necessary */
1119     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1120         return 0;
1121 
1122     terminator = PyString_AsString(self->dialect->lineterminator);
1123     if (terminator == NULL)
1124         return 0;
1125     memmove(self->rec + self->rec_len, terminator, terminator_len);
1126     self->rec_len += terminator_len;
1127 
1128     return 1;
1129 }
1130 
1131 PyDoc_STRVAR(csv_writerow_doc,
1132 "writerow(sequence)\n"
1133 "\n"
1134 "Construct and write a CSV record from a sequence of fields.  Non-string\n"
1135 "elements will be converted to string.");
1136 
1137 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1138 csv_writerow(WriterObj *self, PyObject *seq)
1139 {
1140     DialectObj *dialect = self->dialect;
1141     int len, i;
1142 
1143     if (!PySequence_Check(seq))
1144         return PyErr_Format(error_obj, "sequence expected");
1145 
1146     len = PySequence_Length(seq);
1147     if (len < 0)
1148         return NULL;
1149 
1150     /* Join all fields in internal buffer.
1151      */
1152     join_reset(self);
1153     for (i = 0; i < len; i++) {
1154         PyObject *field;
1155         int append_ok;
1156         int quoted;
1157 
1158         field = PySequence_GetItem(seq, i);
1159         if (field == NULL)
1160             return NULL;
1161 
1162         switch (dialect->quoting) {
1163         case QUOTE_NONNUMERIC:
1164             quoted = !PyNumber_Check(field);
1165             break;
1166         case QUOTE_ALL:
1167             quoted = 1;
1168             break;
1169         default:
1170             quoted = 0;
1171             break;
1172         }
1173 
1174         if (PyString_Check(field)) {
1175             append_ok = join_append(self,
1176                                     PyString_AS_STRING(field),
1177                                     &quoted, len == 1);
1178             Py_DECREF(field);
1179         }
1180         else if (field == Py_None) {
1181             append_ok = join_append(self, "", &quoted, len == 1);
1182             Py_DECREF(field);
1183         }
1184         else {
1185             PyObject *str;
1186 
1187             str = PyObject_Str(field);
1188             Py_DECREF(field);
1189             if (str == NULL)
1190                 return NULL;
1191 
1192             append_ok = join_append(self, PyString_AS_STRING(str),
1193                                     &quoted, len == 1);
1194             Py_DECREF(str);
1195         }
1196         if (!append_ok)
1197             return NULL;
1198     }
1199 
1200     /* Add line terminator.
1201      */
1202     if (!join_append_lineterminator(self))
1203         return 0;
1204 
1205     return PyObject_CallFunction(self->writeline,
1206                                  "(s#)", self->rec, self->rec_len);
1207 }
1208 
1209 PyDoc_STRVAR(csv_writerows_doc,
1210 "writerows(sequence of sequences)\n"
1211 "\n"
1212 "Construct and write a series of sequences to a csv file.  Non-string\n"
1213 "elements will be converted to string.");
1214 
1215 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1216 csv_writerows(WriterObj *self, PyObject *seqseq)
1217 {
1218     PyObject *row_iter, *row_obj, *result;
1219 
1220     row_iter = PyObject_GetIter(seqseq);
1221     if (row_iter == NULL) {
1222         PyErr_SetString(PyExc_TypeError,
1223                         "writerows() argument must be iterable");
1224         return NULL;
1225     }
1226     while ((row_obj = PyIter_Next(row_iter))) {
1227         result = csv_writerow(self, row_obj);
1228         Py_DECREF(row_obj);
1229         if (!result) {
1230             Py_DECREF(row_iter);
1231             return NULL;
1232         }
1233         else
1234              Py_DECREF(result);
1235     }
1236     Py_DECREF(row_iter);
1237     if (PyErr_Occurred())
1238         return NULL;
1239     Py_INCREF(Py_None);
1240     return Py_None;
1241 }
1242 
1243 static struct PyMethodDef Writer_methods[] = {
1244     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1245     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1246     { NULL, NULL }
1247 };
1248 
1249 #define W_OFF(x) offsetof(WriterObj, x)
1250 
1251 static struct PyMemberDef Writer_memberlist[] = {
1252     { "dialect", T_OBJECT, W_OFF(dialect), RO },
1253     { NULL }
1254 };
1255 
1256 static void
Writer_dealloc(WriterObj * self)1257 Writer_dealloc(WriterObj *self)
1258 {
1259     PyObject_GC_UnTrack(self);
1260     Py_XDECREF(self->dialect);
1261     Py_XDECREF(self->writeline);
1262     if (self->rec != NULL)
1263         PyMem_Free(self->rec);
1264     PyObject_GC_Del(self);
1265 }
1266 
1267 static int
Writer_traverse(WriterObj * self,visitproc visit,void * arg)1268 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1269 {
1270     Py_VISIT(self->dialect);
1271     Py_VISIT(self->writeline);
1272     return 0;
1273 }
1274 
1275 static int
Writer_clear(WriterObj * self)1276 Writer_clear(WriterObj *self)
1277 {
1278     Py_CLEAR(self->dialect);
1279     Py_CLEAR(self->writeline);
1280     return 0;
1281 }
1282 
1283 PyDoc_STRVAR(Writer_Type_doc,
1284 "CSV writer\n"
1285 "\n"
1286 "Writer objects are responsible for generating tabular data\n"
1287 "in CSV format from sequence input.\n"
1288 );
1289 
1290 static PyTypeObject Writer_Type = {
1291     PyVarObject_HEAD_INIT(NULL, 0)
1292     "_csv.writer",                          /*tp_name*/
1293     sizeof(WriterObj),                      /*tp_basicsize*/
1294     0,                                      /*tp_itemsize*/
1295     /* methods */
1296     (destructor)Writer_dealloc,             /*tp_dealloc*/
1297     (printfunc)0,                           /*tp_print*/
1298     (getattrfunc)0,                         /*tp_getattr*/
1299     (setattrfunc)0,                         /*tp_setattr*/
1300     (cmpfunc)0,                             /*tp_compare*/
1301     (reprfunc)0,                            /*tp_repr*/
1302     0,                                      /*tp_as_number*/
1303     0,                                      /*tp_as_sequence*/
1304     0,                                      /*tp_as_mapping*/
1305     (hashfunc)0,                            /*tp_hash*/
1306     (ternaryfunc)0,                         /*tp_call*/
1307     (reprfunc)0,                            /*tp_str*/
1308     0,                                      /*tp_getattro*/
1309     0,                                      /*tp_setattro*/
1310     0,                                      /*tp_as_buffer*/
1311     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1312         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
1313     Writer_Type_doc,
1314     (traverseproc)Writer_traverse,          /*tp_traverse*/
1315     (inquiry)Writer_clear,                  /*tp_clear*/
1316     0,                                      /*tp_richcompare*/
1317     0,                                      /*tp_weaklistoffset*/
1318     (getiterfunc)0,                         /*tp_iter*/
1319     (getiterfunc)0,                         /*tp_iternext*/
1320     Writer_methods,                         /*tp_methods*/
1321     Writer_memberlist,                      /*tp_members*/
1322     0,                                      /*tp_getset*/
1323 };
1324 
1325 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1326 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1327 {
1328     PyObject * output_file, * dialect = NULL;
1329     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1330 
1331     if (!self)
1332         return NULL;
1333 
1334     self->dialect = NULL;
1335     self->writeline = NULL;
1336 
1337     self->rec = NULL;
1338     self->rec_size = 0;
1339     self->rec_len = 0;
1340     self->num_fields = 0;
1341 
1342     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1343         Py_DECREF(self);
1344         return NULL;
1345     }
1346     self->writeline = PyObject_GetAttrString(output_file, "write");
1347     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1348         PyErr_SetString(PyExc_TypeError,
1349                         "argument 1 must have a \"write\" method");
1350         Py_DECREF(self);
1351         return NULL;
1352     }
1353     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1354     if (self->dialect == NULL) {
1355         Py_DECREF(self);
1356         return NULL;
1357     }
1358     PyObject_GC_Track(self);
1359     return (PyObject *)self;
1360 }
1361 
1362 /*
1363  * DIALECT REGISTRY
1364  */
1365 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1366 csv_list_dialects(PyObject *module, PyObject *args)
1367 {
1368     return PyDict_Keys(dialects);
1369 }
1370 
1371 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1372 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1373 {
1374     PyObject *name_obj, *dialect_obj = NULL;
1375     PyObject *dialect;
1376 
1377     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1378         return NULL;
1379     if (!IS_BASESTRING(name_obj)) {
1380         PyErr_SetString(PyExc_TypeError,
1381                         "dialect name must be a string or unicode");
1382         return NULL;
1383     }
1384     dialect = _call_dialect(dialect_obj, kwargs);
1385     if (dialect == NULL)
1386         return NULL;
1387     if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1388         Py_DECREF(dialect);
1389         return NULL;
1390     }
1391     Py_DECREF(dialect);
1392     Py_INCREF(Py_None);
1393     return Py_None;
1394 }
1395 
1396 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1397 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1398 {
1399     if (PyDict_DelItem(dialects, name_obj) < 0)
1400         return PyErr_Format(error_obj, "unknown dialect");
1401     Py_INCREF(Py_None);
1402     return Py_None;
1403 }
1404 
1405 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1406 csv_get_dialect(PyObject *module, PyObject *name_obj)
1407 {
1408     return get_dialect_from_registry(name_obj);
1409 }
1410 
1411 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1412 csv_field_size_limit(PyObject *module, PyObject *args)
1413 {
1414     PyObject *new_limit = NULL;
1415     long old_limit = field_limit;
1416 
1417     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1418         return NULL;
1419     if (new_limit != NULL) {
1420         if (!PyInt_Check(new_limit)) {
1421             PyErr_Format(PyExc_TypeError,
1422                          "limit must be an integer");
1423             return NULL;
1424         }
1425         field_limit = PyInt_AsLong(new_limit);
1426     }
1427     return PyInt_FromLong(old_limit);
1428 }
1429 
1430 /*
1431  * MODULE
1432  */
1433 
1434 PyDoc_STRVAR(csv_module_doc,
1435 "CSV parsing and writing.\n"
1436 "\n"
1437 "This module provides classes that assist in the reading and writing\n"
1438 "of Comma Separated Value (CSV) files, and implements the interface\n"
1439 "described by PEP 305.  Although many CSV files are simple to parse,\n"
1440 "the format is not formally defined by a stable specification and\n"
1441 "is subtle enough that parsing lines of a CSV file with something\n"
1442 "like line.split(\",\") is bound to fail.  The module supports three\n"
1443 "basic APIs: reading, writing, and registration of dialects.\n"
1444 "\n"
1445 "\n"
1446 "DIALECT REGISTRATION:\n"
1447 "\n"
1448 "Readers and writers support a dialect argument, which is a convenient\n"
1449 "handle on a group of settings.  When the dialect argument is a string,\n"
1450 "it identifies one of the dialects previously registered with the module.\n"
1451 "If it is a class or instance, the attributes of the argument are used as\n"
1452 "the settings for the reader or writer:\n"
1453 "\n"
1454 "    class excel:\n"
1455 "        delimiter = ','\n"
1456 "        quotechar = '\"'\n"
1457 "        escapechar = None\n"
1458 "        doublequote = True\n"
1459 "        skipinitialspace = False\n"
1460 "        lineterminator = '\\r\\n'\n"
1461 "        quoting = QUOTE_MINIMAL\n"
1462 "\n"
1463 "SETTINGS:\n"
1464 "\n"
1465 "    * quotechar - specifies a one-character string to use as the \n"
1466 "        quoting character.  It defaults to '\"'.\n"
1467 "    * delimiter - specifies a one-character string to use as the \n"
1468 "        field separator.  It defaults to ','.\n"
1469 "    * skipinitialspace - specifies how to interpret whitespace which\n"
1470 "        immediately follows a delimiter.  It defaults to False, which\n"
1471 "        means that whitespace immediately following a delimiter is part\n"
1472 "        of the following field.\n"
1473 "    * lineterminator -  specifies the character sequence which should \n"
1474 "        terminate rows.\n"
1475 "    * quoting - controls when quotes should be generated by the writer.\n"
1476 "        It can take on any of the following module constants:\n"
1477 "\n"
1478 "        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1479 "            field contains either the quotechar or the delimiter\n"
1480 "        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1481 "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1482 "            fields which do not parse as integers or floating point\n"
1483 "            numbers.\n"
1484 "        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1485 "    * escapechar - specifies a one-character string used to escape \n"
1486 "        the delimiter when quoting is set to QUOTE_NONE.\n"
1487 "    * doublequote - controls the handling of quotes inside fields.  When\n"
1488 "        True, two consecutive quotes are interpreted as one during read,\n"
1489 "        and when writing, each quote character embedded in the data is\n"
1490 "        written as two quotes\n");
1491 
1492 PyDoc_STRVAR(csv_reader_doc,
1493 "    csv_reader = reader(iterable [, dialect='excel']\n"
1494 "                        [optional keyword args])\n"
1495 "    for row in csv_reader:\n"
1496 "        process(row)\n"
1497 "\n"
1498 "The \"iterable\" argument can be any object that returns a line\n"
1499 "of input for each iteration, such as a file object or a list.  The\n"
1500 "optional \"dialect\" parameter is discussed below.  The function\n"
1501 "also accepts optional keyword arguments which override settings\n"
1502 "provided by the dialect.\n"
1503 "\n"
1504 "The returned object is an iterator.  Each iteration returns a row\n"
1505 "of the CSV file (which can span multiple input lines):\n");
1506 
1507 PyDoc_STRVAR(csv_writer_doc,
1508 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1509 "                            [optional keyword args])\n"
1510 "    for row in sequence:\n"
1511 "        csv_writer.writerow(row)\n"
1512 "\n"
1513 "    [or]\n"
1514 "\n"
1515 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1516 "                            [optional keyword args])\n"
1517 "    csv_writer.writerows(rows)\n"
1518 "\n"
1519 "The \"fileobj\" argument can be any object that supports the file API.\n");
1520 
1521 PyDoc_STRVAR(csv_list_dialects_doc,
1522 "Return a list of all know dialect names.\n"
1523 "    names = csv.list_dialects()");
1524 
1525 PyDoc_STRVAR(csv_get_dialect_doc,
1526 "Return the dialect instance associated with name.\n"
1527 "    dialect = csv.get_dialect(name)");
1528 
1529 PyDoc_STRVAR(csv_register_dialect_doc,
1530 "Create a mapping from a string name to a dialect class.\n"
1531 "    dialect = csv.register_dialect(name, dialect)");
1532 
1533 PyDoc_STRVAR(csv_unregister_dialect_doc,
1534 "Delete the name/dialect mapping associated with a string name.\n"
1535 "    csv.unregister_dialect(name)");
1536 
1537 PyDoc_STRVAR(csv_field_size_limit_doc,
1538 "Sets an upper limit on parsed fields.\n"
1539 "    csv.field_size_limit([limit])\n"
1540 "\n"
1541 "Returns old limit. If limit is not given, no new limit is set and\n"
1542 "the old limit is returned");
1543 
1544 static struct PyMethodDef csv_methods[] = {
1545     { "reader", (PyCFunction)csv_reader,
1546         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1547     { "writer", (PyCFunction)csv_writer,
1548         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1549     { "list_dialects", (PyCFunction)csv_list_dialects,
1550         METH_NOARGS, csv_list_dialects_doc},
1551     { "register_dialect", (PyCFunction)csv_register_dialect,
1552         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1553     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1554         METH_O, csv_unregister_dialect_doc},
1555     { "get_dialect", (PyCFunction)csv_get_dialect,
1556         METH_O, csv_get_dialect_doc},
1557     { "field_size_limit", (PyCFunction)csv_field_size_limit,
1558         METH_VARARGS, csv_field_size_limit_doc},
1559     { NULL, NULL }
1560 };
1561 
1562 PyMODINIT_FUNC
init_csv(void)1563 init_csv(void)
1564 {
1565     PyObject *module;
1566     StyleDesc *style;
1567 
1568     if (PyType_Ready(&Dialect_Type) < 0)
1569         return;
1570 
1571     if (PyType_Ready(&Reader_Type) < 0)
1572         return;
1573 
1574     if (PyType_Ready(&Writer_Type) < 0)
1575         return;
1576 
1577     /* Create the module and add the functions */
1578     module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1579     if (module == NULL)
1580         return;
1581 
1582     /* Add version to the module. */
1583     if (PyModule_AddStringConstant(module, "__version__",
1584                                    MODULE_VERSION) == -1)
1585         return;
1586 
1587     /* Add _dialects dictionary */
1588     dialects = PyDict_New();
1589     if (dialects == NULL)
1590         return;
1591     if (PyModule_AddObject(module, "_dialects", dialects))
1592         return;
1593 
1594     /* Add quote styles into dictionary */
1595     for (style = quote_styles; style->name; style++) {
1596         if (PyModule_AddIntConstant(module, style->name,
1597                                     style->style) == -1)
1598             return;
1599     }
1600 
1601     /* Add the Dialect type */
1602     Py_INCREF(&Dialect_Type);
1603     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1604         return;
1605 
1606     /* Add the CSV exception object to the module. */
1607     error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1608     if (error_obj == NULL)
1609         return;
1610     PyModule_AddObject(module, "Error", error_obj);
1611 }
1612