1 /*
2 unicode_format.h -- implementation of str.format().
3 */
4
5 /************************************************************************/
6 /*********** Global data structures and forward declarations *********/
7 /************************************************************************/
8
9 /*
10 A SubString consists of the characters between two string or
11 unicode pointers.
12 */
13 typedef struct {
14 PyObject *str; /* borrowed reference */
15 Py_ssize_t start, end;
16 } SubString;
17
18
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT = 0,       /* starting state set by AutoNumber_Init */
    ANS_AUTO = 1,       /* field names are omitted and numbered for us */
    ANS_MANUAL = 2      /* field names carry an explicit numeric index */
} AutoNumberState;
24
25 /* Keeps track of our auto-numbering state, and which number field we're on */
26 typedef struct {
27 AutoNumberState an_state;
28 int an_field_number;
29 } AutoNumber;
30
31
32 /* forward declaration for recursion */
33 static PyObject *
34 build_string(SubString *input, PyObject *args, PyObject *kwargs,
35 int recursion_depth, AutoNumber *auto_number);
36
37
38
39 /************************************************************************/
40 /************************** Utility functions ************************/
41 /************************************************************************/
42
43 static void
AutoNumber_Init(AutoNumber * auto_number)44 AutoNumber_Init(AutoNumber *auto_number)
45 {
46 auto_number->an_state = ANS_INIT;
47 auto_number->an_field_number = 0;
48 }
49
50 /* fill in a SubString from a pointer and length */
51 Py_LOCAL_INLINE(void)
SubString_init(SubString * str,PyObject * s,Py_ssize_t start,Py_ssize_t end)52 SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
53 {
54 str->str = s;
55 str->start = start;
56 str->end = end;
57 }
58
59 /* return a new string. if str->str is NULL, return None */
60 Py_LOCAL_INLINE(PyObject *)
SubString_new_object(SubString * str)61 SubString_new_object(SubString *str)
62 {
63 if (str->str == NULL)
64 Py_RETURN_NONE;
65 return PyUnicode_Substring(str->str, str->start, str->end);
66 }
67
68 /* return a new string. if str->str is NULL, return a new empty string */
69 Py_LOCAL_INLINE(PyObject *)
SubString_new_object_or_empty(SubString * str)70 SubString_new_object_or_empty(SubString *str)
71 {
72 if (str->str == NULL) {
73 return PyUnicode_New(0, 0);
74 }
75 return SubString_new_object(str);
76 }
77
78 /* Return 1 if an error has been detected switching between automatic
79 field numbering and manual field specification, else return 0. Set
80 ValueError on error. */
81 static int
autonumber_state_error(AutoNumberState state,int field_name_is_empty)82 autonumber_state_error(AutoNumberState state, int field_name_is_empty)
83 {
84 if (state == ANS_MANUAL) {
85 if (field_name_is_empty) {
86 PyErr_SetString(PyExc_ValueError, "cannot switch from "
87 "manual field specification to "
88 "automatic field numbering");
89 return 1;
90 }
91 }
92 else {
93 if (!field_name_is_empty) {
94 PyErr_SetString(PyExc_ValueError, "cannot switch from "
95 "automatic field numbering to "
96 "manual field specification");
97 return 1;
98 }
99 }
100 return 0;
101 }
102
103
104 /************************************************************************/
105 /*********** Format string parsing -- integers and identifiers *********/
106 /************************************************************************/
107
108 static Py_ssize_t
get_integer(const SubString * str)109 get_integer(const SubString *str)
110 {
111 Py_ssize_t accumulator = 0;
112 Py_ssize_t digitval;
113 Py_ssize_t i;
114
115 /* empty string is an error */
116 if (str->start >= str->end)
117 return -1;
118
119 for (i = str->start; i < str->end; i++) {
120 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
121 if (digitval < 0)
122 return -1;
123 /*
124 Detect possible overflow before it happens:
125
126 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
127 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
128 */
129 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
130 PyErr_Format(PyExc_ValueError,
131 "Too many decimal digits in format string");
132 return -1;
133 }
134 accumulator = accumulator * 10 + digitval;
135 }
136 return accumulator;
137 }
138
139 /************************************************************************/
140 /******** Functions to get field objects and specification strings ******/
141 /************************************************************************/
142
143 /* do the equivalent of obj.name */
144 static PyObject *
getattr(PyObject * obj,SubString * name)145 getattr(PyObject *obj, SubString *name)
146 {
147 PyObject *newobj;
148 PyObject *str = SubString_new_object(name);
149 if (str == NULL)
150 return NULL;
151 newobj = PyObject_GetAttr(obj, str);
152 Py_DECREF(str);
153 return newobj;
154 }
155
156 /* do the equivalent of obj[idx], where obj is a sequence */
157 static PyObject *
getitem_sequence(PyObject * obj,Py_ssize_t idx)158 getitem_sequence(PyObject *obj, Py_ssize_t idx)
159 {
160 return PySequence_GetItem(obj, idx);
161 }
162
163 /* do the equivalent of obj[idx], where obj is not a sequence */
164 static PyObject *
getitem_idx(PyObject * obj,Py_ssize_t idx)165 getitem_idx(PyObject *obj, Py_ssize_t idx)
166 {
167 PyObject *newobj;
168 PyObject *idx_obj = PyLong_FromSsize_t(idx);
169 if (idx_obj == NULL)
170 return NULL;
171 newobj = PyObject_GetItem(obj, idx_obj);
172 Py_DECREF(idx_obj);
173 return newobj;
174 }
175
176 /* do the equivalent of obj[name] */
177 static PyObject *
getitem_str(PyObject * obj,SubString * name)178 getitem_str(PyObject *obj, SubString *name)
179 {
180 PyObject *newobj;
181 PyObject *str = SubString_new_object(name);
182 if (str == NULL)
183 return NULL;
184 newobj = PyObject_GetItem(obj, str);
185 Py_DECREF(str);
186 return newobj;
187 }
188
189 typedef struct {
190 /* the entire string we're parsing. we assume that someone else
191 is managing its lifetime, and that it will exist for the
192 lifetime of the iterator. can be empty */
193 SubString str;
194
195 /* index to where we are inside field_name */
196 Py_ssize_t index;
197 } FieldNameIterator;
198
199
200 static int
FieldNameIterator_init(FieldNameIterator * self,PyObject * s,Py_ssize_t start,Py_ssize_t end)201 FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
202 Py_ssize_t start, Py_ssize_t end)
203 {
204 SubString_init(&self->str, s, start, end);
205 self->index = start;
206 return 1;
207 }
208
209 static int
_FieldNameIterator_attr(FieldNameIterator * self,SubString * name)210 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
211 {
212 Py_UCS4 c;
213
214 name->str = self->str.str;
215 name->start = self->index;
216
217 /* return everything until '.' or '[' */
218 while (self->index < self->str.end) {
219 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
220 switch (c) {
221 case '[':
222 case '.':
223 /* backup so that we this character will be seen next time */
224 self->index--;
225 break;
226 default:
227 continue;
228 }
229 break;
230 }
231 /* end of string is okay */
232 name->end = self->index;
233 return 1;
234 }
235
236 static int
_FieldNameIterator_item(FieldNameIterator * self,SubString * name)237 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
238 {
239 int bracket_seen = 0;
240 Py_UCS4 c;
241
242 name->str = self->str.str;
243 name->start = self->index;
244
245 /* return everything until ']' */
246 while (self->index < self->str.end) {
247 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
248 switch (c) {
249 case ']':
250 bracket_seen = 1;
251 break;
252 default:
253 continue;
254 }
255 break;
256 }
257 /* make sure we ended with a ']' */
258 if (!bracket_seen) {
259 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
260 return 0;
261 }
262
263 /* end of string is okay */
264 /* don't include the ']' */
265 name->end = self->index-1;
266 return 1;
267 }
268
269 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
270 static int
FieldNameIterator_next(FieldNameIterator * self,int * is_attribute,Py_ssize_t * name_idx,SubString * name)271 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
272 Py_ssize_t *name_idx, SubString *name)
273 {
274 /* check at end of input */
275 if (self->index >= self->str.end)
276 return 1;
277
278 switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
279 case '.':
280 *is_attribute = 1;
281 if (_FieldNameIterator_attr(self, name) == 0)
282 return 0;
283 *name_idx = -1;
284 break;
285 case '[':
286 *is_attribute = 0;
287 if (_FieldNameIterator_item(self, name) == 0)
288 return 0;
289 *name_idx = get_integer(name);
290 if (*name_idx == -1 && PyErr_Occurred())
291 return 0;
292 break;
293 default:
294 /* Invalid character follows ']' */
295 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
296 "follow ']' in format field specifier");
297 return 0;
298 }
299
300 /* empty string is an error */
301 if (name->start == name->end) {
302 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
303 return 0;
304 }
305
306 return 2;
307 }
308
309
310 /* input: field_name
311 output: 'first' points to the part before the first '[' or '.'
312 'first_idx' is -1 if 'first' is not an integer, otherwise
313 it's the value of first converted to an integer
314 'rest' is an iterator to return the rest
315 */
316 static int
field_name_split(PyObject * str,Py_ssize_t start,Py_ssize_t end,SubString * first,Py_ssize_t * first_idx,FieldNameIterator * rest,AutoNumber * auto_number)317 field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
318 Py_ssize_t *first_idx, FieldNameIterator *rest,
319 AutoNumber *auto_number)
320 {
321 Py_UCS4 c;
322 Py_ssize_t i = start;
323 int field_name_is_empty;
324 int using_numeric_index;
325
326 /* find the part up until the first '.' or '[' */
327 while (i < end) {
328 switch (c = PyUnicode_READ_CHAR(str, i++)) {
329 case '[':
330 case '.':
331 /* backup so that we this character is available to the
332 "rest" iterator */
333 i--;
334 break;
335 default:
336 continue;
337 }
338 break;
339 }
340
341 /* set up the return values */
342 SubString_init(first, str, start, i);
343 FieldNameIterator_init(rest, str, i, end);
344
345 /* see if "first" is an integer, in which case it's used as an index */
346 *first_idx = get_integer(first);
347 if (*first_idx == -1 && PyErr_Occurred())
348 return 0;
349
350 field_name_is_empty = first->start >= first->end;
351
352 /* If the field name is omitted or if we have a numeric index
353 specified, then we're doing numeric indexing into args. */
354 using_numeric_index = field_name_is_empty || *first_idx != -1;
355
356 /* We always get here exactly one time for each field we're
357 processing. And we get here in field order (counting by left
358 braces). So this is the perfect place to handle automatic field
359 numbering if the field name is omitted. */
360
361 /* Check if we need to do the auto-numbering. It's not needed if
362 we're called from string.Format routines, because it's handled
363 in that class by itself. */
364 if (auto_number) {
365 /* Initialize our auto numbering state if this is the first
366 time we're either auto-numbering or manually numbering. */
367 if (auto_number->an_state == ANS_INIT && using_numeric_index)
368 auto_number->an_state = field_name_is_empty ?
369 ANS_AUTO : ANS_MANUAL;
370
371 /* Make sure our state is consistent with what we're doing
372 this time through. Only check if we're using a numeric
373 index. */
374 if (using_numeric_index)
375 if (autonumber_state_error(auto_number->an_state,
376 field_name_is_empty))
377 return 0;
378 /* Zero length field means we want to do auto-numbering of the
379 fields. */
380 if (field_name_is_empty)
381 *first_idx = (auto_number->an_field_number)++;
382 }
383
384 return 1;
385 }
386
387
388 /*
389 get_field_object returns the object inside {}, before the
390 format_spec. It handles getindex and getattr lookups and consumes
391 the entire input string.
392 */
393 static PyObject *
get_field_object(SubString * input,PyObject * args,PyObject * kwargs,AutoNumber * auto_number)394 get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
395 AutoNumber *auto_number)
396 {
397 PyObject *obj = NULL;
398 int ok;
399 int is_attribute;
400 SubString name;
401 SubString first;
402 Py_ssize_t index;
403 FieldNameIterator rest;
404
405 if (!field_name_split(input->str, input->start, input->end, &first,
406 &index, &rest, auto_number)) {
407 goto error;
408 }
409
410 if (index == -1) {
411 /* look up in kwargs */
412 PyObject *key = SubString_new_object(&first);
413 if (key == NULL) {
414 goto error;
415 }
416 if (kwargs == NULL) {
417 PyErr_SetObject(PyExc_KeyError, key);
418 Py_DECREF(key);
419 goto error;
420 }
421 /* Use PyObject_GetItem instead of PyDict_GetItem because this
422 code is no longer just used with kwargs. It might be passed
423 a non-dict when called through format_map. */
424 obj = PyObject_GetItem(kwargs, key);
425 Py_DECREF(key);
426 if (obj == NULL) {
427 goto error;
428 }
429 }
430 else {
431 /* If args is NULL, we have a format string with a positional field
432 with only kwargs to retrieve it from. This can only happen when
433 used with format_map(), where positional arguments are not
434 allowed. */
435 if (args == NULL) {
436 PyErr_SetString(PyExc_ValueError, "Format string contains "
437 "positional fields");
438 goto error;
439 }
440
441 /* look up in args */
442 obj = PySequence_GetItem(args, index);
443 if (obj == NULL) {
444 PyErr_Format(PyExc_IndexError,
445 "Replacement index %zd out of range for positional "
446 "args tuple",
447 index);
448 goto error;
449 }
450 }
451
452 /* iterate over the rest of the field_name */
453 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
454 &name)) == 2) {
455 PyObject *tmp;
456
457 if (is_attribute)
458 /* getattr lookup "." */
459 tmp = getattr(obj, &name);
460 else
461 /* getitem lookup "[]" */
462 if (index == -1)
463 tmp = getitem_str(obj, &name);
464 else
465 if (PySequence_Check(obj))
466 tmp = getitem_sequence(obj, index);
467 else
468 /* not a sequence */
469 tmp = getitem_idx(obj, index);
470 if (tmp == NULL)
471 goto error;
472
473 /* assign to obj */
474 Py_DECREF(obj);
475 obj = tmp;
476 }
477 /* end of iterator, this is the non-error case */
478 if (ok == 1)
479 return obj;
480 error:
481 Py_XDECREF(obj);
482 return NULL;
483 }
484
485 /************************************************************************/
486 /***************** Field rendering functions **************************/
487 /************************************************************************/
488
489 /*
490 render_field() is the main function in this section. It takes the
491 field object and field specification string generated by
492 get_field_and_spec, and renders the field into the output string.
493
494 render_field calls fieldobj.__format__(format_spec) method, and
495 appends to the output.
496 */
497 static int
render_field(PyObject * fieldobj,SubString * format_spec,_PyUnicodeWriter * writer)498 render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
499 {
500 int ok = 0;
501 PyObject *result = NULL;
502 PyObject *format_spec_object = NULL;
503 int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
504 int err;
505
506 /* If we know the type exactly, skip the lookup of __format__ and just
507 call the formatter directly. */
508 if (PyUnicode_CheckExact(fieldobj))
509 formatter = _PyUnicode_FormatAdvancedWriter;
510 else if (PyLong_CheckExact(fieldobj))
511 formatter = _PyLong_FormatAdvancedWriter;
512 else if (PyFloat_CheckExact(fieldobj))
513 formatter = _PyFloat_FormatAdvancedWriter;
514 else if (PyComplex_CheckExact(fieldobj))
515 formatter = _PyComplex_FormatAdvancedWriter;
516
517 if (formatter) {
518 /* we know exactly which formatter will be called when __format__ is
519 looked up, so call it directly, instead. */
520 err = formatter(writer, fieldobj, format_spec->str,
521 format_spec->start, format_spec->end);
522 return (err == 0);
523 }
524 else {
525 /* We need to create an object out of the pointers we have, because
526 __format__ takes a string/unicode object for format_spec. */
527 if (format_spec->str)
528 format_spec_object = PyUnicode_Substring(format_spec->str,
529 format_spec->start,
530 format_spec->end);
531 else
532 format_spec_object = PyUnicode_New(0, 0);
533 if (format_spec_object == NULL)
534 goto done;
535
536 result = PyObject_Format(fieldobj, format_spec_object);
537 }
538 if (result == NULL)
539 goto done;
540
541 if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
542 goto done;
543 ok = 1;
544
545 done:
546 Py_XDECREF(format_spec_object);
547 Py_XDECREF(result);
548 return ok;
549 }
550
551 static int
parse_field(SubString * str,SubString * field_name,SubString * format_spec,int * format_spec_needs_expanding,Py_UCS4 * conversion)552 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
553 int *format_spec_needs_expanding, Py_UCS4 *conversion)
554 {
555 /* Note this function works if the field name is zero length,
556 which is good. Zero length field names are handled later, in
557 field_name_split. */
558
559 Py_UCS4 c = 0;
560
561 /* initialize these, as they may be empty */
562 *conversion = '\0';
563 SubString_init(format_spec, NULL, 0, 0);
564
565 /* Search for the field name. it's terminated by the end of
566 the string, or a ':' or '!' */
567 field_name->str = str->str;
568 field_name->start = str->start;
569 while (str->start < str->end) {
570 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
571 case '{':
572 PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
573 return 0;
574 case '[':
575 for (; str->start < str->end; str->start++)
576 if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
577 break;
578 continue;
579 case '}':
580 case ':':
581 case '!':
582 break;
583 default:
584 continue;
585 }
586 break;
587 }
588
589 field_name->end = str->start - 1;
590 if (c == '!' || c == ':') {
591 Py_ssize_t count;
592 /* we have a format specifier and/or a conversion */
593 /* don't include the last character */
594
595 /* see if there's a conversion specifier */
596 if (c == '!') {
597 /* there must be another character present */
598 if (str->start >= str->end) {
599 PyErr_SetString(PyExc_ValueError,
600 "end of string while looking for conversion "
601 "specifier");
602 return 0;
603 }
604 *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
605
606 if (str->start < str->end) {
607 c = PyUnicode_READ_CHAR(str->str, str->start++);
608 if (c == '}')
609 return 1;
610 if (c != ':') {
611 PyErr_SetString(PyExc_ValueError,
612 "expected ':' after conversion specifier");
613 return 0;
614 }
615 }
616 }
617 format_spec->str = str->str;
618 format_spec->start = str->start;
619 count = 1;
620 while (str->start < str->end) {
621 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
622 case '{':
623 *format_spec_needs_expanding = 1;
624 count++;
625 break;
626 case '}':
627 count--;
628 if (count == 0) {
629 format_spec->end = str->start - 1;
630 return 1;
631 }
632 break;
633 default:
634 break;
635 }
636 }
637
638 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
639 return 0;
640 }
641 else if (c != '}') {
642 PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
643 return 0;
644 }
645
646 return 1;
647 }
648
649 /************************************************************************/
650 /******* Output string allocation and escape-to-markup processing ******/
651 /************************************************************************/
652
653 /* MarkupIterator breaks the string into pieces of either literal
654 text, or things inside {} that need to be marked up. it is
655 designed to make it easy to wrap a Python iterator around it, for
656 use with the Formatter class */
657
658 typedef struct {
659 SubString str;
660 } MarkupIterator;
661
662 static int
MarkupIterator_init(MarkupIterator * self,PyObject * str,Py_ssize_t start,Py_ssize_t end)663 MarkupIterator_init(MarkupIterator *self, PyObject *str,
664 Py_ssize_t start, Py_ssize_t end)
665 {
666 SubString_init(&self->str, str, start, end);
667 return 1;
668 }
669
670 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
671 string (or something to be expanded) */
672 static int
MarkupIterator_next(MarkupIterator * self,SubString * literal,int * field_present,SubString * field_name,SubString * format_spec,Py_UCS4 * conversion,int * format_spec_needs_expanding)673 MarkupIterator_next(MarkupIterator *self, SubString *literal,
674 int *field_present, SubString *field_name,
675 SubString *format_spec, Py_UCS4 *conversion,
676 int *format_spec_needs_expanding)
677 {
678 int at_end;
679 Py_UCS4 c = 0;
680 Py_ssize_t start;
681 Py_ssize_t len;
682 int markup_follows = 0;
683
684 /* initialize all of the output variables */
685 SubString_init(literal, NULL, 0, 0);
686 SubString_init(field_name, NULL, 0, 0);
687 SubString_init(format_spec, NULL, 0, 0);
688 *conversion = '\0';
689 *format_spec_needs_expanding = 0;
690 *field_present = 0;
691
692 /* No more input, end of iterator. This is the normal exit
693 path. */
694 if (self->str.start >= self->str.end)
695 return 1;
696
697 start = self->str.start;
698
699 /* First read any literal text. Read until the end of string, an
700 escaped '{' or '}', or an unescaped '{'. In order to never
701 allocate memory and so I can just pass pointers around, if
702 there's an escaped '{' or '}' then we'll return the literal
703 including the brace, but no format object. The next time
704 through, we'll return the rest of the literal, skipping past
705 the second consecutive brace. */
706 while (self->str.start < self->str.end) {
707 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
708 case '{':
709 case '}':
710 markup_follows = 1;
711 break;
712 default:
713 continue;
714 }
715 break;
716 }
717
718 at_end = self->str.start >= self->str.end;
719 len = self->str.start - start;
720
721 if ((c == '}') && (at_end ||
722 (c != PyUnicode_READ_CHAR(self->str.str,
723 self->str.start)))) {
724 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
725 "in format string");
726 return 0;
727 }
728 if (at_end && c == '{') {
729 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
730 "in format string");
731 return 0;
732 }
733 if (!at_end) {
734 if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
735 /* escaped } or {, skip it in the input. there is no
736 markup object following us, just this literal text */
737 self->str.start++;
738 markup_follows = 0;
739 }
740 else
741 len--;
742 }
743
744 /* record the literal text */
745 literal->str = self->str.str;
746 literal->start = start;
747 literal->end = start + len;
748
749 if (!markup_follows)
750 return 2;
751
752 /* this is markup; parse the field */
753 *field_present = 1;
754 if (!parse_field(&self->str, field_name, format_spec,
755 format_spec_needs_expanding, conversion))
756 return 0;
757 return 2;
758 }
759
760
761 /* do the !r or !s conversion on obj */
762 static PyObject *
do_conversion(PyObject * obj,Py_UCS4 conversion)763 do_conversion(PyObject *obj, Py_UCS4 conversion)
764 {
765 /* XXX in pre-3.0, do we need to convert this to unicode, since it
766 might have returned a string? */
767 switch (conversion) {
768 case 'r':
769 return PyObject_Repr(obj);
770 case 's':
771 return PyObject_Str(obj);
772 case 'a':
773 return PyObject_ASCII(obj);
774 default:
775 if (conversion > 32 && conversion < 127) {
776 /* It's the ASCII subrange; casting to char is safe
777 (assuming the execution character set is an ASCII
778 superset). */
779 PyErr_Format(PyExc_ValueError,
780 "Unknown conversion specifier %c",
781 (char)conversion);
782 } else
783 PyErr_Format(PyExc_ValueError,
784 "Unknown conversion specifier \\x%x",
785 (unsigned int)conversion);
786 return NULL;
787 }
788 }
789
790 /* given:
791
792 {field_name!conversion:format_spec}
793
794 compute the result and write it to output.
795 format_spec_needs_expanding is an optimization. if it's false,
796 just output the string directly, otherwise recursively expand the
797 format_spec string.
798
799 field_name is allowed to be zero length, in which case we
800 are doing auto field numbering.
801 */
802
803 static int
output_markup(SubString * field_name,SubString * format_spec,int format_spec_needs_expanding,Py_UCS4 conversion,_PyUnicodeWriter * writer,PyObject * args,PyObject * kwargs,int recursion_depth,AutoNumber * auto_number)804 output_markup(SubString *field_name, SubString *format_spec,
805 int format_spec_needs_expanding, Py_UCS4 conversion,
806 _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
807 int recursion_depth, AutoNumber *auto_number)
808 {
809 PyObject *tmp = NULL;
810 PyObject *fieldobj = NULL;
811 SubString expanded_format_spec;
812 SubString *actual_format_spec;
813 int result = 0;
814
815 /* convert field_name to an object */
816 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
817 if (fieldobj == NULL)
818 goto done;
819
820 if (conversion != '\0') {
821 tmp = do_conversion(fieldobj, conversion);
822 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
823 goto done;
824
825 /* do the assignment, transferring ownership: fieldobj = tmp */
826 Py_DECREF(fieldobj);
827 fieldobj = tmp;
828 tmp = NULL;
829 }
830
831 /* if needed, recursively compute the format_spec */
832 if (format_spec_needs_expanding) {
833 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
834 auto_number);
835 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
836 goto done;
837
838 /* note that in the case we're expanding the format string,
839 tmp must be kept around until after the call to
840 render_field. */
841 SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
842 actual_format_spec = &expanded_format_spec;
843 }
844 else
845 actual_format_spec = format_spec;
846
847 if (render_field(fieldobj, actual_format_spec, writer) == 0)
848 goto done;
849
850 result = 1;
851
852 done:
853 Py_XDECREF(fieldobj);
854 Py_XDECREF(tmp);
855
856 return result;
857 }
858
859 /*
860 do_markup is the top-level loop for the format() method. It
861 searches through the format string for escapes to markup codes, and
862 calls other functions to move non-markup text to the output,
863 and to perform the markup to the output.
864 */
865 static int
do_markup(SubString * input,PyObject * args,PyObject * kwargs,_PyUnicodeWriter * writer,int recursion_depth,AutoNumber * auto_number)866 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
867 _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
868 {
869 MarkupIterator iter;
870 int format_spec_needs_expanding;
871 int result;
872 int field_present;
873 SubString literal;
874 SubString field_name;
875 SubString format_spec;
876 Py_UCS4 conversion;
877
878 MarkupIterator_init(&iter, input->str, input->start, input->end);
879 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
880 &field_name, &format_spec,
881 &conversion,
882 &format_spec_needs_expanding)) == 2) {
883 if (literal.end != literal.start) {
884 if (!field_present && iter.str.start == iter.str.end)
885 writer->overallocate = 0;
886 if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
887 literal.start, literal.end) < 0)
888 return 0;
889 }
890
891 if (field_present) {
892 if (iter.str.start == iter.str.end)
893 writer->overallocate = 0;
894 if (!output_markup(&field_name, &format_spec,
895 format_spec_needs_expanding, conversion, writer,
896 args, kwargs, recursion_depth, auto_number))
897 return 0;
898 }
899 }
900 return result;
901 }
902
903
904 /*
905 build_string allocates the output string and then
906 calls do_markup to do the heavy lifting.
907 */
908 static PyObject *
build_string(SubString * input,PyObject * args,PyObject * kwargs,int recursion_depth,AutoNumber * auto_number)909 build_string(SubString *input, PyObject *args, PyObject *kwargs,
910 int recursion_depth, AutoNumber *auto_number)
911 {
912 _PyUnicodeWriter writer;
913
914 /* check the recursion level */
915 if (recursion_depth <= 0) {
916 PyErr_SetString(PyExc_ValueError,
917 "Max string recursion exceeded");
918 return NULL;
919 }
920
921 _PyUnicodeWriter_Init(&writer);
922 writer.overallocate = 1;
923 writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
924
925 if (!do_markup(input, args, kwargs, &writer, recursion_depth,
926 auto_number)) {
927 _PyUnicodeWriter_Dealloc(&writer);
928 return NULL;
929 }
930
931 return _PyUnicodeWriter_Finish(&writer);
932 }
933
934 /************************************************************************/
935 /*********** main routine ***********************************************/
936 /************************************************************************/
937
938 /* this is the main entry point */
939 static PyObject *
do_string_format(PyObject * self,PyObject * args,PyObject * kwargs)940 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
941 {
942 SubString input;
943
944 /* PEP 3101 says only 2 levels, so that
945 "{0:{1}}".format('abc', 's') # works
946 "{0:{1:{2}}}".format('abc', 's', '') # fails
947 */
948 int recursion_depth = 2;
949
950 AutoNumber auto_number;
951
952 if (PyUnicode_READY(self) == -1)
953 return NULL;
954
955 AutoNumber_Init(&auto_number);
956 SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
957 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
958 }
959
960 static PyObject *
do_string_format_map(PyObject * self,PyObject * obj)961 do_string_format_map(PyObject *self, PyObject *obj)
962 {
963 return do_string_format(self, NULL, obj);
964 }
965
966
967 /************************************************************************/
968 /*********** formatteriterator ******************************************/
969 /************************************************************************/
970
971 /* This is used to implement string.Formatter.vparse(). It exists so
972 Formatter can share code with the built in unicode.format() method.
973 It's really just a wrapper around MarkupIterator that is callable
974 from Python. */
975
976 typedef struct {
977 PyObject_HEAD
978 PyObject *str;
979 MarkupIterator it_markup;
980 } formatteriterobject;
981
982 static void
formatteriter_dealloc(formatteriterobject * it)983 formatteriter_dealloc(formatteriterobject *it)
984 {
985 Py_XDECREF(it->str);
986 PyObject_FREE(it);
987 }
988
989 /* returns a tuple:
990 (literal, field_name, format_spec, conversion)
991
992 literal is any literal text to output. might be zero length
993 field_name is the string before the ':'. might be None
994 format_spec is the string after the ':'. mibht be None
995 conversion is either None, or the string after the '!'
996 */
997 static PyObject *
formatteriter_next(formatteriterobject * it)998 formatteriter_next(formatteriterobject *it)
999 {
1000 SubString literal;
1001 SubString field_name;
1002 SubString format_spec;
1003 Py_UCS4 conversion;
1004 int format_spec_needs_expanding;
1005 int field_present;
1006 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1007 &field_name, &format_spec, &conversion,
1008 &format_spec_needs_expanding);
1009
1010 /* all of the SubString objects point into it->str, so no
1011 memory management needs to be done on them */
1012 assert(0 <= result && result <= 2);
1013 if (result == 0 || result == 1)
1014 /* if 0, error has already been set, if 1, iterator is empty */
1015 return NULL;
1016 else {
1017 PyObject *literal_str = NULL;
1018 PyObject *field_name_str = NULL;
1019 PyObject *format_spec_str = NULL;
1020 PyObject *conversion_str = NULL;
1021 PyObject *tuple = NULL;
1022
1023 literal_str = SubString_new_object(&literal);
1024 if (literal_str == NULL)
1025 goto done;
1026
1027 field_name_str = SubString_new_object(&field_name);
1028 if (field_name_str == NULL)
1029 goto done;
1030
1031 /* if field_name is non-zero length, return a string for
1032 format_spec (even if zero length), else return None */
1033 format_spec_str = (field_present ?
1034 SubString_new_object_or_empty :
1035 SubString_new_object)(&format_spec);
1036 if (format_spec_str == NULL)
1037 goto done;
1038
1039 /* if the conversion is not specified, return a None,
1040 otherwise create a one length string with the conversion
1041 character */
1042 if (conversion == '\0') {
1043 conversion_str = Py_None;
1044 Py_INCREF(conversion_str);
1045 }
1046 else
1047 conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1048 &conversion, 1);
1049 if (conversion_str == NULL)
1050 goto done;
1051
1052 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1053 conversion_str);
1054 done:
1055 Py_XDECREF(literal_str);
1056 Py_XDECREF(field_name_str);
1057 Py_XDECREF(format_spec_str);
1058 Py_XDECREF(conversion_str);
1059 return tuple;
1060 }
1061 }
1062
1063 static PyMethodDef formatteriter_methods[] = {
1064 {NULL, NULL} /* sentinel */
1065 };
1066
1067 static PyTypeObject PyFormatterIter_Type = {
1068 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1069 "formatteriterator", /* tp_name */
1070 sizeof(formatteriterobject), /* tp_basicsize */
1071 0, /* tp_itemsize */
1072 /* methods */
1073 (destructor)formatteriter_dealloc, /* tp_dealloc */
1074 0, /* tp_vectorcall_offset */
1075 0, /* tp_getattr */
1076 0, /* tp_setattr */
1077 0, /* tp_as_async */
1078 0, /* tp_repr */
1079 0, /* tp_as_number */
1080 0, /* tp_as_sequence */
1081 0, /* tp_as_mapping */
1082 0, /* tp_hash */
1083 0, /* tp_call */
1084 0, /* tp_str */
1085 PyObject_GenericGetAttr, /* tp_getattro */
1086 0, /* tp_setattro */
1087 0, /* tp_as_buffer */
1088 Py_TPFLAGS_DEFAULT, /* tp_flags */
1089 0, /* tp_doc */
1090 0, /* tp_traverse */
1091 0, /* tp_clear */
1092 0, /* tp_richcompare */
1093 0, /* tp_weaklistoffset */
1094 PyObject_SelfIter, /* tp_iter */
1095 (iternextfunc)formatteriter_next, /* tp_iternext */
1096 formatteriter_methods, /* tp_methods */
1097 0,
1098 };
1099
1100 /* unicode_formatter_parser is used to implement
1101 string.Formatter.vformat. it parses a string and returns tuples
1102 describing the parsed elements. It's a wrapper around
1103 stringlib/string_format.h's MarkupIterator */
1104 static PyObject *
formatter_parser(PyObject * ignored,PyObject * self)1105 formatter_parser(PyObject *ignored, PyObject *self)
1106 {
1107 formatteriterobject *it;
1108
1109 if (!PyUnicode_Check(self)) {
1110 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1111 return NULL;
1112 }
1113
1114 if (PyUnicode_READY(self) == -1)
1115 return NULL;
1116
1117 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1118 if (it == NULL)
1119 return NULL;
1120
1121 /* take ownership, give the object to the iterator */
1122 Py_INCREF(self);
1123 it->str = self;
1124
1125 /* initialize the contained MarkupIterator */
1126 MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1127 return (PyObject *)it;
1128 }
1129
1130
1131 /************************************************************************/
1132 /*********** fieldnameiterator ******************************************/
1133 /************************************************************************/
1134
1135
1136 /* This is used to implement string.Formatter.vparse(). It parses the
1137 field name into attribute and item values. It's a Python-callable
1138 wrapper around FieldNameIterator */
1139
1140 typedef struct {
1141 PyObject_HEAD
1142 PyObject *str;
1143 FieldNameIterator it_field;
1144 } fieldnameiterobject;
1145
1146 static void
fieldnameiter_dealloc(fieldnameiterobject * it)1147 fieldnameiter_dealloc(fieldnameiterobject *it)
1148 {
1149 Py_XDECREF(it->str);
1150 PyObject_FREE(it);
1151 }
1152
1153 /* returns a tuple:
1154 (is_attr, value)
1155 is_attr is true if we used attribute syntax (e.g., '.foo')
1156 false if we used index syntax (e.g., '[foo]')
1157 value is an integer or string
1158 */
1159 static PyObject *
fieldnameiter_next(fieldnameiterobject * it)1160 fieldnameiter_next(fieldnameiterobject *it)
1161 {
1162 int result;
1163 int is_attr;
1164 Py_ssize_t idx;
1165 SubString name;
1166
1167 result = FieldNameIterator_next(&it->it_field, &is_attr,
1168 &idx, &name);
1169 if (result == 0 || result == 1)
1170 /* if 0, error has already been set, if 1, iterator is empty */
1171 return NULL;
1172 else {
1173 PyObject* result = NULL;
1174 PyObject* is_attr_obj = NULL;
1175 PyObject* obj = NULL;
1176
1177 is_attr_obj = PyBool_FromLong(is_attr);
1178 if (is_attr_obj == NULL)
1179 goto done;
1180
1181 /* either an integer or a string */
1182 if (idx != -1)
1183 obj = PyLong_FromSsize_t(idx);
1184 else
1185 obj = SubString_new_object(&name);
1186 if (obj == NULL)
1187 goto done;
1188
1189 /* return a tuple of values */
1190 result = PyTuple_Pack(2, is_attr_obj, obj);
1191
1192 done:
1193 Py_XDECREF(is_attr_obj);
1194 Py_XDECREF(obj);
1195 return result;
1196 }
1197 }
1198
1199 static PyMethodDef fieldnameiter_methods[] = {
1200 {NULL, NULL} /* sentinel */
1201 };
1202
1203 static PyTypeObject PyFieldNameIter_Type = {
1204 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1205 "fieldnameiterator", /* tp_name */
1206 sizeof(fieldnameiterobject), /* tp_basicsize */
1207 0, /* tp_itemsize */
1208 /* methods */
1209 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1210 0, /* tp_vectorcall_offset */
1211 0, /* tp_getattr */
1212 0, /* tp_setattr */
1213 0, /* tp_as_async */
1214 0, /* tp_repr */
1215 0, /* tp_as_number */
1216 0, /* tp_as_sequence */
1217 0, /* tp_as_mapping */
1218 0, /* tp_hash */
1219 0, /* tp_call */
1220 0, /* tp_str */
1221 PyObject_GenericGetAttr, /* tp_getattro */
1222 0, /* tp_setattro */
1223 0, /* tp_as_buffer */
1224 Py_TPFLAGS_DEFAULT, /* tp_flags */
1225 0, /* tp_doc */
1226 0, /* tp_traverse */
1227 0, /* tp_clear */
1228 0, /* tp_richcompare */
1229 0, /* tp_weaklistoffset */
1230 PyObject_SelfIter, /* tp_iter */
1231 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1232 fieldnameiter_methods, /* tp_methods */
1233 0};
1234
1235 /* unicode_formatter_field_name_split is used to implement
1236 string.Formatter.vformat. it takes a PEP 3101 "field name", and
1237 returns a tuple of (first, rest): "first", the part before the
1238 first '.' or '['; and "rest", an iterator for the rest of the field
1239 name. it's a wrapper around stringlib/string_format.h's
1240 field_name_split. The iterator it returns is a
1241 FieldNameIterator */
1242 static PyObject *
formatter_field_name_split(PyObject * ignored,PyObject * self)1243 formatter_field_name_split(PyObject *ignored, PyObject *self)
1244 {
1245 SubString first;
1246 Py_ssize_t first_idx;
1247 fieldnameiterobject *it;
1248
1249 PyObject *first_obj = NULL;
1250 PyObject *result = NULL;
1251
1252 if (!PyUnicode_Check(self)) {
1253 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1254 return NULL;
1255 }
1256
1257 if (PyUnicode_READY(self) == -1)
1258 return NULL;
1259
1260 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1261 if (it == NULL)
1262 return NULL;
1263
1264 /* take ownership, give the object to the iterator. this is
1265 just to keep the field_name alive */
1266 Py_INCREF(self);
1267 it->str = self;
1268
1269 /* Pass in auto_number = NULL. We'll return an empty string for
1270 first_obj in that case. */
1271 if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1272 &first, &first_idx, &it->it_field, NULL))
1273 goto done;
1274
1275 /* first becomes an integer, if possible; else a string */
1276 if (first_idx != -1)
1277 first_obj = PyLong_FromSsize_t(first_idx);
1278 else
1279 /* convert "first" into a string object */
1280 first_obj = SubString_new_object(&first);
1281 if (first_obj == NULL)
1282 goto done;
1283
1284 /* return a tuple of values */
1285 result = PyTuple_Pack(2, first_obj, it);
1286
1287 done:
1288 Py_XDECREF(it);
1289 Py_XDECREF(first_obj);
1290 return result;
1291 }
1292