/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4 
5 #include "Python.h"
6 #include <locale.h>
7 
8 /* Raises an exception about an unknown presentation type for this
9  * type. */
10 
11 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)12 unknown_presentation_type(Py_UCS4 presentation_type,
13                           const char* type_name)
14 {
15     /* %c might be out-of-range, hence the two cases. */
16     if (presentation_type > 32 && presentation_type < 128)
17         PyErr_Format(PyExc_ValueError,
18                      "Unknown format code '%c' "
19                      "for object of type '%.200s'",
20                      (char)presentation_type,
21                      type_name);
22     else
23         PyErr_Format(PyExc_ValueError,
24                      "Unknown format code '\\x%x' "
25                      "for object of type '%.200s'",
26                      (unsigned int)presentation_type,
27                      type_name);
28 }
29 
30 static void
invalid_comma_type(Py_UCS4 presentation_type)31 invalid_comma_type(Py_UCS4 presentation_type)
32 {
33     if (presentation_type > 32 && presentation_type < 128)
34         PyErr_Format(PyExc_ValueError,
35                      "Cannot specify ',' or '_' with '%c'.",
36                      (char)presentation_type);
37     else
38         PyErr_Format(PyExc_ValueError,
39                      "Cannot specify ',' or '_' with '\\x%x'.",
40                      (unsigned int)presentation_type);
41 }
42 
43 static void
invalid_comma_and_underscore(void)44 invalid_comma_and_underscore(void)
45 {
46     PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
47 }
48 
49 /*
50     get_integer consumes 0 or more decimal digit characters from an
51     input string, updates *result with the corresponding positive
52     integer, and returns the number of digits consumed.
53 
54     returns -1 on error.
55 */
56 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)57 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
58                   Py_ssize_t *result)
59 {
60     Py_ssize_t accumulator, digitval, pos = *ppos;
61     int numdigits;
62     int kind = PyUnicode_KIND(str);
63     void *data = PyUnicode_DATA(str);
64 
65     accumulator = numdigits = 0;
66     for (; pos < end; pos++, numdigits++) {
67         digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
68         if (digitval < 0)
69             break;
70         /*
71            Detect possible overflow before it happens:
72 
73               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
74               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
75         */
76         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
77             PyErr_Format(PyExc_ValueError,
78                          "Too many decimal digits in format string");
79             *ppos = pos;
80             return -1;
81         }
82         accumulator = accumulator * 10 + digitval;
83     }
84     *ppos = pos;
85     *result = accumulator;
86     return numdigits;
87 }
88 
89 /************************************************************************/
90 /*********** standard format specifier parsing **************************/
91 /************************************************************************/
92 
93 /* returns true if this character is a specifier alignment token */
94 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)95 is_alignment_token(Py_UCS4 c)
96 {
97     switch (c) {
98     case '<': case '>': case '=': case '^':
99         return 1;
100     default:
101         return 0;
102     }
103 }
104 
105 /* returns true if this character is a sign element */
106 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)107 is_sign_element(Py_UCS4 c)
108 {
109     switch (c) {
110     case ' ': case '+': case '-':
111         return 1;
112     default:
113         return 0;
114     }
115 }
116 
/* Selects where the decimal point, thousands separator and grouping
   come from.  LT_NO_LOCALE must stay zero: the parser tests this
   field for truth to decide whether grouping was requested. */
enum LocaleType {
    LT_NO_LOCALE = 0,           /* no grouping */
    LT_DEFAULT_LOCALE = 1,      /* ',' format option */
    LT_UNDERSCORE_LOCALE = 2,   /* '_' format option */
    LT_UNDER_FOUR_LOCALE = 3,   /* '_' with a bin/oct/hex type */
    LT_CURRENT_LOCALE = 4       /* values taken from localeconv() */
};
125 
126 typedef struct {
127     Py_UCS4 fill_char;
128     Py_UCS4 align;
129     int alternate;
130     Py_UCS4 sign;
131     Py_ssize_t width;
132     enum LocaleType thousands_separators;
133     Py_ssize_t precision;
134     Py_UCS4 type;
135 } InternalFormatSpec;
136 
#if 0
/* Occasionally useful for debugging. Should normally be commented out.
   Prints every field of *format to stdout, one per line.  The
   arguments are cast explicitly so that each one matches its printf
   conversion (Py_UCS4 is an unsigned 32-bit type). */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %u\n",
           (unsigned int)format->fill_char);
    printf("internal format spec: align %u\n",
           (unsigned int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %u\n",
           (unsigned int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
154 
155 
156 /*
157   ptr points to the start of the format_spec, end points just past its end.
158   fills in format with the parsed information.
159   returns 1 on success, 0 on failure.
160   if failure, sets the exception
161 */
162 static int
parse_internal_render_format_spec(PyObject * format_spec,Py_ssize_t start,Py_ssize_t end,InternalFormatSpec * format,char default_type,char default_align)163 parse_internal_render_format_spec(PyObject *format_spec,
164                                   Py_ssize_t start, Py_ssize_t end,
165                                   InternalFormatSpec *format,
166                                   char default_type,
167                                   char default_align)
168 {
169     Py_ssize_t pos = start;
170     int kind = PyUnicode_KIND(format_spec);
171     void *data = PyUnicode_DATA(format_spec);
172     /* end-pos is used throughout this code to specify the length of
173        the input string */
174 #define READ_spec(index) PyUnicode_READ(kind, data, index)
175 
176     Py_ssize_t consumed;
177     int align_specified = 0;
178     int fill_char_specified = 0;
179 
180     format->fill_char = ' ';
181     format->align = default_align;
182     format->alternate = 0;
183     format->sign = '\0';
184     format->width = -1;
185     format->thousands_separators = LT_NO_LOCALE;
186     format->precision = -1;
187     format->type = default_type;
188 
189     /* If the second char is an alignment token,
190        then parse the fill char */
191     if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
192         format->align = READ_spec(pos+1);
193         format->fill_char = READ_spec(pos);
194         fill_char_specified = 1;
195         align_specified = 1;
196         pos += 2;
197     }
198     else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
199         format->align = READ_spec(pos);
200         align_specified = 1;
201         ++pos;
202     }
203 
204     /* Parse the various sign options */
205     if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
206         format->sign = READ_spec(pos);
207         ++pos;
208     }
209 
210     /* If the next character is #, we're in alternate mode.  This only
211        applies to integers. */
212     if (end-pos >= 1 && READ_spec(pos) == '#') {
213         format->alternate = 1;
214         ++pos;
215     }
216 
217     /* The special case for 0-padding (backwards compat) */
218     if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
219         format->fill_char = '0';
220         if (!align_specified) {
221             format->align = '=';
222         }
223         ++pos;
224     }
225 
226     consumed = get_integer(format_spec, &pos, end, &format->width);
227     if (consumed == -1)
228         /* Overflow error. Exception already set. */
229         return 0;
230 
231     /* If consumed is 0, we didn't consume any characters for the
232        width. In that case, reset the width to -1, because
233        get_integer() will have set it to zero. -1 is how we record
234        that the width wasn't specified. */
235     if (consumed == 0)
236         format->width = -1;
237 
238     /* Comma signifies add thousands separators */
239     if (end-pos && READ_spec(pos) == ',') {
240         format->thousands_separators = LT_DEFAULT_LOCALE;
241         ++pos;
242     }
243     /* Underscore signifies add thousands separators */
244     if (end-pos && READ_spec(pos) == '_') {
245         if (format->thousands_separators != LT_NO_LOCALE) {
246             invalid_comma_and_underscore();
247             return 0;
248         }
249         format->thousands_separators = LT_UNDERSCORE_LOCALE;
250         ++pos;
251     }
252     if (end-pos && READ_spec(pos) == ',') {
253         invalid_comma_and_underscore();
254         return 0;
255     }
256 
257     /* Parse field precision */
258     if (end-pos && READ_spec(pos) == '.') {
259         ++pos;
260 
261         consumed = get_integer(format_spec, &pos, end, &format->precision);
262         if (consumed == -1)
263             /* Overflow error. Exception already set. */
264             return 0;
265 
266         /* Not having a precision after a dot is an error. */
267         if (consumed == 0) {
268             PyErr_Format(PyExc_ValueError,
269                          "Format specifier missing precision");
270             return 0;
271         }
272 
273     }
274 
275     /* Finally, parse the type field. */
276 
277     if (end-pos > 1) {
278         /* More than one char remain, invalid format specifier. */
279         PyErr_Format(PyExc_ValueError, "Invalid format specifier");
280         return 0;
281     }
282 
283     if (end-pos == 1) {
284         format->type = READ_spec(pos);
285         ++pos;
286     }
287 
288     /* Do as much validating as we can, just by looking at the format
289        specifier.  Do not take into account what type of formatting
290        we're doing (int, float, string). */
291 
292     if (format->thousands_separators) {
293         switch (format->type) {
294         case 'd':
295         case 'e':
296         case 'f':
297         case 'g':
298         case 'E':
299         case 'G':
300         case '%':
301         case 'F':
302         case '\0':
303             /* These are allowed. See PEP 378.*/
304             break;
305         case 'b':
306         case 'o':
307         case 'x':
308         case 'X':
309             /* Underscores are allowed in bin/oct/hex. See PEP 515. */
310             if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
311                 /* Every four digits, not every three, in bin/oct/hex. */
312                 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
313                 break;
314             }
315         default:
316             invalid_comma_type(format->type);
317             return 0;
318         }
319     }
320 
321     assert (format->align <= 127);
322     assert (format->sign <= 127);
323     return 1;
324 }
325 
326 /* Calculate the padding needed. */
327 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)328 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
329              Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
330              Py_ssize_t *n_total)
331 {
332     if (width >= 0) {
333         if (nchars > width)
334             *n_total = nchars;
335         else
336             *n_total = width;
337     }
338     else {
339         /* not specified, use all of the chars and no more */
340         *n_total = nchars;
341     }
342 
343     /* Figure out how much leading space we need, based on the
344        aligning */
345     if (align == '>')
346         *n_lpadding = *n_total - nchars;
347     else if (align == '^')
348         *n_lpadding = (*n_total - nchars) / 2;
349     else if (align == '<' || align == '=')
350         *n_lpadding = 0;
351     else {
352         /* We should never have an unspecified alignment. */
353         *n_lpadding = 0;
354         assert(0);
355     }
356 
357     *n_rpadding = *n_total - nchars - *n_lpadding;
358 }
359 
360 /* Do the padding, and return a pointer to where the caller-supplied
361    content goes. */
362 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)363 fill_padding(_PyUnicodeWriter *writer,
364              Py_ssize_t nchars,
365              Py_UCS4 fill_char, Py_ssize_t n_lpadding,
366              Py_ssize_t n_rpadding)
367 {
368     Py_ssize_t pos;
369 
370     /* Pad on left. */
371     if (n_lpadding) {
372         pos = writer->pos;
373         _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
374     }
375 
376     /* Pad on right. */
377     if (n_rpadding) {
378         pos = writer->pos + nchars + n_lpadding;
379         _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
380     }
381 
382     /* Pointer to the user content. */
383     writer->pos += n_lpadding;
384     return 0;
385 }
386 
387 /************************************************************************/
388 /*********** common routines for numeric formatting *********************/
389 /************************************************************************/
390 
391 /* Locale info needed for formatting integers and the part of floats
392    before and including the decimal. Note that locales only support
393    8-bit chars, not unicode. */
394 typedef struct {
395     PyObject *decimal_point;
396     PyObject *thousands_sep;
397     const char *grouping;
398 } LocaleInfo;
399 
400 #define STATIC_LOCALE_INFO_INIT {0, 0, 0}
401 
402 /* describes the layout for an integer, see the comment in
403    calc_number_widths() for details */
404 typedef struct {
405     Py_ssize_t n_lpadding;
406     Py_ssize_t n_prefix;
407     Py_ssize_t n_spadding;
408     Py_ssize_t n_rpadding;
409     char sign;
410     Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
411     Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
412                                     any grouping chars. */
413     Py_ssize_t n_decimal;   /* 0 if only an integer */
414     Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
415                                excluding the decimal itself, if
416                                present. */
417 
418     /* These 2 are not the widths of fields, but are needed by
419        STRINGLIB_GROUPING. */
420     Py_ssize_t n_digits;    /* The number of digits before a decimal
421                                or exponent. */
422     Py_ssize_t n_min_width; /* The min_width we used when we computed
423                                the n_grouped_digits width. */
424 } NumberFieldWidths;
425 
426 
427 /* Given a number of the form:
428    digits[remainder]
429    where ptr points to the start and end points to the end, find where
430     the integer part ends. This could be a decimal, an exponent, both,
431     or neither.
432    If a decimal point is present, set *has_decimal and increment
433     remainder beyond it.
434    Results are undefined (but shouldn't crash) for improperly
435     formatted strings.
436 */
437 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)438 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
439              Py_ssize_t *n_remainder, int *has_decimal)
440 {
441     Py_ssize_t remainder;
442     int kind = PyUnicode_KIND(s);
443     void *data = PyUnicode_DATA(s);
444 
445     while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
446         ++pos;
447     remainder = pos;
448 
449     /* Does remainder start with a decimal point? */
450     *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
451 
452     /* Skip the decimal point. */
453     if (*has_decimal)
454         remainder++;
455 
456     *n_remainder = end - remainder;
457 }
458 
459 /* not all fields of format are used.  for example, precision is
460    unused.  should this take discrete params in order to be more clear
461    about what it does?  or is passing a single format parameter easier
462    and more efficient enough to justify a little obfuscation? */
463 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,PyObject * number,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)464 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
465                    Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
466                    Py_ssize_t n_end, Py_ssize_t n_remainder,
467                    int has_decimal, const LocaleInfo *locale,
468                    const InternalFormatSpec *format, Py_UCS4 *maxchar)
469 {
470     Py_ssize_t n_non_digit_non_padding;
471     Py_ssize_t n_padding;
472 
473     spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
474     spec->n_lpadding = 0;
475     spec->n_prefix = n_prefix;
476     spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
477     spec->n_remainder = n_remainder;
478     spec->n_spadding = 0;
479     spec->n_rpadding = 0;
480     spec->sign = '\0';
481     spec->n_sign = 0;
482 
483     /* the output will look like:
484        |                                                                                         |
485        | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
486        |                                                                                         |
487 
488        sign is computed from format->sign and the actual
489        sign of the number
490 
491        prefix is given (it's for the '0x' prefix)
492 
493        digits is already known
494 
495        the total width is either given, or computed from the
496        actual digits
497 
498        only one of lpadding, spadding, and rpadding can be non-zero,
499        and it's calculated from the width and other fields
500     */
501 
502     /* compute the various parts we're going to write */
503     switch (format->sign) {
504     case '+':
505         /* always put a + or - */
506         spec->n_sign = 1;
507         spec->sign = (sign_char == '-' ? '-' : '+');
508         break;
509     case ' ':
510         spec->n_sign = 1;
511         spec->sign = (sign_char == '-' ? '-' : ' ');
512         break;
513     default:
514         /* Not specified, or the default (-) */
515         if (sign_char == '-') {
516             spec->n_sign = 1;
517             spec->sign = '-';
518         }
519     }
520 
521     /* The number of chars used for non-digits and non-padding. */
522     n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
523         spec->n_remainder;
524 
525     /* min_width can go negative, that's okay. format->width == -1 means
526        we don't care. */
527     if (format->fill_char == '0' && format->align == '=')
528         spec->n_min_width = format->width - n_non_digit_non_padding;
529     else
530         spec->n_min_width = 0;
531 
532     if (spec->n_digits == 0)
533         /* This case only occurs when using 'c' formatting, we need
534            to special case it because the grouping code always wants
535            to have at least one character. */
536         spec->n_grouped_digits = 0;
537     else {
538         Py_UCS4 grouping_maxchar;
539         spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
540             NULL, 0,
541             0, NULL,
542             spec->n_digits, spec->n_min_width,
543             locale->grouping, locale->thousands_sep, &grouping_maxchar);
544         *maxchar = Py_MAX(*maxchar, grouping_maxchar);
545     }
546 
547     /* Given the desired width and the total of digit and non-digit
548        space we consume, see if we need any padding. format->width can
549        be negative (meaning no padding), but this code still works in
550        that case. */
551     n_padding = format->width -
552                         (n_non_digit_non_padding + spec->n_grouped_digits);
553     if (n_padding > 0) {
554         /* Some padding is needed. Determine if it's left, space, or right. */
555         switch (format->align) {
556         case '<':
557             spec->n_rpadding = n_padding;
558             break;
559         case '^':
560             spec->n_lpadding = n_padding / 2;
561             spec->n_rpadding = n_padding - spec->n_lpadding;
562             break;
563         case '=':
564             spec->n_spadding = n_padding;
565             break;
566         case '>':
567             spec->n_lpadding = n_padding;
568             break;
569         default:
570             /* Shouldn't get here, but treat it as '>' */
571             spec->n_lpadding = n_padding;
572             assert(0);
573             break;
574         }
575     }
576 
577     if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
578         *maxchar = Py_MAX(*maxchar, format->fill_char);
579 
580     if (spec->n_decimal)
581         *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
582 
583     return spec->n_lpadding + spec->n_sign + spec->n_prefix +
584         spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
585         spec->n_remainder + spec->n_rpadding;
586 }
587 
588 /* Fill in the digit parts of a numbers's string representation,
589    as determined in calc_number_widths().
590    Return -1 on error, or 0 on success. */
591 static int
fill_number(_PyUnicodeWriter * writer,const NumberFieldWidths * spec,PyObject * digits,Py_ssize_t d_start,Py_ssize_t d_end,PyObject * prefix,Py_ssize_t p_start,Py_UCS4 fill_char,LocaleInfo * locale,int toupper)592 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
593             PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
594             PyObject *prefix, Py_ssize_t p_start,
595             Py_UCS4 fill_char,
596             LocaleInfo *locale, int toupper)
597 {
598     /* Used to keep track of digits, decimal, and remainder. */
599     Py_ssize_t d_pos = d_start;
600     const unsigned int kind = writer->kind;
601     const void *data = writer->data;
602     Py_ssize_t r;
603 
604     if (spec->n_lpadding) {
605         _PyUnicode_FastFill(writer->buffer,
606                             writer->pos, spec->n_lpadding, fill_char);
607         writer->pos += spec->n_lpadding;
608     }
609     if (spec->n_sign == 1) {
610         PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
611         writer->pos++;
612     }
613     if (spec->n_prefix) {
614         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
615                                       prefix, p_start,
616                                       spec->n_prefix);
617         if (toupper) {
618             Py_ssize_t t;
619             for (t = 0; t < spec->n_prefix; t++) {
620                 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
621                 c = Py_TOUPPER(c);
622                 assert (c <= 127);
623                 PyUnicode_WRITE(kind, data, writer->pos + t, c);
624             }
625         }
626         writer->pos += spec->n_prefix;
627     }
628     if (spec->n_spadding) {
629         _PyUnicode_FastFill(writer->buffer,
630                             writer->pos, spec->n_spadding, fill_char);
631         writer->pos += spec->n_spadding;
632     }
633 
634     /* Only for type 'c' special case, it has no digits. */
635     if (spec->n_digits != 0) {
636         /* Fill the digits with InsertThousandsGrouping. */
637         char *pdigits;
638         if (PyUnicode_READY(digits))
639             return -1;
640         pdigits = PyUnicode_DATA(digits);
641         if (PyUnicode_KIND(digits) < kind) {
642             pdigits = _PyUnicode_AsKind(digits, kind);
643             if (pdigits == NULL)
644                 return -1;
645         }
646         r = _PyUnicode_InsertThousandsGrouping(
647                 writer->buffer, writer->pos,
648                 spec->n_grouped_digits,
649                 pdigits + kind * d_pos,
650                 spec->n_digits, spec->n_min_width,
651                 locale->grouping, locale->thousands_sep, NULL);
652         if (r == -1)
653             return -1;
654         assert(r == spec->n_grouped_digits);
655         if (PyUnicode_KIND(digits) < kind)
656             PyMem_Free(pdigits);
657         d_pos += spec->n_digits;
658     }
659     if (toupper) {
660         Py_ssize_t t;
661         for (t = 0; t < spec->n_grouped_digits; t++) {
662             Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
663             c = Py_TOUPPER(c);
664             if (c > 127) {
665                 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
666                 return -1;
667             }
668             PyUnicode_WRITE(kind, data, writer->pos + t, c);
669         }
670     }
671     writer->pos += spec->n_grouped_digits;
672 
673     if (spec->n_decimal) {
674         _PyUnicode_FastCopyCharacters(
675             writer->buffer, writer->pos,
676             locale->decimal_point, 0, spec->n_decimal);
677         writer->pos += spec->n_decimal;
678         d_pos += 1;
679     }
680 
681     if (spec->n_remainder) {
682         _PyUnicode_FastCopyCharacters(
683             writer->buffer, writer->pos,
684             digits, d_pos, spec->n_remainder);
685         writer->pos += spec->n_remainder;
686         /* d_pos += spec->n_remainder; */
687     }
688 
689     if (spec->n_rpadding) {
690         _PyUnicode_FastFill(writer->buffer,
691                             writer->pos, spec->n_rpadding,
692                             fill_char);
693         writer->pos += spec->n_rpadding;
694     }
695     return 0;
696 }
697 
/* Grouping description used when no grouping is wanted: its single
   element is CHAR_MAX. */
static const char no_grouping[] = {CHAR_MAX};
699 
700 /* Find the decimal point character(s?), thousands_separator(s?), and
701    grouping description, either for the current locale if type is
702    LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
703    LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
704 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)705 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
706 {
707     switch (type) {
708     case LT_CURRENT_LOCALE: {
709         struct lconv *locale_data = localeconv();
710         locale_info->decimal_point = PyUnicode_DecodeLocale(
711                                          locale_data->decimal_point,
712                                          NULL);
713         if (locale_info->decimal_point == NULL)
714             return -1;
715         locale_info->thousands_sep = PyUnicode_DecodeLocale(
716                                          locale_data->thousands_sep,
717                                          NULL);
718         if (locale_info->thousands_sep == NULL)
719             return -1;
720         locale_info->grouping = locale_data->grouping;
721         break;
722     }
723     case LT_DEFAULT_LOCALE:
724     case LT_UNDERSCORE_LOCALE:
725     case LT_UNDER_FOUR_LOCALE:
726         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
727         locale_info->thousands_sep = PyUnicode_FromOrdinal(
728             type == LT_DEFAULT_LOCALE ? ',' : '_');
729         if (!locale_info->decimal_point || !locale_info->thousands_sep)
730             return -1;
731         if (type != LT_UNDER_FOUR_LOCALE)
732             locale_info->grouping = "\3"; /* Group every 3 characters.  The
733                                          (implicit) trailing 0 means repeat
734                                          infinitely. */
735         else
736             locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
737         break;
738     case LT_NO_LOCALE:
739         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
740         locale_info->thousands_sep = PyUnicode_New(0, 0);
741         if (!locale_info->decimal_point || !locale_info->thousands_sep)
742             return -1;
743         locale_info->grouping = no_grouping;
744         break;
745     }
746     return 0;
747 }
748 
749 static void
free_locale_info(LocaleInfo * locale_info)750 free_locale_info(LocaleInfo *locale_info)
751 {
752     Py_XDECREF(locale_info->decimal_point);
753     Py_XDECREF(locale_info->thousands_sep);
754 }
755 
756 /************************************************************************/
757 /*********** string formatting ******************************************/
758 /************************************************************************/
759 
760 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)761 format_string_internal(PyObject *value, const InternalFormatSpec *format,
762                        _PyUnicodeWriter *writer)
763 {
764     Py_ssize_t lpad;
765     Py_ssize_t rpad;
766     Py_ssize_t total;
767     Py_ssize_t len;
768     int result = -1;
769     Py_UCS4 maxchar;
770 
771     assert(PyUnicode_IS_READY(value));
772     len = PyUnicode_GET_LENGTH(value);
773 
774     /* sign is not allowed on strings */
775     if (format->sign != '\0') {
776         PyErr_SetString(PyExc_ValueError,
777                         "Sign not allowed in string format specifier");
778         goto done;
779     }
780 
781     /* alternate is not allowed on strings */
782     if (format->alternate) {
783         PyErr_SetString(PyExc_ValueError,
784                         "Alternate form (#) not allowed in string format "
785                         "specifier");
786         goto done;
787     }
788 
789     /* '=' alignment not allowed on strings */
790     if (format->align == '=') {
791         PyErr_SetString(PyExc_ValueError,
792                         "'=' alignment not allowed "
793                         "in string format specifier");
794         goto done;
795     }
796 
797     if ((format->width == -1 || format->width <= len)
798         && (format->precision == -1 || format->precision >= len)) {
799         /* Fast path */
800         return _PyUnicodeWriter_WriteStr(writer, value);
801     }
802 
803     /* if precision is specified, output no more that format.precision
804        characters */
805     if (format->precision >= 0 && len >= format->precision) {
806         len = format->precision;
807     }
808 
809     calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
810 
811     maxchar = writer->maxchar;
812     if (lpad != 0 || rpad != 0)
813         maxchar = Py_MAX(maxchar, format->fill_char);
814     if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
815         Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
816         maxchar = Py_MAX(maxchar, valmaxchar);
817     }
818 
819     /* allocate the resulting string */
820     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
821         goto done;
822 
823     /* Write into that space. First the padding. */
824     result = fill_padding(writer, len, format->fill_char, lpad, rpad);
825     if (result == -1)
826         goto done;
827 
828     /* Then the source string. */
829     if (len) {
830         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
831                                       value, 0, len);
832     }
833     writer->pos += (len + rpad);
834     result = 0;
835 
836 done:
837     return result;
838 }
839 
840 
841 /************************************************************************/
842 /*********** long formatting ********************************************/
843 /************************************************************************/
844 
845 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)846 format_long_internal(PyObject *value, const InternalFormatSpec *format,
847                      _PyUnicodeWriter *writer)
848 {
849     int result = -1;
850     Py_UCS4 maxchar = 127;
851     PyObject *tmp = NULL;
852     Py_ssize_t inumeric_chars;
853     Py_UCS4 sign_char = '\0';
854     Py_ssize_t n_digits;       /* count of digits need from the computed
855                                   string */
856     Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
857                                    produces non-digits */
858     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
859     Py_ssize_t n_total;
860     Py_ssize_t prefix = 0;
861     NumberFieldWidths spec;
862     long x;
863 
864     /* Locale settings, either from the actual locale or
865        from a hard-code pseudo-locale */
866     LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
867 
868     /* no precision allowed on integers */
869     if (format->precision != -1) {
870         PyErr_SetString(PyExc_ValueError,
871                         "Precision not allowed in integer format specifier");
872         goto done;
873     }
874 
875     /* special case for character formatting */
876     if (format->type == 'c') {
877         /* error to specify a sign */
878         if (format->sign != '\0') {
879             PyErr_SetString(PyExc_ValueError,
880                             "Sign not allowed with integer"
881                             " format specifier 'c'");
882             goto done;
883         }
884         /* error to request alternate format */
885         if (format->alternate) {
886             PyErr_SetString(PyExc_ValueError,
887                             "Alternate form (#) not allowed with integer"
888                             " format specifier 'c'");
889             goto done;
890         }
891 
892         /* taken from unicodeobject.c formatchar() */
893         /* Integer input truncated to a character */
894         x = PyLong_AsLong(value);
895         if (x == -1 && PyErr_Occurred())
896             goto done;
897         if (x < 0 || x > 0x10ffff) {
898             PyErr_SetString(PyExc_OverflowError,
899                             "%c arg not in range(0x110000)");
900             goto done;
901         }
902         tmp = PyUnicode_FromOrdinal(x);
903         inumeric_chars = 0;
904         n_digits = 1;
905         maxchar = Py_MAX(maxchar, (Py_UCS4)x);
906 
907         /* As a sort-of hack, we tell calc_number_widths that we only
908            have "remainder" characters. calc_number_widths thinks
909            these are characters that don't get formatted, only copied
910            into the output string. We do this for 'c' formatting,
911            because the characters are likely to be non-digits. */
912         n_remainder = 1;
913     }
914     else {
915         int base;
916         int leading_chars_to_skip = 0;  /* Number of characters added by
917                                            PyNumber_ToBase that we want to
918                                            skip over. */
919 
920         /* Compute the base and how many characters will be added by
921            PyNumber_ToBase */
922         switch (format->type) {
923         case 'b':
924             base = 2;
925             leading_chars_to_skip = 2; /* 0b */
926             break;
927         case 'o':
928             base = 8;
929             leading_chars_to_skip = 2; /* 0o */
930             break;
931         case 'x':
932         case 'X':
933             base = 16;
934             leading_chars_to_skip = 2; /* 0x */
935             break;
936         default:  /* shouldn't be needed, but stops a compiler warning */
937         case 'd':
938         case 'n':
939             base = 10;
940             break;
941         }
942 
943         if (format->sign != '+' && format->sign != ' '
944             && format->width == -1
945             && format->type != 'X' && format->type != 'n'
946             && !format->thousands_separators
947             && PyLong_CheckExact(value))
948         {
949             /* Fast path */
950             return _PyLong_FormatWriter(writer, value, base, format->alternate);
951         }
952 
953         /* The number of prefix chars is the same as the leading
954            chars to skip */
955         if (format->alternate)
956             n_prefix = leading_chars_to_skip;
957 
958         /* Do the hard part, converting to a string in a given base */
959         tmp = _PyLong_Format(value, base);
960         if (tmp == NULL || PyUnicode_READY(tmp) == -1)
961             goto done;
962 
963         inumeric_chars = 0;
964         n_digits = PyUnicode_GET_LENGTH(tmp);
965 
966         prefix = inumeric_chars;
967 
968         /* Is a sign character present in the output?  If so, remember it
969            and skip it */
970         if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
971             sign_char = '-';
972             ++prefix;
973             ++leading_chars_to_skip;
974         }
975 
976         /* Skip over the leading chars (0x, 0b, etc.) */
977         n_digits -= leading_chars_to_skip;
978         inumeric_chars += leading_chars_to_skip;
979     }
980 
981     /* Determine the grouping, separator, and decimal point, if any. */
982     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
983                         format->thousands_separators,
984                         &locale) == -1)
985         goto done;
986 
987     /* Calculate how much memory we'll need. */
988     n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
989                                  inumeric_chars + n_digits, n_remainder, 0,
990                                  &locale, format, &maxchar);
991 
992     /* Allocate the memory. */
993     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
994         goto done;
995 
996     /* Populate the memory. */
997     result = fill_number(writer, &spec,
998                          tmp, inumeric_chars, inumeric_chars + n_digits,
999                          tmp, prefix, format->fill_char,
1000                          &locale, format->type == 'X');
1001 
1002 done:
1003     Py_XDECREF(tmp);
1004     free_locale_info(&locale);
1005     return result;
1006 }
1007 
1008 /************************************************************************/
1009 /*********** float formatting *******************************************/
1010 /************************************************************************/
1011 
1012 /* much of this is taken from unicodeobject.c */
1013 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1014 format_float_internal(PyObject *value,
1015                       const InternalFormatSpec *format,
1016                       _PyUnicodeWriter *writer)
1017 {
1018     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
1019     Py_ssize_t n_digits;
1020     Py_ssize_t n_remainder;
1021     Py_ssize_t n_total;
1022     int has_decimal;
1023     double val;
1024     int precision, default_precision = 6;
1025     Py_UCS4 type = format->type;
1026     int add_pct = 0;
1027     Py_ssize_t index;
1028     NumberFieldWidths spec;
1029     int flags = 0;
1030     int result = -1;
1031     Py_UCS4 maxchar = 127;
1032     Py_UCS4 sign_char = '\0';
1033     int float_type; /* Used to see if we have a nan, inf, or regular float. */
1034     PyObject *unicode_tmp = NULL;
1035 
1036     /* Locale settings, either from the actual locale or
1037        from a hard-code pseudo-locale */
1038     LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
1039 
1040     if (format->precision > INT_MAX) {
1041         PyErr_SetString(PyExc_ValueError, "precision too big");
1042         goto done;
1043     }
1044     precision = (int)format->precision;
1045 
1046     if (format->alternate)
1047         flags |= Py_DTSF_ALT;
1048 
1049     if (type == '\0') {
1050         /* Omitted type specifier.  Behaves in the same way as repr(x)
1051            and str(x) if no precision is given, else like 'g', but with
1052            at least one digit after the decimal point. */
1053         flags |= Py_DTSF_ADD_DOT_0;
1054         type = 'r';
1055         default_precision = 0;
1056     }
1057 
1058     if (type == 'n')
1059         /* 'n' is the same as 'g', except for the locale used to
1060            format the result. We take care of that later. */
1061         type = 'g';
1062 
1063     val = PyFloat_AsDouble(value);
1064     if (val == -1.0 && PyErr_Occurred())
1065         goto done;
1066 
1067     if (type == '%') {
1068         type = 'f';
1069         val *= 100;
1070         add_pct = 1;
1071     }
1072 
1073     if (precision < 0)
1074         precision = default_precision;
1075     else if (type == 'r')
1076         type = 'g';
1077 
1078     /* Cast "type", because if we're in unicode we need to pass an
1079        8-bit char. This is safe, because we've restricted what "type"
1080        can be. */
1081     buf = PyOS_double_to_string(val, (char)type, precision, flags,
1082                                 &float_type);
1083     if (buf == NULL)
1084         goto done;
1085     n_digits = strlen(buf);
1086 
1087     if (add_pct) {
1088         /* We know that buf has a trailing zero (since we just called
1089            strlen() on it), and we don't use that fact any more. So we
1090            can just write over the trailing zero. */
1091         buf[n_digits] = '%';
1092         n_digits += 1;
1093     }
1094 
1095     if (format->sign != '+' && format->sign != ' '
1096         && format->width == -1
1097         && format->type != 'n'
1098         && !format->thousands_separators)
1099     {
1100         /* Fast path */
1101         result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1102         PyMem_Free(buf);
1103         return result;
1104     }
1105 
1106     /* Since there is no unicode version of PyOS_double_to_string,
1107        just use the 8 bit version and then convert to unicode. */
1108     unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1109     PyMem_Free(buf);
1110     if (unicode_tmp == NULL)
1111         goto done;
1112 
1113     /* Is a sign character present in the output?  If so, remember it
1114        and skip it */
1115     index = 0;
1116     if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1117         sign_char = '-';
1118         ++index;
1119         --n_digits;
1120     }
1121 
1122     /* Determine if we have any "remainder" (after the digits, might include
1123        decimal or exponent or both (or neither)) */
1124     parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1125 
1126     /* Determine the grouping, separator, and decimal point, if any. */
1127     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1128                         format->thousands_separators,
1129                         &locale) == -1)
1130         goto done;
1131 
1132     /* Calculate how much memory we'll need. */
1133     n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
1134                                  index + n_digits, n_remainder, has_decimal,
1135                                  &locale, format, &maxchar);
1136 
1137     /* Allocate the memory. */
1138     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1139         goto done;
1140 
1141     /* Populate the memory. */
1142     result = fill_number(writer, &spec,
1143                          unicode_tmp, index, index + n_digits,
1144                          NULL, 0, format->fill_char,
1145                          &locale, 0);
1146 
1147 done:
1148     Py_XDECREF(unicode_tmp);
1149     free_locale_info(&locale);
1150     return result;
1151 }
1152 
1153 /************************************************************************/
1154 /*********** complex formatting *****************************************/
1155 /************************************************************************/
1156 
1157 static int
format_complex_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1158 format_complex_internal(PyObject *value,
1159                         const InternalFormatSpec *format,
1160                         _PyUnicodeWriter *writer)
1161 {
1162     double re;
1163     double im;
1164     char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1165     char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1166 
1167     InternalFormatSpec tmp_format = *format;
1168     Py_ssize_t n_re_digits;
1169     Py_ssize_t n_im_digits;
1170     Py_ssize_t n_re_remainder;
1171     Py_ssize_t n_im_remainder;
1172     Py_ssize_t n_re_total;
1173     Py_ssize_t n_im_total;
1174     int re_has_decimal;
1175     int im_has_decimal;
1176     int precision, default_precision = 6;
1177     Py_UCS4 type = format->type;
1178     Py_ssize_t i_re;
1179     Py_ssize_t i_im;
1180     NumberFieldWidths re_spec;
1181     NumberFieldWidths im_spec;
1182     int flags = 0;
1183     int result = -1;
1184     Py_UCS4 maxchar = 127;
1185     enum PyUnicode_Kind rkind;
1186     void *rdata;
1187     Py_UCS4 re_sign_char = '\0';
1188     Py_UCS4 im_sign_char = '\0';
1189     int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1190     int im_float_type;
1191     int add_parens = 0;
1192     int skip_re = 0;
1193     Py_ssize_t lpad;
1194     Py_ssize_t rpad;
1195     Py_ssize_t total;
1196     PyObject *re_unicode_tmp = NULL;
1197     PyObject *im_unicode_tmp = NULL;
1198 
1199     /* Locale settings, either from the actual locale or
1200        from a hard-code pseudo-locale */
1201     LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
1202 
1203     if (format->precision > INT_MAX) {
1204         PyErr_SetString(PyExc_ValueError, "precision too big");
1205         goto done;
1206     }
1207     precision = (int)format->precision;
1208 
1209     /* Zero padding is not allowed. */
1210     if (format->fill_char == '0') {
1211         PyErr_SetString(PyExc_ValueError,
1212                         "Zero padding is not allowed in complex format "
1213                         "specifier");
1214         goto done;
1215     }
1216 
1217     /* Neither is '=' alignment . */
1218     if (format->align == '=') {
1219         PyErr_SetString(PyExc_ValueError,
1220                         "'=' alignment flag is not allowed in complex format "
1221                         "specifier");
1222         goto done;
1223     }
1224 
1225     re = PyComplex_RealAsDouble(value);
1226     if (re == -1.0 && PyErr_Occurred())
1227         goto done;
1228     im = PyComplex_ImagAsDouble(value);
1229     if (im == -1.0 && PyErr_Occurred())
1230         goto done;
1231 
1232     if (format->alternate)
1233         flags |= Py_DTSF_ALT;
1234 
1235     if (type == '\0') {
1236         /* Omitted type specifier. Should be like str(self). */
1237         type = 'r';
1238         default_precision = 0;
1239         if (re == 0.0 && copysign(1.0, re) == 1.0)
1240             skip_re = 1;
1241         else
1242             add_parens = 1;
1243     }
1244 
1245     if (type == 'n')
1246         /* 'n' is the same as 'g', except for the locale used to
1247            format the result. We take care of that later. */
1248         type = 'g';
1249 
1250     if (precision < 0)
1251         precision = default_precision;
1252     else if (type == 'r')
1253         type = 'g';
1254 
1255     /* Cast "type", because if we're in unicode we need to pass an
1256        8-bit char. This is safe, because we've restricted what "type"
1257        can be. */
1258     re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1259                                    &re_float_type);
1260     if (re_buf == NULL)
1261         goto done;
1262     im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1263                                    &im_float_type);
1264     if (im_buf == NULL)
1265         goto done;
1266 
1267     n_re_digits = strlen(re_buf);
1268     n_im_digits = strlen(im_buf);
1269 
1270     /* Since there is no unicode version of PyOS_double_to_string,
1271        just use the 8 bit version and then convert to unicode. */
1272     re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1273     if (re_unicode_tmp == NULL)
1274         goto done;
1275     i_re = 0;
1276 
1277     im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1278     if (im_unicode_tmp == NULL)
1279         goto done;
1280     i_im = 0;
1281 
1282     /* Is a sign character present in the output?  If so, remember it
1283        and skip it */
1284     if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1285         re_sign_char = '-';
1286         ++i_re;
1287         --n_re_digits;
1288     }
1289     if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1290         im_sign_char = '-';
1291         ++i_im;
1292         --n_im_digits;
1293     }
1294 
1295     /* Determine if we have any "remainder" (after the digits, might include
1296        decimal or exponent or both (or neither)) */
1297     parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1298                  &n_re_remainder, &re_has_decimal);
1299     parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1300                  &n_im_remainder, &im_has_decimal);
1301 
1302     /* Determine the grouping, separator, and decimal point, if any. */
1303     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1304                         format->thousands_separators,
1305                         &locale) == -1)
1306         goto done;
1307 
1308     /* Turn off any padding. We'll do it later after we've composed
1309        the numbers without padding. */
1310     tmp_format.fill_char = '\0';
1311     tmp_format.align = '<';
1312     tmp_format.width = -1;
1313 
1314     /* Calculate how much memory we'll need. */
1315     n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1316                                     i_re, i_re + n_re_digits, n_re_remainder,
1317                                     re_has_decimal, &locale, &tmp_format,
1318                                     &maxchar);
1319 
1320     /* Same formatting, but always include a sign, unless the real part is
1321      * going to be omitted, in which case we use whatever sign convention was
1322      * requested by the original format. */
1323     if (!skip_re)
1324         tmp_format.sign = '+';
1325     n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1326                                     i_im, i_im + n_im_digits, n_im_remainder,
1327                                     im_has_decimal, &locale, &tmp_format,
1328                                     &maxchar);
1329 
1330     if (skip_re)
1331         n_re_total = 0;
1332 
1333     /* Add 1 for the 'j', and optionally 2 for parens. */
1334     calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1335                  format->width, format->align, &lpad, &rpad, &total);
1336 
1337     if (lpad || rpad)
1338         maxchar = Py_MAX(maxchar, format->fill_char);
1339 
1340     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1341         goto done;
1342     rkind = writer->kind;
1343     rdata = writer->data;
1344 
1345     /* Populate the memory. First, the padding. */
1346     result = fill_padding(writer,
1347                           n_re_total + n_im_total + 1 + add_parens * 2,
1348                           format->fill_char, lpad, rpad);
1349     if (result == -1)
1350         goto done;
1351 
1352     if (add_parens) {
1353         PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1354         writer->pos++;
1355     }
1356 
1357     if (!skip_re) {
1358         result = fill_number(writer, &re_spec,
1359                              re_unicode_tmp, i_re, i_re + n_re_digits,
1360                              NULL, 0,
1361                              0,
1362                              &locale, 0);
1363         if (result == -1)
1364             goto done;
1365     }
1366     result = fill_number(writer, &im_spec,
1367                          im_unicode_tmp, i_im, i_im + n_im_digits,
1368                          NULL, 0,
1369                          0,
1370                          &locale, 0);
1371     if (result == -1)
1372         goto done;
1373     PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1374     writer->pos++;
1375 
1376     if (add_parens) {
1377         PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1378         writer->pos++;
1379     }
1380 
1381     writer->pos += rpad;
1382 
1383 done:
1384     PyMem_Free(re_buf);
1385     PyMem_Free(im_buf);
1386     Py_XDECREF(re_unicode_tmp);
1387     Py_XDECREF(im_unicode_tmp);
1388     free_locale_info(&locale);
1389     return result;
1390 }
1391 
1392 /************************************************************************/
1393 /*********** built in formatters ****************************************/
1394 /************************************************************************/
1395 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1396 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1397 {
1398     PyObject *str;
1399     int err;
1400 
1401     str = PyObject_Str(obj);
1402     if (str == NULL)
1403         return -1;
1404     err = _PyUnicodeWriter_WriteStr(writer, str);
1405     Py_DECREF(str);
1406     return err;
1407 }
1408 
1409 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1410 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1411                                 PyObject *obj,
1412                                 PyObject *format_spec,
1413                                 Py_ssize_t start, Py_ssize_t end)
1414 {
1415     InternalFormatSpec format;
1416 
1417     assert(PyUnicode_Check(obj));
1418 
1419     /* check for the special case of zero length format spec, make
1420        it equivalent to str(obj) */
1421     if (start == end) {
1422         if (PyUnicode_CheckExact(obj))
1423             return _PyUnicodeWriter_WriteStr(writer, obj);
1424         else
1425             return format_obj(obj, writer);
1426     }
1427 
1428     /* parse the format_spec */
1429     if (!parse_internal_render_format_spec(format_spec, start, end,
1430                                            &format, 's', '<'))
1431         return -1;
1432 
1433     /* type conversion? */
1434     switch (format.type) {
1435     case 's':
1436         /* no type conversion needed, already a string.  do the formatting */
1437         return format_string_internal(obj, &format, writer);
1438     default:
1439         /* unknown */
1440         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1441         return -1;
1442     }
1443 }
1444 
1445 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1446 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1447                              PyObject *obj,
1448                              PyObject *format_spec,
1449                              Py_ssize_t start, Py_ssize_t end)
1450 {
1451     PyObject *tmp = NULL, *str = NULL;
1452     InternalFormatSpec format;
1453     int result = -1;
1454 
1455     /* check for the special case of zero length format spec, make
1456        it equivalent to str(obj) */
1457     if (start == end) {
1458         if (PyLong_CheckExact(obj))
1459             return _PyLong_FormatWriter(writer, obj, 10, 0);
1460         else
1461             return format_obj(obj, writer);
1462     }
1463 
1464     /* parse the format_spec */
1465     if (!parse_internal_render_format_spec(format_spec, start, end,
1466                                            &format, 'd', '>'))
1467         goto done;
1468 
1469     /* type conversion? */
1470     switch (format.type) {
1471     case 'b':
1472     case 'c':
1473     case 'd':
1474     case 'o':
1475     case 'x':
1476     case 'X':
1477     case 'n':
1478         /* no type conversion needed, already an int.  do the formatting */
1479         result = format_long_internal(obj, &format, writer);
1480         break;
1481 
1482     case 'e':
1483     case 'E':
1484     case 'f':
1485     case 'F':
1486     case 'g':
1487     case 'G':
1488     case '%':
1489         /* convert to float */
1490         tmp = PyNumber_Float(obj);
1491         if (tmp == NULL)
1492             goto done;
1493         result = format_float_internal(tmp, &format, writer);
1494         break;
1495 
1496     default:
1497         /* unknown */
1498         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1499         goto done;
1500     }
1501 
1502 done:
1503     Py_XDECREF(tmp);
1504     Py_XDECREF(str);
1505     return result;
1506 }
1507 
1508 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1509 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1510                               PyObject *obj,
1511                               PyObject *format_spec,
1512                               Py_ssize_t start, Py_ssize_t end)
1513 {
1514     InternalFormatSpec format;
1515 
1516     /* check for the special case of zero length format spec, make
1517        it equivalent to str(obj) */
1518     if (start == end)
1519         return format_obj(obj, writer);
1520 
1521     /* parse the format_spec */
1522     if (!parse_internal_render_format_spec(format_spec, start, end,
1523                                            &format, '\0', '>'))
1524         return -1;
1525 
1526     /* type conversion? */
1527     switch (format.type) {
1528     case '\0': /* No format code: like 'g', but with at least one decimal. */
1529     case 'e':
1530     case 'E':
1531     case 'f':
1532     case 'F':
1533     case 'g':
1534     case 'G':
1535     case 'n':
1536     case '%':
1537         /* no conversion, already a float.  do the formatting */
1538         return format_float_internal(obj, &format, writer);
1539 
1540     default:
1541         /* unknown */
1542         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1543         return -1;
1544     }
1545 }
1546 
1547 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1548 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1549                                 PyObject *obj,
1550                                 PyObject *format_spec,
1551                                 Py_ssize_t start, Py_ssize_t end)
1552 {
1553     InternalFormatSpec format;
1554 
1555     /* check for the special case of zero length format spec, make
1556        it equivalent to str(obj) */
1557     if (start == end)
1558         return format_obj(obj, writer);
1559 
1560     /* parse the format_spec */
1561     if (!parse_internal_render_format_spec(format_spec, start, end,
1562                                            &format, '\0', '>'))
1563         return -1;
1564 
1565     /* type conversion? */
1566     switch (format.type) {
1567     case '\0': /* No format code: like 'g', but with at least one decimal. */
1568     case 'e':
1569     case 'E':
1570     case 'f':
1571     case 'F':
1572     case 'g':
1573     case 'G':
1574     case 'n':
1575         /* no conversion, already a complex.  do the formatting */
1576         return format_complex_internal(obj, &format, writer);
1577 
1578     default:
1579         /* unknown */
1580         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1581         return -1;
1582     }
1583 }
1584