/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4 
5 #include "Python.h"
6 #include <locale.h>
7 
8 /* Raises an exception about an unknown presentation type for this
9  * type. */
10 
11 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)12 unknown_presentation_type(Py_UCS4 presentation_type,
13                           const char* type_name)
14 {
15     /* %c might be out-of-range, hence the two cases. */
16     if (presentation_type > 32 && presentation_type < 128)
17         PyErr_Format(PyExc_ValueError,
18                      "Unknown format code '%c' "
19                      "for object of type '%.200s'",
20                      (char)presentation_type,
21                      type_name);
22     else
23         PyErr_Format(PyExc_ValueError,
24                      "Unknown format code '\\x%x' "
25                      "for object of type '%.200s'",
26                      (unsigned int)presentation_type,
27                      type_name);
28 }
29 
30 static void
invalid_thousands_separator_type(char specifier,Py_UCS4 presentation_type)31 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
32 {
33     assert(specifier == ',' || specifier == '_');
34     if (presentation_type > 32 && presentation_type < 128)
35         PyErr_Format(PyExc_ValueError,
36                      "Cannot specify '%c' with '%c'.",
37                      specifier, (char)presentation_type);
38     else
39         PyErr_Format(PyExc_ValueError,
40                      "Cannot specify '%c' with '\\x%x'.",
41                      specifier, (unsigned int)presentation_type);
42 }
43 
44 static void
invalid_comma_and_underscore(void)45 invalid_comma_and_underscore(void)
46 {
47     PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
48 }
49 
50 /*
51     get_integer consumes 0 or more decimal digit characters from an
52     input string, updates *result with the corresponding positive
53     integer, and returns the number of digits consumed.
54 
55     returns -1 on error.
56 */
57 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)58 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
59                   Py_ssize_t *result)
60 {
61     Py_ssize_t accumulator, digitval, pos = *ppos;
62     int numdigits;
63     int kind = PyUnicode_KIND(str);
64     void *data = PyUnicode_DATA(str);
65 
66     accumulator = numdigits = 0;
67     for (; pos < end; pos++, numdigits++) {
68         digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
69         if (digitval < 0)
70             break;
71         /*
72            Detect possible overflow before it happens:
73 
74               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
75               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
76         */
77         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
78             PyErr_Format(PyExc_ValueError,
79                          "Too many decimal digits in format string");
80             *ppos = pos;
81             return -1;
82         }
83         accumulator = accumulator * 10 + digitval;
84     }
85     *ppos = pos;
86     *result = accumulator;
87     return numdigits;
88 }
89 
90 /************************************************************************/
91 /*********** standard format specifier parsing **************************/
92 /************************************************************************/
93 
94 /* returns true if this character is a specifier alignment token */
95 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)96 is_alignment_token(Py_UCS4 c)
97 {
98     switch (c) {
99     case '<': case '>': case '=': case '^':
100         return 1;
101     default:
102         return 0;
103     }
104 }
105 
106 /* returns true if this character is a sign element */
107 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)108 is_sign_element(Py_UCS4 c)
109 {
110     switch (c) {
111     case ' ': case '+': case '-':
112         return 1;
113     default:
114         return 0;
115     }
116 }
117 
/* Locale type codes, selecting how digits are grouped.
   LT_NO_LOCALE must be zero so the value can be tested as a boolean. */
enum LocaleType {
    /* No thousands separator was requested. */
    LT_NO_LOCALE = 0,
    /* ',' separator; the code is the separator character itself. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' separator; the code is the separator character itself. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' separator with groups of four digits (bin/oct/hex). */
    LT_UNDER_FOUR_LOCALE,
    /* Separator and grouping are taken from the current locale. */
    LT_CURRENT_LOCALE
};
126 
127 typedef struct {
128     Py_UCS4 fill_char;
129     Py_UCS4 align;
130     int alternate;
131     Py_UCS4 sign;
132     Py_ssize_t width;
133     enum LocaleType thousands_separators;
134     Py_ssize_t precision;
135     Py_UCS4 type;
136 } InternalFormatSpec;
137 
#if 0
/* Occasionally useful for debugging. Should normally be commented out.

   Dumps every field of *format to stdout, one per line.  The Py_UCS4
   fields are unsigned and thousands_separators is an enum, so each
   argument is cast explicitly to the type its printf conversion
   expects. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %u\n",
           (unsigned int)format->fill_char);
    printf("internal format spec: align %u\n",
           (unsigned int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %u\n",
           (unsigned int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
155 
156 
157 /*
158   ptr points to the start of the format_spec, end points just past its end.
159   fills in format with the parsed information.
160   returns 1 on success, 0 on failure.
161   if failure, sets the exception
162 */
163 static int
parse_internal_render_format_spec(PyObject * format_spec,Py_ssize_t start,Py_ssize_t end,InternalFormatSpec * format,char default_type,char default_align)164 parse_internal_render_format_spec(PyObject *format_spec,
165                                   Py_ssize_t start, Py_ssize_t end,
166                                   InternalFormatSpec *format,
167                                   char default_type,
168                                   char default_align)
169 {
170     Py_ssize_t pos = start;
171     int kind = PyUnicode_KIND(format_spec);
172     void *data = PyUnicode_DATA(format_spec);
173     /* end-pos is used throughout this code to specify the length of
174        the input string */
175 #define READ_spec(index) PyUnicode_READ(kind, data, index)
176 
177     Py_ssize_t consumed;
178     int align_specified = 0;
179     int fill_char_specified = 0;
180 
181     format->fill_char = ' ';
182     format->align = default_align;
183     format->alternate = 0;
184     format->sign = '\0';
185     format->width = -1;
186     format->thousands_separators = LT_NO_LOCALE;
187     format->precision = -1;
188     format->type = default_type;
189 
190     /* If the second char is an alignment token,
191        then parse the fill char */
192     if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
193         format->align = READ_spec(pos+1);
194         format->fill_char = READ_spec(pos);
195         fill_char_specified = 1;
196         align_specified = 1;
197         pos += 2;
198     }
199     else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
200         format->align = READ_spec(pos);
201         align_specified = 1;
202         ++pos;
203     }
204 
205     /* Parse the various sign options */
206     if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
207         format->sign = READ_spec(pos);
208         ++pos;
209     }
210 
211     /* If the next character is #, we're in alternate mode.  This only
212        applies to integers. */
213     if (end-pos >= 1 && READ_spec(pos) == '#') {
214         format->alternate = 1;
215         ++pos;
216     }
217 
218     /* The special case for 0-padding (backwards compat) */
219     if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
220         format->fill_char = '0';
221         if (!align_specified) {
222             format->align = '=';
223         }
224         ++pos;
225     }
226 
227     consumed = get_integer(format_spec, &pos, end, &format->width);
228     if (consumed == -1)
229         /* Overflow error. Exception already set. */
230         return 0;
231 
232     /* If consumed is 0, we didn't consume any characters for the
233        width. In that case, reset the width to -1, because
234        get_integer() will have set it to zero. -1 is how we record
235        that the width wasn't specified. */
236     if (consumed == 0)
237         format->width = -1;
238 
239     /* Comma signifies add thousands separators */
240     if (end-pos && READ_spec(pos) == ',') {
241         format->thousands_separators = LT_DEFAULT_LOCALE;
242         ++pos;
243     }
244     /* Underscore signifies add thousands separators */
245     if (end-pos && READ_spec(pos) == '_') {
246         if (format->thousands_separators != LT_NO_LOCALE) {
247             invalid_comma_and_underscore();
248             return 0;
249         }
250         format->thousands_separators = LT_UNDERSCORE_LOCALE;
251         ++pos;
252     }
253     if (end-pos && READ_spec(pos) == ',') {
254         invalid_comma_and_underscore();
255         return 0;
256     }
257 
258     /* Parse field precision */
259     if (end-pos && READ_spec(pos) == '.') {
260         ++pos;
261 
262         consumed = get_integer(format_spec, &pos, end, &format->precision);
263         if (consumed == -1)
264             /* Overflow error. Exception already set. */
265             return 0;
266 
267         /* Not having a precision after a dot is an error. */
268         if (consumed == 0) {
269             PyErr_Format(PyExc_ValueError,
270                          "Format specifier missing precision");
271             return 0;
272         }
273 
274     }
275 
276     /* Finally, parse the type field. */
277 
278     if (end-pos > 1) {
279         /* More than one char remain, invalid format specifier. */
280         PyErr_Format(PyExc_ValueError, "Invalid format specifier");
281         return 0;
282     }
283 
284     if (end-pos == 1) {
285         format->type = READ_spec(pos);
286         ++pos;
287     }
288 
289     /* Do as much validating as we can, just by looking at the format
290        specifier.  Do not take into account what type of formatting
291        we're doing (int, float, string). */
292 
293     if (format->thousands_separators) {
294         switch (format->type) {
295         case 'd':
296         case 'e':
297         case 'f':
298         case 'g':
299         case 'E':
300         case 'G':
301         case '%':
302         case 'F':
303         case '\0':
304             /* These are allowed. See PEP 378.*/
305             break;
306         case 'b':
307         case 'o':
308         case 'x':
309         case 'X':
310             /* Underscores are allowed in bin/oct/hex. See PEP 515. */
311             if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
312                 /* Every four digits, not every three, in bin/oct/hex. */
313                 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
314                 break;
315             }
316             /* fall through */
317         default:
318             invalid_thousands_separator_type(format->thousands_separators, format->type);
319             return 0;
320         }
321     }
322 
323     assert (format->align <= 127);
324     assert (format->sign <= 127);
325     return 1;
326 }
327 
328 /* Calculate the padding needed. */
329 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)330 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
331              Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
332              Py_ssize_t *n_total)
333 {
334     if (width >= 0) {
335         if (nchars > width)
336             *n_total = nchars;
337         else
338             *n_total = width;
339     }
340     else {
341         /* not specified, use all of the chars and no more */
342         *n_total = nchars;
343     }
344 
345     /* Figure out how much leading space we need, based on the
346        aligning */
347     if (align == '>')
348         *n_lpadding = *n_total - nchars;
349     else if (align == '^')
350         *n_lpadding = (*n_total - nchars) / 2;
351     else if (align == '<' || align == '=')
352         *n_lpadding = 0;
353     else {
354         /* We should never have an unspecified alignment. */
355         Py_UNREACHABLE();
356     }
357 
358     *n_rpadding = *n_total - nchars - *n_lpadding;
359 }
360 
361 /* Do the padding, and return a pointer to where the caller-supplied
362    content goes. */
363 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)364 fill_padding(_PyUnicodeWriter *writer,
365              Py_ssize_t nchars,
366              Py_UCS4 fill_char, Py_ssize_t n_lpadding,
367              Py_ssize_t n_rpadding)
368 {
369     Py_ssize_t pos;
370 
371     /* Pad on left. */
372     if (n_lpadding) {
373         pos = writer->pos;
374         _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
375     }
376 
377     /* Pad on right. */
378     if (n_rpadding) {
379         pos = writer->pos + nchars + n_lpadding;
380         _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
381     }
382 
383     /* Pointer to the user content. */
384     writer->pos += n_lpadding;
385     return 0;
386 }
387 
388 /************************************************************************/
389 /*********** common routines for numeric formatting *********************/
390 /************************************************************************/
391 
392 /* Locale info needed for formatting integers and the part of floats
393    before and including the decimal. Note that locales only support
394    8-bit chars, not unicode. */
395 typedef struct {
396     PyObject *decimal_point;
397     PyObject *thousands_sep;
398     const char *grouping;
399     char *grouping_buffer;
400 } LocaleInfo;
401 
402 #define STATIC_LOCALE_INFO_INIT {0, 0, 0, 0}
403 
404 /* describes the layout for an integer, see the comment in
405    calc_number_widths() for details */
406 typedef struct {
407     Py_ssize_t n_lpadding;
408     Py_ssize_t n_prefix;
409     Py_ssize_t n_spadding;
410     Py_ssize_t n_rpadding;
411     char sign;
412     Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
413     Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
414                                     any grouping chars. */
415     Py_ssize_t n_decimal;   /* 0 if only an integer */
416     Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
417                                excluding the decimal itself, if
418                                present. */
419 
420     /* These 2 are not the widths of fields, but are needed by
421        STRINGLIB_GROUPING. */
422     Py_ssize_t n_digits;    /* The number of digits before a decimal
423                                or exponent. */
424     Py_ssize_t n_min_width; /* The min_width we used when we computed
425                                the n_grouped_digits width. */
426 } NumberFieldWidths;
427 
428 
429 /* Given a number of the form:
430    digits[remainder]
431    where ptr points to the start and end points to the end, find where
432     the integer part ends. This could be a decimal, an exponent, both,
433     or neither.
434    If a decimal point is present, set *has_decimal and increment
435     remainder beyond it.
436    Results are undefined (but shouldn't crash) for improperly
437     formatted strings.
438 */
439 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)440 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
441              Py_ssize_t *n_remainder, int *has_decimal)
442 {
443     Py_ssize_t remainder;
444     int kind = PyUnicode_KIND(s);
445     void *data = PyUnicode_DATA(s);
446 
447     while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
448         ++pos;
449     remainder = pos;
450 
451     /* Does remainder start with a decimal point? */
452     *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
453 
454     /* Skip the decimal point. */
455     if (*has_decimal)
456         remainder++;
457 
458     *n_remainder = end - remainder;
459 }
460 
461 /* not all fields of format are used.  for example, precision is
462    unused.  should this take discrete params in order to be more clear
463    about what it does?  or is passing a single format parameter easier
464    and more efficient enough to justify a little obfuscation?
465    Return -1 on error. */
466 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,PyObject * number,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)467 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
468                    Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
469                    Py_ssize_t n_end, Py_ssize_t n_remainder,
470                    int has_decimal, const LocaleInfo *locale,
471                    const InternalFormatSpec *format, Py_UCS4 *maxchar)
472 {
473     Py_ssize_t n_non_digit_non_padding;
474     Py_ssize_t n_padding;
475 
476     spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
477     spec->n_lpadding = 0;
478     spec->n_prefix = n_prefix;
479     spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
480     spec->n_remainder = n_remainder;
481     spec->n_spadding = 0;
482     spec->n_rpadding = 0;
483     spec->sign = '\0';
484     spec->n_sign = 0;
485 
486     /* the output will look like:
487        |                                                                                         |
488        | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
489        |                                                                                         |
490 
491        sign is computed from format->sign and the actual
492        sign of the number
493 
494        prefix is given (it's for the '0x' prefix)
495 
496        digits is already known
497 
498        the total width is either given, or computed from the
499        actual digits
500 
501        only one of lpadding, spadding, and rpadding can be non-zero,
502        and it's calculated from the width and other fields
503     */
504 
505     /* compute the various parts we're going to write */
506     switch (format->sign) {
507     case '+':
508         /* always put a + or - */
509         spec->n_sign = 1;
510         spec->sign = (sign_char == '-' ? '-' : '+');
511         break;
512     case ' ':
513         spec->n_sign = 1;
514         spec->sign = (sign_char == '-' ? '-' : ' ');
515         break;
516     default:
517         /* Not specified, or the default (-) */
518         if (sign_char == '-') {
519             spec->n_sign = 1;
520             spec->sign = '-';
521         }
522     }
523 
524     /* The number of chars used for non-digits and non-padding. */
525     n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
526         spec->n_remainder;
527 
528     /* min_width can go negative, that's okay. format->width == -1 means
529        we don't care. */
530     if (format->fill_char == '0' && format->align == '=')
531         spec->n_min_width = format->width - n_non_digit_non_padding;
532     else
533         spec->n_min_width = 0;
534 
535     if (spec->n_digits == 0)
536         /* This case only occurs when using 'c' formatting, we need
537            to special case it because the grouping code always wants
538            to have at least one character. */
539         spec->n_grouped_digits = 0;
540     else {
541         Py_UCS4 grouping_maxchar;
542         spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
543             NULL, 0,
544             NULL, 0, spec->n_digits,
545             spec->n_min_width,
546             locale->grouping, locale->thousands_sep, &grouping_maxchar);
547         if (spec->n_grouped_digits == -1) {
548             return -1;
549         }
550         *maxchar = Py_MAX(*maxchar, grouping_maxchar);
551     }
552 
553     /* Given the desired width and the total of digit and non-digit
554        space we consume, see if we need any padding. format->width can
555        be negative (meaning no padding), but this code still works in
556        that case. */
557     n_padding = format->width -
558                         (n_non_digit_non_padding + spec->n_grouped_digits);
559     if (n_padding > 0) {
560         /* Some padding is needed. Determine if it's left, space, or right. */
561         switch (format->align) {
562         case '<':
563             spec->n_rpadding = n_padding;
564             break;
565         case '^':
566             spec->n_lpadding = n_padding / 2;
567             spec->n_rpadding = n_padding - spec->n_lpadding;
568             break;
569         case '=':
570             spec->n_spadding = n_padding;
571             break;
572         case '>':
573             spec->n_lpadding = n_padding;
574             break;
575         default:
576             /* Shouldn't get here, but treat it as '>' */
577             Py_UNREACHABLE();
578         }
579     }
580 
581     if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
582         *maxchar = Py_MAX(*maxchar, format->fill_char);
583 
584     if (spec->n_decimal)
585         *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
586 
587     return spec->n_lpadding + spec->n_sign + spec->n_prefix +
588         spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
589         spec->n_remainder + spec->n_rpadding;
590 }
591 
592 /* Fill in the digit parts of a numbers's string representation,
593    as determined in calc_number_widths().
594    Return -1 on error, or 0 on success. */
595 static int
fill_number(_PyUnicodeWriter * writer,const NumberFieldWidths * spec,PyObject * digits,Py_ssize_t d_start,Py_ssize_t d_end,PyObject * prefix,Py_ssize_t p_start,Py_UCS4 fill_char,LocaleInfo * locale,int toupper)596 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
597             PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
598             PyObject *prefix, Py_ssize_t p_start,
599             Py_UCS4 fill_char,
600             LocaleInfo *locale, int toupper)
601 {
602     /* Used to keep track of digits, decimal, and remainder. */
603     Py_ssize_t d_pos = d_start;
604     const unsigned int kind = writer->kind;
605     const void *data = writer->data;
606     Py_ssize_t r;
607 
608     if (spec->n_lpadding) {
609         _PyUnicode_FastFill(writer->buffer,
610                             writer->pos, spec->n_lpadding, fill_char);
611         writer->pos += spec->n_lpadding;
612     }
613     if (spec->n_sign == 1) {
614         PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
615         writer->pos++;
616     }
617     if (spec->n_prefix) {
618         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
619                                       prefix, p_start,
620                                       spec->n_prefix);
621         if (toupper) {
622             Py_ssize_t t;
623             for (t = 0; t < spec->n_prefix; t++) {
624                 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
625                 c = Py_TOUPPER(c);
626                 assert (c <= 127);
627                 PyUnicode_WRITE(kind, data, writer->pos + t, c);
628             }
629         }
630         writer->pos += spec->n_prefix;
631     }
632     if (spec->n_spadding) {
633         _PyUnicode_FastFill(writer->buffer,
634                             writer->pos, spec->n_spadding, fill_char);
635         writer->pos += spec->n_spadding;
636     }
637 
638     /* Only for type 'c' special case, it has no digits. */
639     if (spec->n_digits != 0) {
640         /* Fill the digits with InsertThousandsGrouping. */
641         r = _PyUnicode_InsertThousandsGrouping(
642                 writer, spec->n_grouped_digits,
643                 digits, d_pos, spec->n_digits,
644                 spec->n_min_width,
645                 locale->grouping, locale->thousands_sep, NULL);
646         if (r == -1)
647             return -1;
648         assert(r == spec->n_grouped_digits);
649         d_pos += spec->n_digits;
650     }
651     if (toupper) {
652         Py_ssize_t t;
653         for (t = 0; t < spec->n_grouped_digits; t++) {
654             Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
655             c = Py_TOUPPER(c);
656             if (c > 127) {
657                 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
658                 return -1;
659             }
660             PyUnicode_WRITE(kind, data, writer->pos + t, c);
661         }
662     }
663     writer->pos += spec->n_grouped_digits;
664 
665     if (spec->n_decimal) {
666         _PyUnicode_FastCopyCharacters(
667             writer->buffer, writer->pos,
668             locale->decimal_point, 0, spec->n_decimal);
669         writer->pos += spec->n_decimal;
670         d_pos += 1;
671     }
672 
673     if (spec->n_remainder) {
674         _PyUnicode_FastCopyCharacters(
675             writer->buffer, writer->pos,
676             digits, d_pos, spec->n_remainder);
677         writer->pos += spec->n_remainder;
678         /* d_pos += spec->n_remainder; */
679     }
680 
681     if (spec->n_rpadding) {
682         _PyUnicode_FastFill(writer->buffer,
683                             writer->pos, spec->n_rpadding,
684                             fill_char);
685         writer->pos += spec->n_rpadding;
686     }
687     return 0;
688 }
689 
/* Grouping description used when no thousands separator is wanted. */
static const char no_grouping[] = { CHAR_MAX };
691 
692 /* Find the decimal point character(s?), thousands_separator(s?), and
693    grouping description, either for the current locale if type is
694    LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
695    LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
696 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)697 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
698 {
699     switch (type) {
700     case LT_CURRENT_LOCALE: {
701         const char *grouping;
702         if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point,
703                                      &locale_info->thousands_sep,
704                                      &grouping) < 0) {
705             return -1;
706         }
707 
708         /* localeconv() grouping can become a dangling pointer or point
709            to a different string if another thread calls localeconv() during
710            the string formatting. Copy the string to avoid this risk. */
711         locale_info->grouping_buffer = _PyMem_Strdup(grouping);
712         if (locale_info->grouping_buffer == NULL) {
713             PyErr_NoMemory();
714             return -1;
715         }
716         locale_info->grouping = locale_info->grouping_buffer;
717         break;
718     }
719     case LT_DEFAULT_LOCALE:
720     case LT_UNDERSCORE_LOCALE:
721     case LT_UNDER_FOUR_LOCALE:
722         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
723         locale_info->thousands_sep = PyUnicode_FromOrdinal(
724             type == LT_DEFAULT_LOCALE ? ',' : '_');
725         if (!locale_info->decimal_point || !locale_info->thousands_sep)
726             return -1;
727         if (type != LT_UNDER_FOUR_LOCALE)
728             locale_info->grouping = "\3"; /* Group every 3 characters.  The
729                                          (implicit) trailing 0 means repeat
730                                          infinitely. */
731         else
732             locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
733         break;
734     case LT_NO_LOCALE:
735         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
736         locale_info->thousands_sep = PyUnicode_New(0, 0);
737         if (!locale_info->decimal_point || !locale_info->thousands_sep)
738             return -1;
739         locale_info->grouping = no_grouping;
740         break;
741     }
742     return 0;
743 }
744 
745 static void
free_locale_info(LocaleInfo * locale_info)746 free_locale_info(LocaleInfo *locale_info)
747 {
748     Py_XDECREF(locale_info->decimal_point);
749     Py_XDECREF(locale_info->thousands_sep);
750     PyMem_Free(locale_info->grouping_buffer);
751 }
752 
753 /************************************************************************/
754 /*********** string formatting ******************************************/
755 /************************************************************************/
756 
757 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)758 format_string_internal(PyObject *value, const InternalFormatSpec *format,
759                        _PyUnicodeWriter *writer)
760 {
761     Py_ssize_t lpad;
762     Py_ssize_t rpad;
763     Py_ssize_t total;
764     Py_ssize_t len;
765     int result = -1;
766     Py_UCS4 maxchar;
767 
768     assert(PyUnicode_IS_READY(value));
769     len = PyUnicode_GET_LENGTH(value);
770 
771     /* sign is not allowed on strings */
772     if (format->sign != '\0') {
773         PyErr_SetString(PyExc_ValueError,
774                         "Sign not allowed in string format specifier");
775         goto done;
776     }
777 
778     /* alternate is not allowed on strings */
779     if (format->alternate) {
780         PyErr_SetString(PyExc_ValueError,
781                         "Alternate form (#) not allowed in string format "
782                         "specifier");
783         goto done;
784     }
785 
786     /* '=' alignment not allowed on strings */
787     if (format->align == '=') {
788         PyErr_SetString(PyExc_ValueError,
789                         "'=' alignment not allowed "
790                         "in string format specifier");
791         goto done;
792     }
793 
794     if ((format->width == -1 || format->width <= len)
795         && (format->precision == -1 || format->precision >= len)) {
796         /* Fast path */
797         return _PyUnicodeWriter_WriteStr(writer, value);
798     }
799 
800     /* if precision is specified, output no more that format.precision
801        characters */
802     if (format->precision >= 0 && len >= format->precision) {
803         len = format->precision;
804     }
805 
806     calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
807 
808     maxchar = writer->maxchar;
809     if (lpad != 0 || rpad != 0)
810         maxchar = Py_MAX(maxchar, format->fill_char);
811     if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
812         Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
813         maxchar = Py_MAX(maxchar, valmaxchar);
814     }
815 
816     /* allocate the resulting string */
817     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
818         goto done;
819 
820     /* Write into that space. First the padding. */
821     result = fill_padding(writer, len, format->fill_char, lpad, rpad);
822     if (result == -1)
823         goto done;
824 
825     /* Then the source string. */
826     if (len) {
827         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
828                                       value, 0, len);
829     }
830     writer->pos += (len + rpad);
831     result = 0;
832 
833 done:
834     return result;
835 }
836 
837 
838 /************************************************************************/
839 /*********** long formatting ********************************************/
840 /************************************************************************/
841 
842 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)843 format_long_internal(PyObject *value, const InternalFormatSpec *format,
844                      _PyUnicodeWriter *writer)
845 {
846     int result = -1;
847     Py_UCS4 maxchar = 127;
848     PyObject *tmp = NULL;
849     Py_ssize_t inumeric_chars;
850     Py_UCS4 sign_char = '\0';
851     Py_ssize_t n_digits;       /* count of digits need from the computed
852                                   string */
853     Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
854                                    produces non-digits */
855     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
856     Py_ssize_t n_total;
857     Py_ssize_t prefix = 0;
858     NumberFieldWidths spec;
859     long x;
860 
861     /* Locale settings, either from the actual locale or
862        from a hard-code pseudo-locale */
863     LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
864 
865     /* no precision allowed on integers */
866     if (format->precision != -1) {
867         PyErr_SetString(PyExc_ValueError,
868                         "Precision not allowed in integer format specifier");
869         goto done;
870     }
871 
872     /* special case for character formatting */
873     if (format->type == 'c') {
874         /* error to specify a sign */
875         if (format->sign != '\0') {
876             PyErr_SetString(PyExc_ValueError,
877                             "Sign not allowed with integer"
878                             " format specifier 'c'");
879             goto done;
880         }
881         /* error to request alternate format */
882         if (format->alternate) {
883             PyErr_SetString(PyExc_ValueError,
884                             "Alternate form (#) not allowed with integer"
885                             " format specifier 'c'");
886             goto done;
887         }
888 
889         /* taken from unicodeobject.c formatchar() */
890         /* Integer input truncated to a character */
891         x = PyLong_AsLong(value);
892         if (x == -1 && PyErr_Occurred())
893             goto done;
894         if (x < 0 || x > 0x10ffff) {
895             PyErr_SetString(PyExc_OverflowError,
896                             "%c arg not in range(0x110000)");
897             goto done;
898         }
899         tmp = PyUnicode_FromOrdinal(x);
900         inumeric_chars = 0;
901         n_digits = 1;
902         maxchar = Py_MAX(maxchar, (Py_UCS4)x);
903 
904         /* As a sort-of hack, we tell calc_number_widths that we only
905            have "remainder" characters. calc_number_widths thinks
906            these are characters that don't get formatted, only copied
907            into the output string. We do this for 'c' formatting,
908            because the characters are likely to be non-digits. */
909         n_remainder = 1;
910     }
911     else {
912         int base;
913         int leading_chars_to_skip = 0;  /* Number of characters added by
914                                            PyNumber_ToBase that we want to
915                                            skip over. */
916 
917         /* Compute the base and how many characters will be added by
918            PyNumber_ToBase */
919         switch (format->type) {
920         case 'b':
921             base = 2;
922             leading_chars_to_skip = 2; /* 0b */
923             break;
924         case 'o':
925             base = 8;
926             leading_chars_to_skip = 2; /* 0o */
927             break;
928         case 'x':
929         case 'X':
930             base = 16;
931             leading_chars_to_skip = 2; /* 0x */
932             break;
933         default:  /* shouldn't be needed, but stops a compiler warning */
934         case 'd':
935         case 'n':
936             base = 10;
937             break;
938         }
939 
940         if (format->sign != '+' && format->sign != ' '
941             && format->width == -1
942             && format->type != 'X' && format->type != 'n'
943             && !format->thousands_separators
944             && PyLong_CheckExact(value))
945         {
946             /* Fast path */
947             return _PyLong_FormatWriter(writer, value, base, format->alternate);
948         }
949 
950         /* The number of prefix chars is the same as the leading
951            chars to skip */
952         if (format->alternate)
953             n_prefix = leading_chars_to_skip;
954 
955         /* Do the hard part, converting to a string in a given base */
956         tmp = _PyLong_Format(value, base);
957         if (tmp == NULL || PyUnicode_READY(tmp) == -1)
958             goto done;
959 
960         inumeric_chars = 0;
961         n_digits = PyUnicode_GET_LENGTH(tmp);
962 
963         prefix = inumeric_chars;
964 
965         /* Is a sign character present in the output?  If so, remember it
966            and skip it */
967         if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
968             sign_char = '-';
969             ++prefix;
970             ++leading_chars_to_skip;
971         }
972 
973         /* Skip over the leading chars (0x, 0b, etc.) */
974         n_digits -= leading_chars_to_skip;
975         inumeric_chars += leading_chars_to_skip;
976     }
977 
978     /* Determine the grouping, separator, and decimal point, if any. */
979     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
980                         format->thousands_separators,
981                         &locale) == -1)
982         goto done;
983 
984     /* Calculate how much memory we'll need. */
985     n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
986                                  inumeric_chars + n_digits, n_remainder, 0,
987                                  &locale, format, &maxchar);
988     if (n_total == -1) {
989         goto done;
990     }
991 
992     /* Allocate the memory. */
993     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
994         goto done;
995 
996     /* Populate the memory. */
997     result = fill_number(writer, &spec,
998                          tmp, inumeric_chars, inumeric_chars + n_digits,
999                          tmp, prefix, format->fill_char,
1000                          &locale, format->type == 'X');
1001 
1002 done:
1003     Py_XDECREF(tmp);
1004     free_locale_info(&locale);
1005     return result;
1006 }
1007 
1008 /************************************************************************/
1009 /*********** float formatting *******************************************/
1010 /************************************************************************/
1011 
1012 /* much of this is taken from unicodeobject.c */
1013 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1014 format_float_internal(PyObject *value,
1015                       const InternalFormatSpec *format,
1016                       _PyUnicodeWriter *writer)
1017 {
1018     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
1019     Py_ssize_t n_digits;
1020     Py_ssize_t n_remainder;
1021     Py_ssize_t n_total;
1022     int has_decimal;
1023     double val;
1024     int precision, default_precision = 6;
1025     Py_UCS4 type = format->type;
1026     int add_pct = 0;
1027     Py_ssize_t index;
1028     NumberFieldWidths spec;
1029     int flags = 0;
1030     int result = -1;
1031     Py_UCS4 maxchar = 127;
1032     Py_UCS4 sign_char = '\0';
1033     int float_type; /* Used to see if we have a nan, inf, or regular float. */
1034     PyObject *unicode_tmp = NULL;
1035 
1036     /* Locale settings, either from the actual locale or
1037        from a hard-code pseudo-locale */
1038     LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
1039 
1040     if (format->precision > INT_MAX) {
1041         PyErr_SetString(PyExc_ValueError, "precision too big");
1042         goto done;
1043     }
1044     precision = (int)format->precision;
1045 
1046     if (format->alternate)
1047         flags |= Py_DTSF_ALT;
1048 
1049     if (type == '\0') {
1050         /* Omitted type specifier.  Behaves in the same way as repr(x)
1051            and str(x) if no precision is given, else like 'g', but with
1052            at least one digit after the decimal point. */
1053         flags |= Py_DTSF_ADD_DOT_0;
1054         type = 'r';
1055         default_precision = 0;
1056     }
1057 
1058     if (type == 'n')
1059         /* 'n' is the same as 'g', except for the locale used to
1060            format the result. We take care of that later. */
1061         type = 'g';
1062 
1063     val = PyFloat_AsDouble(value);
1064     if (val == -1.0 && PyErr_Occurred())
1065         goto done;
1066 
1067     if (type == '%') {
1068         type = 'f';
1069         val *= 100;
1070         add_pct = 1;
1071     }
1072 
1073     if (precision < 0)
1074         precision = default_precision;
1075     else if (type == 'r')
1076         type = 'g';
1077 
1078     /* Cast "type", because if we're in unicode we need to pass an
1079        8-bit char. This is safe, because we've restricted what "type"
1080        can be. */
1081     buf = PyOS_double_to_string(val, (char)type, precision, flags,
1082                                 &float_type);
1083     if (buf == NULL)
1084         goto done;
1085     n_digits = strlen(buf);
1086 
1087     if (add_pct) {
1088         /* We know that buf has a trailing zero (since we just called
1089            strlen() on it), and we don't use that fact any more. So we
1090            can just write over the trailing zero. */
1091         buf[n_digits] = '%';
1092         n_digits += 1;
1093     }
1094 
1095     if (format->sign != '+' && format->sign != ' '
1096         && format->width == -1
1097         && format->type != 'n'
1098         && !format->thousands_separators)
1099     {
1100         /* Fast path */
1101         result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1102         PyMem_Free(buf);
1103         return result;
1104     }
1105 
1106     /* Since there is no unicode version of PyOS_double_to_string,
1107        just use the 8 bit version and then convert to unicode. */
1108     unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1109     PyMem_Free(buf);
1110     if (unicode_tmp == NULL)
1111         goto done;
1112 
1113     /* Is a sign character present in the output?  If so, remember it
1114        and skip it */
1115     index = 0;
1116     if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1117         sign_char = '-';
1118         ++index;
1119         --n_digits;
1120     }
1121 
1122     /* Determine if we have any "remainder" (after the digits, might include
1123        decimal or exponent or both (or neither)) */
1124     parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1125 
1126     /* Determine the grouping, separator, and decimal point, if any. */
1127     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1128                         format->thousands_separators,
1129                         &locale) == -1)
1130         goto done;
1131 
1132     /* Calculate how much memory we'll need. */
1133     n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
1134                                  index + n_digits, n_remainder, has_decimal,
1135                                  &locale, format, &maxchar);
1136     if (n_total == -1) {
1137         goto done;
1138     }
1139 
1140     /* Allocate the memory. */
1141     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1142         goto done;
1143 
1144     /* Populate the memory. */
1145     result = fill_number(writer, &spec,
1146                          unicode_tmp, index, index + n_digits,
1147                          NULL, 0, format->fill_char,
1148                          &locale, 0);
1149 
1150 done:
1151     Py_XDECREF(unicode_tmp);
1152     free_locale_info(&locale);
1153     return result;
1154 }
1155 
1156 /************************************************************************/
1157 /*********** complex formatting *****************************************/
1158 /************************************************************************/
1159 
/* Format the complex number `value` according to `format` and write the
   result into `writer`.

   The real and imaginary parts are each converted with
   PyOS_double_to_string() using the same type code, precision and
   flags.  Both parts are sized with padding switched off (via a private
   copy of the format spec); the combined text -- optional '(', real
   part, imaginary part, 'j', optional ')' -- is then padded as a whole
   using the caller's width, alignment and fill character.

   A precision above INT_MAX, a '0' fill character and '=' alignment are
   all rejected with ValueError.  When no type code is given, the real
   part is left out if it is positive zero; otherwise the output is
   wrapped in parentheses.

   `result` starts at -1 and is what every `goto done` failure path
   returns; on the success path the function returns the value of the
   last fill_number() call. */
static int
format_complex_internal(PyObject *value,
                        const InternalFormatSpec *format,
                        _PyUnicodeWriter *writer)
{
    double re;
    double im;
    char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
    char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */

    /* Private copy of the spec; padding and sign fields are overridden
       below without touching the caller's spec. */
    InternalFormatSpec tmp_format = *format;
    Py_ssize_t n_re_digits;
    Py_ssize_t n_im_digits;
    Py_ssize_t n_re_remainder;
    Py_ssize_t n_im_remainder;
    Py_ssize_t n_re_total;
    Py_ssize_t n_im_total;
    int re_has_decimal;
    int im_has_decimal;
    int precision, default_precision = 6;
    Py_UCS4 type = format->type;
    Py_ssize_t i_re;           /* index of the first non-sign char of re */
    Py_ssize_t i_im;           /* index of the first non-sign char of im */
    NumberFieldWidths re_spec;
    NumberFieldWidths im_spec;
    int flags = 0;
    int result = -1;
    Py_UCS4 maxchar = 127;
    enum PyUnicode_Kind rkind;
    void *rdata;
    Py_UCS4 re_sign_char = '\0';
    Py_UCS4 im_sign_char = '\0';
    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
    int im_float_type;
    int add_parens = 0;        /* 1 when the output is wrapped in "(...)" */
    int skip_re = 0;           /* 1 when the real part is not emitted */
    Py_ssize_t lpad;
    Py_ssize_t rpad;
    Py_ssize_t total;
    PyObject *re_unicode_tmp = NULL;
    PyObject *im_unicode_tmp = NULL;

    /* Locale settings, either from the actual locale or
       from a hard-coded pseudo-locale */
    LocaleInfo locale = STATIC_LOCALE_INFO_INIT;

    if (format->precision > INT_MAX) {
        PyErr_SetString(PyExc_ValueError, "precision too big");
        goto done;
    }
    precision = (int)format->precision;

    /* Zero padding is not allowed. */
    if (format->fill_char == '0') {
        PyErr_SetString(PyExc_ValueError,
                        "Zero padding is not allowed in complex format "
                        "specifier");
        goto done;
    }

    /* Neither is '=' alignment. */
    if (format->align == '=') {
        PyErr_SetString(PyExc_ValueError,
                        "'=' alignment flag is not allowed in complex format "
                        "specifier");
        goto done;
    }

    re = PyComplex_RealAsDouble(value);
    if (re == -1.0 && PyErr_Occurred())
        goto done;
    im = PyComplex_ImagAsDouble(value);
    if (im == -1.0 && PyErr_Occurred())
        goto done;

    if (format->alternate)
        flags |= Py_DTSF_ALT;

    if (type == '\0') {
        /* Omitted type specifier. Should be like str(self). */
        type = 'r';
        default_precision = 0;
        /* copysign() distinguishes +0.0 from -0.0: only a positive-zero
           real part is dropped; anything else gets parentheses. */
        if (re == 0.0 && copysign(1.0, re) == 1.0)
            skip_re = 1;
        else
            add_parens = 1;
    }

    if (type == 'n')
        /* 'n' is the same as 'g', except for the locale used to
           format the result. We take care of that later. */
        type = 'g';

    /* No precision given: use the default.  A precision given together
       with the implicit 'r' type switches to 'g'. */
    if (precision < 0)
        precision = default_precision;
    else if (type == 'r')
        type = 'g';

    /* Cast "type", because if we're in unicode we need to pass an
       8-bit char. This is safe, because we've restricted what "type"
       can be. */
    re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
                                   &re_float_type);
    if (re_buf == NULL)
        goto done;
    im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
                                   &im_float_type);
    if (im_buf == NULL)
        goto done;

    n_re_digits = strlen(re_buf);
    n_im_digits = strlen(im_buf);

    /* Since there is no unicode version of PyOS_double_to_string,
       just use the 8 bit version and then convert to unicode. */
    re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
    if (re_unicode_tmp == NULL)
        goto done;
    i_re = 0;

    im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
    if (im_unicode_tmp == NULL)
        goto done;
    i_im = 0;

    /* Is a sign character present in the output?  If so, remember it
       and skip it */
    if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
        re_sign_char = '-';
        ++i_re;
        --n_re_digits;
    }
    if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
        im_sign_char = '-';
        ++i_im;
        --n_im_digits;
    }

    /* Determine if we have any "remainder" (after the digits, might include
       decimal or exponent or both (or neither)) */
    parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
                 &n_re_remainder, &re_has_decimal);
    parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
                 &n_im_remainder, &im_has_decimal);

    /* Determine the grouping, separator, and decimal point, if any. */
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
                        format->thousands_separators,
                        &locale) == -1)
        goto done;

    /* Turn off any padding. We'll do it later after we've composed
       the numbers without padding. */
    tmp_format.fill_char = '\0';
    tmp_format.align = '<';
    tmp_format.width = -1;

    /* Calculate how much memory we'll need. */
    n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
                                    i_re, i_re + n_re_digits, n_re_remainder,
                                    re_has_decimal, &locale, &tmp_format,
                                    &maxchar);
    if (n_re_total == -1) {
        goto done;
    }

    /* Same formatting, but always include a sign, unless the real part is
     * going to be omitted, in which case we use whatever sign convention was
     * requested by the original format. */
    if (!skip_re)
        tmp_format.sign = '+';
    n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
                                    i_im, i_im + n_im_digits, n_im_remainder,
                                    im_has_decimal, &locale, &tmp_format,
                                    &maxchar);
    if (n_im_total == -1) {
        goto done;
    }

    /* An omitted real part contributes nothing to the field width. */
    if (skip_re)
        n_re_total = 0;

    /* Add 1 for the 'j', and optionally 2 for parens. */
    calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
                 format->width, format->align, &lpad, &rpad, &total);

    /* The fill character only matters for maxchar if padding is
       actually going to be written. */
    if (lpad || rpad)
        maxchar = Py_MAX(maxchar, format->fill_char);

    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
        goto done;
    /* Read kind/data only after the Prepare call above, and use them
       for the direct '(' / 'j' / ')' writes below. */
    rkind = writer->kind;
    rdata = writer->data;

    /* Populate the memory. First, the padding. */
    result = fill_padding(writer,
                          n_re_total + n_im_total + 1 + add_parens * 2,
                          format->fill_char, lpad, rpad);
    if (result == -1)
        goto done;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
        writer->pos++;
    }

    /* Real part (unless omitted), then the imaginary part.  Both are
       written with a fill character of 0 because the padding for the
       whole field was already handled by fill_padding() above. */
    if (!skip_re) {
        result = fill_number(writer, &re_spec,
                             re_unicode_tmp, i_re, i_re + n_re_digits,
                             NULL, 0,
                             0,
                             &locale, 0);
        if (result == -1)
            goto done;
    }
    result = fill_number(writer, &im_spec,
                         im_unicode_tmp, i_im, i_im + n_im_digits,
                         NULL, 0,
                         0,
                         &locale, 0);
    if (result == -1)
        goto done;
    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
    writer->pos++;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
        writer->pos++;
    }

    /* Advance the write position past the right-hand padding. */
    writer->pos += rpad;

done:
    /* Every pointer released here is either NULL or still owned by
       this function, so this cleanup serves all exit paths. */
    PyMem_Free(re_buf);
    PyMem_Free(im_buf);
    Py_XDECREF(re_unicode_tmp);
    Py_XDECREF(im_unicode_tmp);
    free_locale_info(&locale);
    return result;
}
1400 
1401 /************************************************************************/
1402 /*********** built in formatters ****************************************/
1403 /************************************************************************/
1404 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1405 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1406 {
1407     PyObject *str;
1408     int err;
1409 
1410     str = PyObject_Str(obj);
1411     if (str == NULL)
1412         return -1;
1413     err = _PyUnicodeWriter_WriteStr(writer, str);
1414     Py_DECREF(str);
1415     return err;
1416 }
1417 
1418 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1419 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1420                                 PyObject *obj,
1421                                 PyObject *format_spec,
1422                                 Py_ssize_t start, Py_ssize_t end)
1423 {
1424     InternalFormatSpec format;
1425 
1426     assert(PyUnicode_Check(obj));
1427 
1428     /* check for the special case of zero length format spec, make
1429        it equivalent to str(obj) */
1430     if (start == end) {
1431         if (PyUnicode_CheckExact(obj))
1432             return _PyUnicodeWriter_WriteStr(writer, obj);
1433         else
1434             return format_obj(obj, writer);
1435     }
1436 
1437     /* parse the format_spec */
1438     if (!parse_internal_render_format_spec(format_spec, start, end,
1439                                            &format, 's', '<'))
1440         return -1;
1441 
1442     /* type conversion? */
1443     switch (format.type) {
1444     case 's':
1445         /* no type conversion needed, already a string.  do the formatting */
1446         return format_string_internal(obj, &format, writer);
1447     default:
1448         /* unknown */
1449         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1450         return -1;
1451     }
1452 }
1453 
1454 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1455 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1456                              PyObject *obj,
1457                              PyObject *format_spec,
1458                              Py_ssize_t start, Py_ssize_t end)
1459 {
1460     PyObject *tmp = NULL, *str = NULL;
1461     InternalFormatSpec format;
1462     int result = -1;
1463 
1464     /* check for the special case of zero length format spec, make
1465        it equivalent to str(obj) */
1466     if (start == end) {
1467         if (PyLong_CheckExact(obj))
1468             return _PyLong_FormatWriter(writer, obj, 10, 0);
1469         else
1470             return format_obj(obj, writer);
1471     }
1472 
1473     /* parse the format_spec */
1474     if (!parse_internal_render_format_spec(format_spec, start, end,
1475                                            &format, 'd', '>'))
1476         goto done;
1477 
1478     /* type conversion? */
1479     switch (format.type) {
1480     case 'b':
1481     case 'c':
1482     case 'd':
1483     case 'o':
1484     case 'x':
1485     case 'X':
1486     case 'n':
1487         /* no type conversion needed, already an int.  do the formatting */
1488         result = format_long_internal(obj, &format, writer);
1489         break;
1490 
1491     case 'e':
1492     case 'E':
1493     case 'f':
1494     case 'F':
1495     case 'g':
1496     case 'G':
1497     case '%':
1498         /* convert to float */
1499         tmp = PyNumber_Float(obj);
1500         if (tmp == NULL)
1501             goto done;
1502         result = format_float_internal(tmp, &format, writer);
1503         break;
1504 
1505     default:
1506         /* unknown */
1507         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1508         goto done;
1509     }
1510 
1511 done:
1512     Py_XDECREF(tmp);
1513     Py_XDECREF(str);
1514     return result;
1515 }
1516 
1517 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1518 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1519                               PyObject *obj,
1520                               PyObject *format_spec,
1521                               Py_ssize_t start, Py_ssize_t end)
1522 {
1523     InternalFormatSpec format;
1524 
1525     /* check for the special case of zero length format spec, make
1526        it equivalent to str(obj) */
1527     if (start == end)
1528         return format_obj(obj, writer);
1529 
1530     /* parse the format_spec */
1531     if (!parse_internal_render_format_spec(format_spec, start, end,
1532                                            &format, '\0', '>'))
1533         return -1;
1534 
1535     /* type conversion? */
1536     switch (format.type) {
1537     case '\0': /* No format code: like 'g', but with at least one decimal. */
1538     case 'e':
1539     case 'E':
1540     case 'f':
1541     case 'F':
1542     case 'g':
1543     case 'G':
1544     case 'n':
1545     case '%':
1546         /* no conversion, already a float.  do the formatting */
1547         return format_float_internal(obj, &format, writer);
1548 
1549     default:
1550         /* unknown */
1551         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1552         return -1;
1553     }
1554 }
1555 
1556 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1557 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1558                                 PyObject *obj,
1559                                 PyObject *format_spec,
1560                                 Py_ssize_t start, Py_ssize_t end)
1561 {
1562     InternalFormatSpec format;
1563 
1564     /* check for the special case of zero length format spec, make
1565        it equivalent to str(obj) */
1566     if (start == end)
1567         return format_obj(obj, writer);
1568 
1569     /* parse the format_spec */
1570     if (!parse_internal_render_format_spec(format_spec, start, end,
1571                                            &format, '\0', '>'))
1572         return -1;
1573 
1574     /* type conversion? */
1575     switch (format.type) {
1576     case '\0': /* No format code: like 'g', but with at least one decimal. */
1577     case 'e':
1578     case 'E':
1579     case 'f':
1580     case 'F':
1581     case 'g':
1582     case 'G':
1583     case 'n':
1584         /* no conversion, already a complex.  do the formatting */
1585         return format_complex_internal(obj, &format, writer);
1586 
1587     default:
1588         /* unknown */
1589         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1590         return -1;
1591     }
1592 }
1593