/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5 #include "Python.h"
6 #include <locale.h>
7
8 /* Raises an exception about an unknown presentation type for this
9 * type. */
10
11 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)12 unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14 {
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28 }
29
30 static void
invalid_thousands_separator_type(char specifier,Py_UCS4 presentation_type)31 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
32 {
33 assert(specifier == ',' || specifier == '_');
34 if (presentation_type > 32 && presentation_type < 128)
35 PyErr_Format(PyExc_ValueError,
36 "Cannot specify '%c' with '%c'.",
37 specifier, (char)presentation_type);
38 else
39 PyErr_Format(PyExc_ValueError,
40 "Cannot specify '%c' with '\\x%x'.",
41 specifier, (unsigned int)presentation_type);
42 }
43
44 static void
invalid_comma_and_underscore(void)45 invalid_comma_and_underscore(void)
46 {
47 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
48 }
49
50 /*
51 get_integer consumes 0 or more decimal digit characters from an
52 input string, updates *result with the corresponding positive
53 integer, and returns the number of digits consumed.
54
55 returns -1 on error.
56 */
57 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)58 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
59 Py_ssize_t *result)
60 {
61 Py_ssize_t accumulator, digitval, pos = *ppos;
62 int numdigits;
63 int kind = PyUnicode_KIND(str);
64 void *data = PyUnicode_DATA(str);
65
66 accumulator = numdigits = 0;
67 for (; pos < end; pos++, numdigits++) {
68 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
69 if (digitval < 0)
70 break;
71 /*
72 Detect possible overflow before it happens:
73
74 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
75 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
76 */
77 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
78 PyErr_Format(PyExc_ValueError,
79 "Too many decimal digits in format string");
80 *ppos = pos;
81 return -1;
82 }
83 accumulator = accumulator * 10 + digitval;
84 }
85 *ppos = pos;
86 *result = accumulator;
87 return numdigits;
88 }
89
90 /************************************************************************/
91 /*********** standard format specifier parsing **************************/
92 /************************************************************************/
93
94 /* returns true if this character is a specifier alignment token */
95 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)96 is_alignment_token(Py_UCS4 c)
97 {
98 switch (c) {
99 case '<': case '>': case '=': case '^':
100 return 1;
101 default:
102 return 0;
103 }
104 }
105
106 /* returns true if this character is a sign element */
107 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)108 is_sign_element(Py_UCS4 c)
109 {
110 switch (c) {
111 case ' ': case '+': case '-':
112 return 1;
113 default:
114 return 0;
115 }
116 }
117
/* Locale type codes, selecting where the decimal point, thousands
   separator and grouping come from (see get_locale_info()).

   LT_NO_LOCALE must be zero: the parser tests the thousands_separators
   field for truth to find out whether grouping was requested.

   LT_DEFAULT_LOCALE and LT_UNDERSCORE_LOCALE deliberately have the
   value of the separator character they stand for, because the parser
   passes the field straight to invalid_thousands_separator_type() as
   the character to show in the error message. */
enum LocaleType {
    /* No grouping was requested. */
    LT_NO_LOCALE = 0,
    /* ',' separator, groups of three digits. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' separator, groups of three digits. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' separator, groups of four digits (bin/oct/hex). */
    LT_UNDER_FOUR_LOCALE,
    /* Separator, decimal point and grouping of the current locale. */
    LT_CURRENT_LOCALE
};
126
/* The parsed form of a standard format specifier, as filled in by
   parse_internal_render_format_spec(). */
typedef struct {
    Py_UCS4 fill_char;      /* padding character; ' ' unless specified */
    Py_UCS4 align;          /* '<', '>', '=' or '^'; the caller's default
                               alignment unless specified */
    int alternate;          /* 1 if '#' was given, else 0 */
    Py_UCS4 sign;           /* ' ', '+' or '-'; '\0' if not specified */
    Py_ssize_t width;       /* minimum field width; -1 if not specified */
    enum LocaleType thousands_separators;   /* requested digit grouping;
                               LT_NO_LOCALE if none */
    Py_ssize_t precision;   /* -1 if not specified */
    Py_UCS4 type;           /* presentation type character; the caller's
                               default type unless specified */
} InternalFormatSpec;
137
#if 0
/* Occasionally useful for debugging.  Should normally be commented out.

   Prints every field of *format to stdout, one field per line, followed
   by an empty line.  The whole function is compiled out by the
   surrounding #if 0. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n", format->fill_char);
    printf("internal format spec: align %d\n", format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", format->type);
    printf("\n");
}
#endif
155
156
157 /*
158 ptr points to the start of the format_spec, end points just past its end.
159 fills in format with the parsed information.
160 returns 1 on success, 0 on failure.
161 if failure, sets the exception
162 */
163 static int
parse_internal_render_format_spec(PyObject * format_spec,Py_ssize_t start,Py_ssize_t end,InternalFormatSpec * format,char default_type,char default_align)164 parse_internal_render_format_spec(PyObject *format_spec,
165 Py_ssize_t start, Py_ssize_t end,
166 InternalFormatSpec *format,
167 char default_type,
168 char default_align)
169 {
170 Py_ssize_t pos = start;
171 int kind = PyUnicode_KIND(format_spec);
172 void *data = PyUnicode_DATA(format_spec);
173 /* end-pos is used throughout this code to specify the length of
174 the input string */
175 #define READ_spec(index) PyUnicode_READ(kind, data, index)
176
177 Py_ssize_t consumed;
178 int align_specified = 0;
179 int fill_char_specified = 0;
180
181 format->fill_char = ' ';
182 format->align = default_align;
183 format->alternate = 0;
184 format->sign = '\0';
185 format->width = -1;
186 format->thousands_separators = LT_NO_LOCALE;
187 format->precision = -1;
188 format->type = default_type;
189
190 /* If the second char is an alignment token,
191 then parse the fill char */
192 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
193 format->align = READ_spec(pos+1);
194 format->fill_char = READ_spec(pos);
195 fill_char_specified = 1;
196 align_specified = 1;
197 pos += 2;
198 }
199 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
200 format->align = READ_spec(pos);
201 align_specified = 1;
202 ++pos;
203 }
204
205 /* Parse the various sign options */
206 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
207 format->sign = READ_spec(pos);
208 ++pos;
209 }
210
211 /* If the next character is #, we're in alternate mode. This only
212 applies to integers. */
213 if (end-pos >= 1 && READ_spec(pos) == '#') {
214 format->alternate = 1;
215 ++pos;
216 }
217
218 /* The special case for 0-padding (backwards compat) */
219 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
220 format->fill_char = '0';
221 if (!align_specified) {
222 format->align = '=';
223 }
224 ++pos;
225 }
226
227 consumed = get_integer(format_spec, &pos, end, &format->width);
228 if (consumed == -1)
229 /* Overflow error. Exception already set. */
230 return 0;
231
232 /* If consumed is 0, we didn't consume any characters for the
233 width. In that case, reset the width to -1, because
234 get_integer() will have set it to zero. -1 is how we record
235 that the width wasn't specified. */
236 if (consumed == 0)
237 format->width = -1;
238
239 /* Comma signifies add thousands separators */
240 if (end-pos && READ_spec(pos) == ',') {
241 format->thousands_separators = LT_DEFAULT_LOCALE;
242 ++pos;
243 }
244 /* Underscore signifies add thousands separators */
245 if (end-pos && READ_spec(pos) == '_') {
246 if (format->thousands_separators != LT_NO_LOCALE) {
247 invalid_comma_and_underscore();
248 return 0;
249 }
250 format->thousands_separators = LT_UNDERSCORE_LOCALE;
251 ++pos;
252 }
253 if (end-pos && READ_spec(pos) == ',') {
254 invalid_comma_and_underscore();
255 return 0;
256 }
257
258 /* Parse field precision */
259 if (end-pos && READ_spec(pos) == '.') {
260 ++pos;
261
262 consumed = get_integer(format_spec, &pos, end, &format->precision);
263 if (consumed == -1)
264 /* Overflow error. Exception already set. */
265 return 0;
266
267 /* Not having a precision after a dot is an error. */
268 if (consumed == 0) {
269 PyErr_Format(PyExc_ValueError,
270 "Format specifier missing precision");
271 return 0;
272 }
273
274 }
275
276 /* Finally, parse the type field. */
277
278 if (end-pos > 1) {
279 /* More than one char remain, invalid format specifier. */
280 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
281 return 0;
282 }
283
284 if (end-pos == 1) {
285 format->type = READ_spec(pos);
286 ++pos;
287 }
288
289 /* Do as much validating as we can, just by looking at the format
290 specifier. Do not take into account what type of formatting
291 we're doing (int, float, string). */
292
293 if (format->thousands_separators) {
294 switch (format->type) {
295 case 'd':
296 case 'e':
297 case 'f':
298 case 'g':
299 case 'E':
300 case 'G':
301 case '%':
302 case 'F':
303 case '\0':
304 /* These are allowed. See PEP 378.*/
305 break;
306 case 'b':
307 case 'o':
308 case 'x':
309 case 'X':
310 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
311 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
312 /* Every four digits, not every three, in bin/oct/hex. */
313 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
314 break;
315 }
316 /* fall through */
317 default:
318 invalid_thousands_separator_type(format->thousands_separators, format->type);
319 return 0;
320 }
321 }
322
323 assert (format->align <= 127);
324 assert (format->sign <= 127);
325 return 1;
326 }
327
328 /* Calculate the padding needed. */
329 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)330 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
331 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
332 Py_ssize_t *n_total)
333 {
334 if (width >= 0) {
335 if (nchars > width)
336 *n_total = nchars;
337 else
338 *n_total = width;
339 }
340 else {
341 /* not specified, use all of the chars and no more */
342 *n_total = nchars;
343 }
344
345 /* Figure out how much leading space we need, based on the
346 aligning */
347 if (align == '>')
348 *n_lpadding = *n_total - nchars;
349 else if (align == '^')
350 *n_lpadding = (*n_total - nchars) / 2;
351 else if (align == '<' || align == '=')
352 *n_lpadding = 0;
353 else {
354 /* We should never have an unspecified alignment. */
355 Py_UNREACHABLE();
356 }
357
358 *n_rpadding = *n_total - nchars - *n_lpadding;
359 }
360
361 /* Do the padding, and return a pointer to where the caller-supplied
362 content goes. */
363 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)364 fill_padding(_PyUnicodeWriter *writer,
365 Py_ssize_t nchars,
366 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
367 Py_ssize_t n_rpadding)
368 {
369 Py_ssize_t pos;
370
371 /* Pad on left. */
372 if (n_lpadding) {
373 pos = writer->pos;
374 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
375 }
376
377 /* Pad on right. */
378 if (n_rpadding) {
379 pos = writer->pos + nchars + n_lpadding;
380 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
381 }
382
383 /* Pointer to the user content. */
384 writer->pos += n_lpadding;
385 return 0;
386 }
387
388 /************************************************************************/
389 /*********** common routines for numeric formatting *********************/
390 /************************************************************************/
391
/* Locale info needed for formatting integers and the part of floats
   before and including the decimal. Note that locales only support
   8-bit chars, not unicode.

   Filled in by get_locale_info() and released by free_locale_info(). */
typedef struct {
    PyObject *decimal_point;    /* released with Py_XDECREF by
                                   free_locale_info() */
    PyObject *thousands_sep;    /* released with Py_XDECREF by
                                   free_locale_info() */
    const char *grouping;       /* grouping description handed to
                                   _PyUnicode_InsertThousandsGrouping();
                                   points either at a static string or
                                   at grouping_buffer */
    char *grouping_buffer;      /* private copy of the current locale's
                                   grouping string, or NULL; released
                                   with PyMem_Free by free_locale_info() */
} LocaleInfo;

/* Initializer that zeroes every field, so that free_locale_info() can be
   called on a LocaleInfo that get_locale_info() never filled in. */
#define STATIC_LOCALE_INFO_INIT {0, 0, 0, 0}
403
/* describes the layout for an integer, see the comment in
   calc_number_widths() for details.  Filled in by calc_number_widths()
   and consumed by fill_number(). */
typedef struct {
    Py_ssize_t n_lpadding;  /* fill chars before the sign */
    Py_ssize_t n_prefix;    /* chars in the prefix (e.g. '0x') */
    Py_ssize_t n_spadding;  /* fill chars between the prefix and the
                               digits (used for '=' alignment) */
    Py_ssize_t n_rpadding;  /* fill chars after the number */
    char sign;              /* the sign char to write; '\0' if none */
    Py_ssize_t n_sign;      /* number of chars needed for sign (0/1) */
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
                                    any grouping chars. */
    Py_ssize_t n_decimal;   /* 0 if only an integer */
    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
                               excluding the decimal itself, if
                               present. */

    /* These 2 are not the widths of fields, but are needed by
       STRINGLIB_GROUPING. */
    Py_ssize_t n_digits;    /* The number of digits before a decimal
                               or exponent. */
    Py_ssize_t n_min_width; /* The min_width we used when we computed
                               the n_grouped_digits width. */
} NumberFieldWidths;
427
428
429 /* Given a number of the form:
430 digits[remainder]
431 where ptr points to the start and end points to the end, find where
432 the integer part ends. This could be a decimal, an exponent, both,
433 or neither.
434 If a decimal point is present, set *has_decimal and increment
435 remainder beyond it.
436 Results are undefined (but shouldn't crash) for improperly
437 formatted strings.
438 */
439 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)440 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
441 Py_ssize_t *n_remainder, int *has_decimal)
442 {
443 Py_ssize_t remainder;
444 int kind = PyUnicode_KIND(s);
445 void *data = PyUnicode_DATA(s);
446
447 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
448 ++pos;
449 remainder = pos;
450
451 /* Does remainder start with a decimal point? */
452 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
453
454 /* Skip the decimal point. */
455 if (*has_decimal)
456 remainder++;
457
458 *n_remainder = end - remainder;
459 }
460
461 /* not all fields of format are used. for example, precision is
462 unused. should this take discrete params in order to be more clear
463 about what it does? or is passing a single format parameter easier
464 and more efficient enough to justify a little obfuscation?
465 Return -1 on error. */
466 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,PyObject * number,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)467 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
468 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
469 Py_ssize_t n_end, Py_ssize_t n_remainder,
470 int has_decimal, const LocaleInfo *locale,
471 const InternalFormatSpec *format, Py_UCS4 *maxchar)
472 {
473 Py_ssize_t n_non_digit_non_padding;
474 Py_ssize_t n_padding;
475
476 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
477 spec->n_lpadding = 0;
478 spec->n_prefix = n_prefix;
479 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
480 spec->n_remainder = n_remainder;
481 spec->n_spadding = 0;
482 spec->n_rpadding = 0;
483 spec->sign = '\0';
484 spec->n_sign = 0;
485
486 /* the output will look like:
487 | |
488 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
489 | |
490
491 sign is computed from format->sign and the actual
492 sign of the number
493
494 prefix is given (it's for the '0x' prefix)
495
496 digits is already known
497
498 the total width is either given, or computed from the
499 actual digits
500
501 only one of lpadding, spadding, and rpadding can be non-zero,
502 and it's calculated from the width and other fields
503 */
504
505 /* compute the various parts we're going to write */
506 switch (format->sign) {
507 case '+':
508 /* always put a + or - */
509 spec->n_sign = 1;
510 spec->sign = (sign_char == '-' ? '-' : '+');
511 break;
512 case ' ':
513 spec->n_sign = 1;
514 spec->sign = (sign_char == '-' ? '-' : ' ');
515 break;
516 default:
517 /* Not specified, or the default (-) */
518 if (sign_char == '-') {
519 spec->n_sign = 1;
520 spec->sign = '-';
521 }
522 }
523
524 /* The number of chars used for non-digits and non-padding. */
525 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
526 spec->n_remainder;
527
528 /* min_width can go negative, that's okay. format->width == -1 means
529 we don't care. */
530 if (format->fill_char == '0' && format->align == '=')
531 spec->n_min_width = format->width - n_non_digit_non_padding;
532 else
533 spec->n_min_width = 0;
534
535 if (spec->n_digits == 0)
536 /* This case only occurs when using 'c' formatting, we need
537 to special case it because the grouping code always wants
538 to have at least one character. */
539 spec->n_grouped_digits = 0;
540 else {
541 Py_UCS4 grouping_maxchar;
542 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
543 NULL, 0,
544 NULL, 0, spec->n_digits,
545 spec->n_min_width,
546 locale->grouping, locale->thousands_sep, &grouping_maxchar);
547 if (spec->n_grouped_digits == -1) {
548 return -1;
549 }
550 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
551 }
552
553 /* Given the desired width and the total of digit and non-digit
554 space we consume, see if we need any padding. format->width can
555 be negative (meaning no padding), but this code still works in
556 that case. */
557 n_padding = format->width -
558 (n_non_digit_non_padding + spec->n_grouped_digits);
559 if (n_padding > 0) {
560 /* Some padding is needed. Determine if it's left, space, or right. */
561 switch (format->align) {
562 case '<':
563 spec->n_rpadding = n_padding;
564 break;
565 case '^':
566 spec->n_lpadding = n_padding / 2;
567 spec->n_rpadding = n_padding - spec->n_lpadding;
568 break;
569 case '=':
570 spec->n_spadding = n_padding;
571 break;
572 case '>':
573 spec->n_lpadding = n_padding;
574 break;
575 default:
576 /* Shouldn't get here, but treat it as '>' */
577 Py_UNREACHABLE();
578 }
579 }
580
581 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
582 *maxchar = Py_MAX(*maxchar, format->fill_char);
583
584 if (spec->n_decimal)
585 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
586
587 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
588 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
589 spec->n_remainder + spec->n_rpadding;
590 }
591
592 /* Fill in the digit parts of a numbers's string representation,
593 as determined in calc_number_widths().
594 Return -1 on error, or 0 on success. */
595 static int
fill_number(_PyUnicodeWriter * writer,const NumberFieldWidths * spec,PyObject * digits,Py_ssize_t d_start,Py_ssize_t d_end,PyObject * prefix,Py_ssize_t p_start,Py_UCS4 fill_char,LocaleInfo * locale,int toupper)596 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
597 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
598 PyObject *prefix, Py_ssize_t p_start,
599 Py_UCS4 fill_char,
600 LocaleInfo *locale, int toupper)
601 {
602 /* Used to keep track of digits, decimal, and remainder. */
603 Py_ssize_t d_pos = d_start;
604 const unsigned int kind = writer->kind;
605 const void *data = writer->data;
606 Py_ssize_t r;
607
608 if (spec->n_lpadding) {
609 _PyUnicode_FastFill(writer->buffer,
610 writer->pos, spec->n_lpadding, fill_char);
611 writer->pos += spec->n_lpadding;
612 }
613 if (spec->n_sign == 1) {
614 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
615 writer->pos++;
616 }
617 if (spec->n_prefix) {
618 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
619 prefix, p_start,
620 spec->n_prefix);
621 if (toupper) {
622 Py_ssize_t t;
623 for (t = 0; t < spec->n_prefix; t++) {
624 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
625 c = Py_TOUPPER(c);
626 assert (c <= 127);
627 PyUnicode_WRITE(kind, data, writer->pos + t, c);
628 }
629 }
630 writer->pos += spec->n_prefix;
631 }
632 if (spec->n_spadding) {
633 _PyUnicode_FastFill(writer->buffer,
634 writer->pos, spec->n_spadding, fill_char);
635 writer->pos += spec->n_spadding;
636 }
637
638 /* Only for type 'c' special case, it has no digits. */
639 if (spec->n_digits != 0) {
640 /* Fill the digits with InsertThousandsGrouping. */
641 r = _PyUnicode_InsertThousandsGrouping(
642 writer, spec->n_grouped_digits,
643 digits, d_pos, spec->n_digits,
644 spec->n_min_width,
645 locale->grouping, locale->thousands_sep, NULL);
646 if (r == -1)
647 return -1;
648 assert(r == spec->n_grouped_digits);
649 d_pos += spec->n_digits;
650 }
651 if (toupper) {
652 Py_ssize_t t;
653 for (t = 0; t < spec->n_grouped_digits; t++) {
654 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
655 c = Py_TOUPPER(c);
656 if (c > 127) {
657 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
658 return -1;
659 }
660 PyUnicode_WRITE(kind, data, writer->pos + t, c);
661 }
662 }
663 writer->pos += spec->n_grouped_digits;
664
665 if (spec->n_decimal) {
666 _PyUnicode_FastCopyCharacters(
667 writer->buffer, writer->pos,
668 locale->decimal_point, 0, spec->n_decimal);
669 writer->pos += spec->n_decimal;
670 d_pos += 1;
671 }
672
673 if (spec->n_remainder) {
674 _PyUnicode_FastCopyCharacters(
675 writer->buffer, writer->pos,
676 digits, d_pos, spec->n_remainder);
677 writer->pos += spec->n_remainder;
678 /* d_pos += spec->n_remainder; */
679 }
680
681 if (spec->n_rpadding) {
682 _PyUnicode_FastFill(writer->buffer,
683 writer->pos, spec->n_rpadding,
684 fill_char);
685 writer->pos += spec->n_rpadding;
686 }
687 return 0;
688 }
689
690 static const char no_grouping[1] = {CHAR_MAX};
691
692 /* Find the decimal point character(s?), thousands_separator(s?), and
693 grouping description, either for the current locale if type is
694 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
695 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
696 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)697 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
698 {
699 switch (type) {
700 case LT_CURRENT_LOCALE: {
701 const char *grouping;
702 if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point,
703 &locale_info->thousands_sep,
704 &grouping) < 0) {
705 return -1;
706 }
707
708 /* localeconv() grouping can become a dangling pointer or point
709 to a different string if another thread calls localeconv() during
710 the string formatting. Copy the string to avoid this risk. */
711 locale_info->grouping_buffer = _PyMem_Strdup(grouping);
712 if (locale_info->grouping_buffer == NULL) {
713 PyErr_NoMemory();
714 return -1;
715 }
716 locale_info->grouping = locale_info->grouping_buffer;
717 break;
718 }
719 case LT_DEFAULT_LOCALE:
720 case LT_UNDERSCORE_LOCALE:
721 case LT_UNDER_FOUR_LOCALE:
722 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
723 locale_info->thousands_sep = PyUnicode_FromOrdinal(
724 type == LT_DEFAULT_LOCALE ? ',' : '_');
725 if (!locale_info->decimal_point || !locale_info->thousands_sep)
726 return -1;
727 if (type != LT_UNDER_FOUR_LOCALE)
728 locale_info->grouping = "\3"; /* Group every 3 characters. The
729 (implicit) trailing 0 means repeat
730 infinitely. */
731 else
732 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
733 break;
734 case LT_NO_LOCALE:
735 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
736 locale_info->thousands_sep = PyUnicode_New(0, 0);
737 if (!locale_info->decimal_point || !locale_info->thousands_sep)
738 return -1;
739 locale_info->grouping = no_grouping;
740 break;
741 }
742 return 0;
743 }
744
745 static void
free_locale_info(LocaleInfo * locale_info)746 free_locale_info(LocaleInfo *locale_info)
747 {
748 Py_XDECREF(locale_info->decimal_point);
749 Py_XDECREF(locale_info->thousands_sep);
750 PyMem_Free(locale_info->grouping_buffer);
751 }
752
753 /************************************************************************/
754 /*********** string formatting ******************************************/
755 /************************************************************************/
756
757 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)758 format_string_internal(PyObject *value, const InternalFormatSpec *format,
759 _PyUnicodeWriter *writer)
760 {
761 Py_ssize_t lpad;
762 Py_ssize_t rpad;
763 Py_ssize_t total;
764 Py_ssize_t len;
765 int result = -1;
766 Py_UCS4 maxchar;
767
768 assert(PyUnicode_IS_READY(value));
769 len = PyUnicode_GET_LENGTH(value);
770
771 /* sign is not allowed on strings */
772 if (format->sign != '\0') {
773 PyErr_SetString(PyExc_ValueError,
774 "Sign not allowed in string format specifier");
775 goto done;
776 }
777
778 /* alternate is not allowed on strings */
779 if (format->alternate) {
780 PyErr_SetString(PyExc_ValueError,
781 "Alternate form (#) not allowed in string format "
782 "specifier");
783 goto done;
784 }
785
786 /* '=' alignment not allowed on strings */
787 if (format->align == '=') {
788 PyErr_SetString(PyExc_ValueError,
789 "'=' alignment not allowed "
790 "in string format specifier");
791 goto done;
792 }
793
794 if ((format->width == -1 || format->width <= len)
795 && (format->precision == -1 || format->precision >= len)) {
796 /* Fast path */
797 return _PyUnicodeWriter_WriteStr(writer, value);
798 }
799
800 /* if precision is specified, output no more that format.precision
801 characters */
802 if (format->precision >= 0 && len >= format->precision) {
803 len = format->precision;
804 }
805
806 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
807
808 maxchar = writer->maxchar;
809 if (lpad != 0 || rpad != 0)
810 maxchar = Py_MAX(maxchar, format->fill_char);
811 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
812 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
813 maxchar = Py_MAX(maxchar, valmaxchar);
814 }
815
816 /* allocate the resulting string */
817 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
818 goto done;
819
820 /* Write into that space. First the padding. */
821 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
822 if (result == -1)
823 goto done;
824
825 /* Then the source string. */
826 if (len) {
827 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
828 value, 0, len);
829 }
830 writer->pos += (len + rpad);
831 result = 0;
832
833 done:
834 return result;
835 }
836
837
838 /************************************************************************/
839 /*********** long formatting ********************************************/
840 /************************************************************************/
841
842 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)843 format_long_internal(PyObject *value, const InternalFormatSpec *format,
844 _PyUnicodeWriter *writer)
845 {
846 int result = -1;
847 Py_UCS4 maxchar = 127;
848 PyObject *tmp = NULL;
849 Py_ssize_t inumeric_chars;
850 Py_UCS4 sign_char = '\0';
851 Py_ssize_t n_digits; /* count of digits need from the computed
852 string */
853 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
854 produces non-digits */
855 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
856 Py_ssize_t n_total;
857 Py_ssize_t prefix = 0;
858 NumberFieldWidths spec;
859 long x;
860
861 /* Locale settings, either from the actual locale or
862 from a hard-code pseudo-locale */
863 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
864
865 /* no precision allowed on integers */
866 if (format->precision != -1) {
867 PyErr_SetString(PyExc_ValueError,
868 "Precision not allowed in integer format specifier");
869 goto done;
870 }
871
872 /* special case for character formatting */
873 if (format->type == 'c') {
874 /* error to specify a sign */
875 if (format->sign != '\0') {
876 PyErr_SetString(PyExc_ValueError,
877 "Sign not allowed with integer"
878 " format specifier 'c'");
879 goto done;
880 }
881 /* error to request alternate format */
882 if (format->alternate) {
883 PyErr_SetString(PyExc_ValueError,
884 "Alternate form (#) not allowed with integer"
885 " format specifier 'c'");
886 goto done;
887 }
888
889 /* taken from unicodeobject.c formatchar() */
890 /* Integer input truncated to a character */
891 x = PyLong_AsLong(value);
892 if (x == -1 && PyErr_Occurred())
893 goto done;
894 if (x < 0 || x > 0x10ffff) {
895 PyErr_SetString(PyExc_OverflowError,
896 "%c arg not in range(0x110000)");
897 goto done;
898 }
899 tmp = PyUnicode_FromOrdinal(x);
900 inumeric_chars = 0;
901 n_digits = 1;
902 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
903
904 /* As a sort-of hack, we tell calc_number_widths that we only
905 have "remainder" characters. calc_number_widths thinks
906 these are characters that don't get formatted, only copied
907 into the output string. We do this for 'c' formatting,
908 because the characters are likely to be non-digits. */
909 n_remainder = 1;
910 }
911 else {
912 int base;
913 int leading_chars_to_skip = 0; /* Number of characters added by
914 PyNumber_ToBase that we want to
915 skip over. */
916
917 /* Compute the base and how many characters will be added by
918 PyNumber_ToBase */
919 switch (format->type) {
920 case 'b':
921 base = 2;
922 leading_chars_to_skip = 2; /* 0b */
923 break;
924 case 'o':
925 base = 8;
926 leading_chars_to_skip = 2; /* 0o */
927 break;
928 case 'x':
929 case 'X':
930 base = 16;
931 leading_chars_to_skip = 2; /* 0x */
932 break;
933 default: /* shouldn't be needed, but stops a compiler warning */
934 case 'd':
935 case 'n':
936 base = 10;
937 break;
938 }
939
940 if (format->sign != '+' && format->sign != ' '
941 && format->width == -1
942 && format->type != 'X' && format->type != 'n'
943 && !format->thousands_separators
944 && PyLong_CheckExact(value))
945 {
946 /* Fast path */
947 return _PyLong_FormatWriter(writer, value, base, format->alternate);
948 }
949
950 /* The number of prefix chars is the same as the leading
951 chars to skip */
952 if (format->alternate)
953 n_prefix = leading_chars_to_skip;
954
955 /* Do the hard part, converting to a string in a given base */
956 tmp = _PyLong_Format(value, base);
957 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
958 goto done;
959
960 inumeric_chars = 0;
961 n_digits = PyUnicode_GET_LENGTH(tmp);
962
963 prefix = inumeric_chars;
964
965 /* Is a sign character present in the output? If so, remember it
966 and skip it */
967 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
968 sign_char = '-';
969 ++prefix;
970 ++leading_chars_to_skip;
971 }
972
973 /* Skip over the leading chars (0x, 0b, etc.) */
974 n_digits -= leading_chars_to_skip;
975 inumeric_chars += leading_chars_to_skip;
976 }
977
978 /* Determine the grouping, separator, and decimal point, if any. */
979 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
980 format->thousands_separators,
981 &locale) == -1)
982 goto done;
983
984 /* Calculate how much memory we'll need. */
985 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
986 inumeric_chars + n_digits, n_remainder, 0,
987 &locale, format, &maxchar);
988 if (n_total == -1) {
989 goto done;
990 }
991
992 /* Allocate the memory. */
993 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
994 goto done;
995
996 /* Populate the memory. */
997 result = fill_number(writer, &spec,
998 tmp, inumeric_chars, inumeric_chars + n_digits,
999 tmp, prefix, format->fill_char,
1000 &locale, format->type == 'X');
1001
1002 done:
1003 Py_XDECREF(tmp);
1004 free_locale_info(&locale);
1005 return result;
1006 }
1007
1008 /************************************************************************/
1009 /*********** float formatting *******************************************/
1010 /************************************************************************/
1011
1012 /* much of this is taken from unicodeobject.c */
1013 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1014 format_float_internal(PyObject *value,
1015 const InternalFormatSpec *format,
1016 _PyUnicodeWriter *writer)
1017 {
1018 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1019 Py_ssize_t n_digits;
1020 Py_ssize_t n_remainder;
1021 Py_ssize_t n_total;
1022 int has_decimal;
1023 double val;
1024 int precision, default_precision = 6;
1025 Py_UCS4 type = format->type;
1026 int add_pct = 0;
1027 Py_ssize_t index;
1028 NumberFieldWidths spec;
1029 int flags = 0;
1030 int result = -1;
1031 Py_UCS4 maxchar = 127;
1032 Py_UCS4 sign_char = '\0';
1033 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1034 PyObject *unicode_tmp = NULL;
1035
1036 /* Locale settings, either from the actual locale or
1037 from a hard-code pseudo-locale */
1038 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
1039
1040 if (format->precision > INT_MAX) {
1041 PyErr_SetString(PyExc_ValueError, "precision too big");
1042 goto done;
1043 }
1044 precision = (int)format->precision;
1045
1046 if (format->alternate)
1047 flags |= Py_DTSF_ALT;
1048
1049 if (type == '\0') {
1050 /* Omitted type specifier. Behaves in the same way as repr(x)
1051 and str(x) if no precision is given, else like 'g', but with
1052 at least one digit after the decimal point. */
1053 flags |= Py_DTSF_ADD_DOT_0;
1054 type = 'r';
1055 default_precision = 0;
1056 }
1057
1058 if (type == 'n')
1059 /* 'n' is the same as 'g', except for the locale used to
1060 format the result. We take care of that later. */
1061 type = 'g';
1062
1063 val = PyFloat_AsDouble(value);
1064 if (val == -1.0 && PyErr_Occurred())
1065 goto done;
1066
1067 if (type == '%') {
1068 type = 'f';
1069 val *= 100;
1070 add_pct = 1;
1071 }
1072
1073 if (precision < 0)
1074 precision = default_precision;
1075 else if (type == 'r')
1076 type = 'g';
1077
1078 /* Cast "type", because if we're in unicode we need to pass an
1079 8-bit char. This is safe, because we've restricted what "type"
1080 can be. */
1081 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1082 &float_type);
1083 if (buf == NULL)
1084 goto done;
1085 n_digits = strlen(buf);
1086
1087 if (add_pct) {
1088 /* We know that buf has a trailing zero (since we just called
1089 strlen() on it), and we don't use that fact any more. So we
1090 can just write over the trailing zero. */
1091 buf[n_digits] = '%';
1092 n_digits += 1;
1093 }
1094
1095 if (format->sign != '+' && format->sign != ' '
1096 && format->width == -1
1097 && format->type != 'n'
1098 && !format->thousands_separators)
1099 {
1100 /* Fast path */
1101 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1102 PyMem_Free(buf);
1103 return result;
1104 }
1105
1106 /* Since there is no unicode version of PyOS_double_to_string,
1107 just use the 8 bit version and then convert to unicode. */
1108 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1109 PyMem_Free(buf);
1110 if (unicode_tmp == NULL)
1111 goto done;
1112
1113 /* Is a sign character present in the output? If so, remember it
1114 and skip it */
1115 index = 0;
1116 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1117 sign_char = '-';
1118 ++index;
1119 --n_digits;
1120 }
1121
1122 /* Determine if we have any "remainder" (after the digits, might include
1123 decimal or exponent or both (or neither)) */
1124 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1125
1126 /* Determine the grouping, separator, and decimal point, if any. */
1127 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1128 format->thousands_separators,
1129 &locale) == -1)
1130 goto done;
1131
1132 /* Calculate how much memory we'll need. */
1133 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
1134 index + n_digits, n_remainder, has_decimal,
1135 &locale, format, &maxchar);
1136 if (n_total == -1) {
1137 goto done;
1138 }
1139
1140 /* Allocate the memory. */
1141 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1142 goto done;
1143
1144 /* Populate the memory. */
1145 result = fill_number(writer, &spec,
1146 unicode_tmp, index, index + n_digits,
1147 NULL, 0, format->fill_char,
1148 &locale, 0);
1149
1150 done:
1151 Py_XDECREF(unicode_tmp);
1152 free_locale_info(&locale);
1153 return result;
1154 }
1155
1156 /************************************************************************/
1157 /*********** complex formatting *****************************************/
1158 /************************************************************************/
1159
1160 static int
format_complex_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1161 format_complex_internal(PyObject *value,
1162 const InternalFormatSpec *format,
1163 _PyUnicodeWriter *writer)
1164 {
1165 double re;
1166 double im;
1167 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1168 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1169
1170 InternalFormatSpec tmp_format = *format;
1171 Py_ssize_t n_re_digits;
1172 Py_ssize_t n_im_digits;
1173 Py_ssize_t n_re_remainder;
1174 Py_ssize_t n_im_remainder;
1175 Py_ssize_t n_re_total;
1176 Py_ssize_t n_im_total;
1177 int re_has_decimal;
1178 int im_has_decimal;
1179 int precision, default_precision = 6;
1180 Py_UCS4 type = format->type;
1181 Py_ssize_t i_re;
1182 Py_ssize_t i_im;
1183 NumberFieldWidths re_spec;
1184 NumberFieldWidths im_spec;
1185 int flags = 0;
1186 int result = -1;
1187 Py_UCS4 maxchar = 127;
1188 enum PyUnicode_Kind rkind;
1189 void *rdata;
1190 Py_UCS4 re_sign_char = '\0';
1191 Py_UCS4 im_sign_char = '\0';
1192 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1193 int im_float_type;
1194 int add_parens = 0;
1195 int skip_re = 0;
1196 Py_ssize_t lpad;
1197 Py_ssize_t rpad;
1198 Py_ssize_t total;
1199 PyObject *re_unicode_tmp = NULL;
1200 PyObject *im_unicode_tmp = NULL;
1201
1202 /* Locale settings, either from the actual locale or
1203 from a hard-code pseudo-locale */
1204 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
1205
1206 if (format->precision > INT_MAX) {
1207 PyErr_SetString(PyExc_ValueError, "precision too big");
1208 goto done;
1209 }
1210 precision = (int)format->precision;
1211
1212 /* Zero padding is not allowed. */
1213 if (format->fill_char == '0') {
1214 PyErr_SetString(PyExc_ValueError,
1215 "Zero padding is not allowed in complex format "
1216 "specifier");
1217 goto done;
1218 }
1219
1220 /* Neither is '=' alignment . */
1221 if (format->align == '=') {
1222 PyErr_SetString(PyExc_ValueError,
1223 "'=' alignment flag is not allowed in complex format "
1224 "specifier");
1225 goto done;
1226 }
1227
1228 re = PyComplex_RealAsDouble(value);
1229 if (re == -1.0 && PyErr_Occurred())
1230 goto done;
1231 im = PyComplex_ImagAsDouble(value);
1232 if (im == -1.0 && PyErr_Occurred())
1233 goto done;
1234
1235 if (format->alternate)
1236 flags |= Py_DTSF_ALT;
1237
1238 if (type == '\0') {
1239 /* Omitted type specifier. Should be like str(self). */
1240 type = 'r';
1241 default_precision = 0;
1242 if (re == 0.0 && copysign(1.0, re) == 1.0)
1243 skip_re = 1;
1244 else
1245 add_parens = 1;
1246 }
1247
1248 if (type == 'n')
1249 /* 'n' is the same as 'g', except for the locale used to
1250 format the result. We take care of that later. */
1251 type = 'g';
1252
1253 if (precision < 0)
1254 precision = default_precision;
1255 else if (type == 'r')
1256 type = 'g';
1257
1258 /* Cast "type", because if we're in unicode we need to pass an
1259 8-bit char. This is safe, because we've restricted what "type"
1260 can be. */
1261 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1262 &re_float_type);
1263 if (re_buf == NULL)
1264 goto done;
1265 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1266 &im_float_type);
1267 if (im_buf == NULL)
1268 goto done;
1269
1270 n_re_digits = strlen(re_buf);
1271 n_im_digits = strlen(im_buf);
1272
1273 /* Since there is no unicode version of PyOS_double_to_string,
1274 just use the 8 bit version and then convert to unicode. */
1275 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1276 if (re_unicode_tmp == NULL)
1277 goto done;
1278 i_re = 0;
1279
1280 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1281 if (im_unicode_tmp == NULL)
1282 goto done;
1283 i_im = 0;
1284
1285 /* Is a sign character present in the output? If so, remember it
1286 and skip it */
1287 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1288 re_sign_char = '-';
1289 ++i_re;
1290 --n_re_digits;
1291 }
1292 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1293 im_sign_char = '-';
1294 ++i_im;
1295 --n_im_digits;
1296 }
1297
1298 /* Determine if we have any "remainder" (after the digits, might include
1299 decimal or exponent or both (or neither)) */
1300 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1301 &n_re_remainder, &re_has_decimal);
1302 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1303 &n_im_remainder, &im_has_decimal);
1304
1305 /* Determine the grouping, separator, and decimal point, if any. */
1306 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1307 format->thousands_separators,
1308 &locale) == -1)
1309 goto done;
1310
1311 /* Turn off any padding. We'll do it later after we've composed
1312 the numbers without padding. */
1313 tmp_format.fill_char = '\0';
1314 tmp_format.align = '<';
1315 tmp_format.width = -1;
1316
1317 /* Calculate how much memory we'll need. */
1318 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1319 i_re, i_re + n_re_digits, n_re_remainder,
1320 re_has_decimal, &locale, &tmp_format,
1321 &maxchar);
1322 if (n_re_total == -1) {
1323 goto done;
1324 }
1325
1326 /* Same formatting, but always include a sign, unless the real part is
1327 * going to be omitted, in which case we use whatever sign convention was
1328 * requested by the original format. */
1329 if (!skip_re)
1330 tmp_format.sign = '+';
1331 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1332 i_im, i_im + n_im_digits, n_im_remainder,
1333 im_has_decimal, &locale, &tmp_format,
1334 &maxchar);
1335 if (n_im_total == -1) {
1336 goto done;
1337 }
1338
1339 if (skip_re)
1340 n_re_total = 0;
1341
1342 /* Add 1 for the 'j', and optionally 2 for parens. */
1343 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1344 format->width, format->align, &lpad, &rpad, &total);
1345
1346 if (lpad || rpad)
1347 maxchar = Py_MAX(maxchar, format->fill_char);
1348
1349 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1350 goto done;
1351 rkind = writer->kind;
1352 rdata = writer->data;
1353
1354 /* Populate the memory. First, the padding. */
1355 result = fill_padding(writer,
1356 n_re_total + n_im_total + 1 + add_parens * 2,
1357 format->fill_char, lpad, rpad);
1358 if (result == -1)
1359 goto done;
1360
1361 if (add_parens) {
1362 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1363 writer->pos++;
1364 }
1365
1366 if (!skip_re) {
1367 result = fill_number(writer, &re_spec,
1368 re_unicode_tmp, i_re, i_re + n_re_digits,
1369 NULL, 0,
1370 0,
1371 &locale, 0);
1372 if (result == -1)
1373 goto done;
1374 }
1375 result = fill_number(writer, &im_spec,
1376 im_unicode_tmp, i_im, i_im + n_im_digits,
1377 NULL, 0,
1378 0,
1379 &locale, 0);
1380 if (result == -1)
1381 goto done;
1382 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1383 writer->pos++;
1384
1385 if (add_parens) {
1386 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1387 writer->pos++;
1388 }
1389
1390 writer->pos += rpad;
1391
1392 done:
1393 PyMem_Free(re_buf);
1394 PyMem_Free(im_buf);
1395 Py_XDECREF(re_unicode_tmp);
1396 Py_XDECREF(im_unicode_tmp);
1397 free_locale_info(&locale);
1398 return result;
1399 }
1400
1401 /************************************************************************/
1402 /*********** built in formatters ****************************************/
1403 /************************************************************************/
1404 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1405 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1406 {
1407 PyObject *str;
1408 int err;
1409
1410 str = PyObject_Str(obj);
1411 if (str == NULL)
1412 return -1;
1413 err = _PyUnicodeWriter_WriteStr(writer, str);
1414 Py_DECREF(str);
1415 return err;
1416 }
1417
1418 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1419 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1420 PyObject *obj,
1421 PyObject *format_spec,
1422 Py_ssize_t start, Py_ssize_t end)
1423 {
1424 InternalFormatSpec format;
1425
1426 assert(PyUnicode_Check(obj));
1427
1428 /* check for the special case of zero length format spec, make
1429 it equivalent to str(obj) */
1430 if (start == end) {
1431 if (PyUnicode_CheckExact(obj))
1432 return _PyUnicodeWriter_WriteStr(writer, obj);
1433 else
1434 return format_obj(obj, writer);
1435 }
1436
1437 /* parse the format_spec */
1438 if (!parse_internal_render_format_spec(format_spec, start, end,
1439 &format, 's', '<'))
1440 return -1;
1441
1442 /* type conversion? */
1443 switch (format.type) {
1444 case 's':
1445 /* no type conversion needed, already a string. do the formatting */
1446 return format_string_internal(obj, &format, writer);
1447 default:
1448 /* unknown */
1449 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1450 return -1;
1451 }
1452 }
1453
1454 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1455 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1456 PyObject *obj,
1457 PyObject *format_spec,
1458 Py_ssize_t start, Py_ssize_t end)
1459 {
1460 PyObject *tmp = NULL, *str = NULL;
1461 InternalFormatSpec format;
1462 int result = -1;
1463
1464 /* check for the special case of zero length format spec, make
1465 it equivalent to str(obj) */
1466 if (start == end) {
1467 if (PyLong_CheckExact(obj))
1468 return _PyLong_FormatWriter(writer, obj, 10, 0);
1469 else
1470 return format_obj(obj, writer);
1471 }
1472
1473 /* parse the format_spec */
1474 if (!parse_internal_render_format_spec(format_spec, start, end,
1475 &format, 'd', '>'))
1476 goto done;
1477
1478 /* type conversion? */
1479 switch (format.type) {
1480 case 'b':
1481 case 'c':
1482 case 'd':
1483 case 'o':
1484 case 'x':
1485 case 'X':
1486 case 'n':
1487 /* no type conversion needed, already an int. do the formatting */
1488 result = format_long_internal(obj, &format, writer);
1489 break;
1490
1491 case 'e':
1492 case 'E':
1493 case 'f':
1494 case 'F':
1495 case 'g':
1496 case 'G':
1497 case '%':
1498 /* convert to float */
1499 tmp = PyNumber_Float(obj);
1500 if (tmp == NULL)
1501 goto done;
1502 result = format_float_internal(tmp, &format, writer);
1503 break;
1504
1505 default:
1506 /* unknown */
1507 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1508 goto done;
1509 }
1510
1511 done:
1512 Py_XDECREF(tmp);
1513 Py_XDECREF(str);
1514 return result;
1515 }
1516
1517 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1518 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1519 PyObject *obj,
1520 PyObject *format_spec,
1521 Py_ssize_t start, Py_ssize_t end)
1522 {
1523 InternalFormatSpec format;
1524
1525 /* check for the special case of zero length format spec, make
1526 it equivalent to str(obj) */
1527 if (start == end)
1528 return format_obj(obj, writer);
1529
1530 /* parse the format_spec */
1531 if (!parse_internal_render_format_spec(format_spec, start, end,
1532 &format, '\0', '>'))
1533 return -1;
1534
1535 /* type conversion? */
1536 switch (format.type) {
1537 case '\0': /* No format code: like 'g', but with at least one decimal. */
1538 case 'e':
1539 case 'E':
1540 case 'f':
1541 case 'F':
1542 case 'g':
1543 case 'G':
1544 case 'n':
1545 case '%':
1546 /* no conversion, already a float. do the formatting */
1547 return format_float_internal(obj, &format, writer);
1548
1549 default:
1550 /* unknown */
1551 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1552 return -1;
1553 }
1554 }
1555
1556 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1557 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1558 PyObject *obj,
1559 PyObject *format_spec,
1560 Py_ssize_t start, Py_ssize_t end)
1561 {
1562 InternalFormatSpec format;
1563
1564 /* check for the special case of zero length format spec, make
1565 it equivalent to str(obj) */
1566 if (start == end)
1567 return format_obj(obj, writer);
1568
1569 /* parse the format_spec */
1570 if (!parse_internal_render_format_spec(format_spec, start, end,
1571 &format, '\0', '>'))
1572 return -1;
1573
1574 /* type conversion? */
1575 switch (format.type) {
1576 case '\0': /* No format code: like 'g', but with at least one decimal. */
1577 case 'e':
1578 case 'E':
1579 case 'f':
1580 case 'F':
1581 case 'g':
1582 case 'G':
1583 case 'n':
1584 /* no conversion, already a complex. do the formatting */
1585 return format_complex_internal(obj, &format, writer);
1586
1587 default:
1588 /* unknown */
1589 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1590 return -1;
1591 }
1592 }
1593