/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5 #include "Python.h"
6 #include <locale.h>
7
8 /* Raises an exception about an unknown presentation type for this
9 * type. */
10
11 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)12 unknown_presentation_type(Py_UCS4 presentation_type,
13 const char* type_name)
14 {
15 /* %c might be out-of-range, hence the two cases. */
16 if (presentation_type > 32 && presentation_type < 128)
17 PyErr_Format(PyExc_ValueError,
18 "Unknown format code '%c' "
19 "for object of type '%.200s'",
20 (char)presentation_type,
21 type_name);
22 else
23 PyErr_Format(PyExc_ValueError,
24 "Unknown format code '\\x%x' "
25 "for object of type '%.200s'",
26 (unsigned int)presentation_type,
27 type_name);
28 }
29
30 static void
invalid_comma_type(Py_UCS4 presentation_type)31 invalid_comma_type(Py_UCS4 presentation_type)
32 {
33 if (presentation_type > 32 && presentation_type < 128)
34 PyErr_Format(PyExc_ValueError,
35 "Cannot specify ',' or '_' with '%c'.",
36 (char)presentation_type);
37 else
38 PyErr_Format(PyExc_ValueError,
39 "Cannot specify ',' or '_' with '\\x%x'.",
40 (unsigned int)presentation_type);
41 }
42
43 static void
invalid_comma_and_underscore(void)44 invalid_comma_and_underscore(void)
45 {
46 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
47 }
48
49 /*
50 get_integer consumes 0 or more decimal digit characters from an
51 input string, updates *result with the corresponding positive
52 integer, and returns the number of digits consumed.
53
54 returns -1 on error.
55 */
56 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)57 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
58 Py_ssize_t *result)
59 {
60 Py_ssize_t accumulator, digitval, pos = *ppos;
61 int numdigits;
62 int kind = PyUnicode_KIND(str);
63 void *data = PyUnicode_DATA(str);
64
65 accumulator = numdigits = 0;
66 for (; pos < end; pos++, numdigits++) {
67 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
68 if (digitval < 0)
69 break;
70 /*
71 Detect possible overflow before it happens:
72
73 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
74 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
75 */
76 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
77 PyErr_Format(PyExc_ValueError,
78 "Too many decimal digits in format string");
79 *ppos = pos;
80 return -1;
81 }
82 accumulator = accumulator * 10 + digitval;
83 }
84 *ppos = pos;
85 *result = accumulator;
86 return numdigits;
87 }
88
89 /************************************************************************/
90 /*********** standard format specifier parsing **************************/
91 /************************************************************************/
92
93 /* returns true if this character is a specifier alignment token */
94 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)95 is_alignment_token(Py_UCS4 c)
96 {
97 switch (c) {
98 case '<': case '>': case '=': case '^':
99 return 1;
100 default:
101 return 0;
102 }
103 }
104
105 /* returns true if this character is a sign element */
106 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)107 is_sign_element(Py_UCS4 c)
108 {
109 switch (c) {
110 case ' ': case '+': case '-':
111 return 1;
112 default:
113 return 0;
114 }
115 }
116
/* Which grouping/decimal-point settings to use when formatting a
   number (see get_locale_info()).  LT_NO_LOCALE must stay zero: the
   value is also tested as a plain "grouping requested?" boolean. */
enum LocaleType {
    LT_NO_LOCALE = 0,          /* '.' decimal point, no grouping */
    LT_DEFAULT_LOCALE = 1,     /* ',' separator, groups of three */
    LT_UNDERSCORE_LOCALE = 2,  /* '_' separator, groups of three */
    LT_UNDER_FOUR_LOCALE = 3,  /* '_' separator, groups of four */
    LT_CURRENT_LOCALE = 4      /* settings taken from localeconv() */
};
125
126 typedef struct {
127 Py_UCS4 fill_char;
128 Py_UCS4 align;
129 int alternate;
130 Py_UCS4 sign;
131 Py_ssize_t width;
132 enum LocaleType thousands_separators;
133 Py_ssize_t precision;
134 Py_UCS4 type;
135 } InternalFormatSpec;
136
#if 0
/* Occasionally useful for debugging.  Should normally be compiled out.

   Dumps every field of *format to stdout.  The Py_UCS4 and enum fields
   are converted to int explicitly so that each argument has the type
   its %d / %c conversion expects. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
154
155
156 /*
157 ptr points to the start of the format_spec, end points just past its end.
158 fills in format with the parsed information.
159 returns 1 on success, 0 on failure.
160 if failure, sets the exception
161 */
162 static int
parse_internal_render_format_spec(PyObject * format_spec,Py_ssize_t start,Py_ssize_t end,InternalFormatSpec * format,char default_type,char default_align)163 parse_internal_render_format_spec(PyObject *format_spec,
164 Py_ssize_t start, Py_ssize_t end,
165 InternalFormatSpec *format,
166 char default_type,
167 char default_align)
168 {
169 Py_ssize_t pos = start;
170 int kind = PyUnicode_KIND(format_spec);
171 void *data = PyUnicode_DATA(format_spec);
172 /* end-pos is used throughout this code to specify the length of
173 the input string */
174 #define READ_spec(index) PyUnicode_READ(kind, data, index)
175
176 Py_ssize_t consumed;
177 int align_specified = 0;
178 int fill_char_specified = 0;
179
180 format->fill_char = ' ';
181 format->align = default_align;
182 format->alternate = 0;
183 format->sign = '\0';
184 format->width = -1;
185 format->thousands_separators = LT_NO_LOCALE;
186 format->precision = -1;
187 format->type = default_type;
188
189 /* If the second char is an alignment token,
190 then parse the fill char */
191 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
192 format->align = READ_spec(pos+1);
193 format->fill_char = READ_spec(pos);
194 fill_char_specified = 1;
195 align_specified = 1;
196 pos += 2;
197 }
198 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
199 format->align = READ_spec(pos);
200 align_specified = 1;
201 ++pos;
202 }
203
204 /* Parse the various sign options */
205 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
206 format->sign = READ_spec(pos);
207 ++pos;
208 }
209
210 /* If the next character is #, we're in alternate mode. This only
211 applies to integers. */
212 if (end-pos >= 1 && READ_spec(pos) == '#') {
213 format->alternate = 1;
214 ++pos;
215 }
216
217 /* The special case for 0-padding (backwards compat) */
218 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
219 format->fill_char = '0';
220 if (!align_specified) {
221 format->align = '=';
222 }
223 ++pos;
224 }
225
226 consumed = get_integer(format_spec, &pos, end, &format->width);
227 if (consumed == -1)
228 /* Overflow error. Exception already set. */
229 return 0;
230
231 /* If consumed is 0, we didn't consume any characters for the
232 width. In that case, reset the width to -1, because
233 get_integer() will have set it to zero. -1 is how we record
234 that the width wasn't specified. */
235 if (consumed == 0)
236 format->width = -1;
237
238 /* Comma signifies add thousands separators */
239 if (end-pos && READ_spec(pos) == ',') {
240 format->thousands_separators = LT_DEFAULT_LOCALE;
241 ++pos;
242 }
243 /* Underscore signifies add thousands separators */
244 if (end-pos && READ_spec(pos) == '_') {
245 if (format->thousands_separators != LT_NO_LOCALE) {
246 invalid_comma_and_underscore();
247 return 0;
248 }
249 format->thousands_separators = LT_UNDERSCORE_LOCALE;
250 ++pos;
251 }
252 if (end-pos && READ_spec(pos) == ',') {
253 invalid_comma_and_underscore();
254 return 0;
255 }
256
257 /* Parse field precision */
258 if (end-pos && READ_spec(pos) == '.') {
259 ++pos;
260
261 consumed = get_integer(format_spec, &pos, end, &format->precision);
262 if (consumed == -1)
263 /* Overflow error. Exception already set. */
264 return 0;
265
266 /* Not having a precision after a dot is an error. */
267 if (consumed == 0) {
268 PyErr_Format(PyExc_ValueError,
269 "Format specifier missing precision");
270 return 0;
271 }
272
273 }
274
275 /* Finally, parse the type field. */
276
277 if (end-pos > 1) {
278 /* More than one char remain, invalid format specifier. */
279 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
280 return 0;
281 }
282
283 if (end-pos == 1) {
284 format->type = READ_spec(pos);
285 ++pos;
286 }
287
288 /* Do as much validating as we can, just by looking at the format
289 specifier. Do not take into account what type of formatting
290 we're doing (int, float, string). */
291
292 if (format->thousands_separators) {
293 switch (format->type) {
294 case 'd':
295 case 'e':
296 case 'f':
297 case 'g':
298 case 'E':
299 case 'G':
300 case '%':
301 case 'F':
302 case '\0':
303 /* These are allowed. See PEP 378.*/
304 break;
305 case 'b':
306 case 'o':
307 case 'x':
308 case 'X':
309 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
310 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
311 /* Every four digits, not every three, in bin/oct/hex. */
312 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
313 break;
314 }
315 default:
316 invalid_comma_type(format->type);
317 return 0;
318 }
319 }
320
321 assert (format->align <= 127);
322 assert (format->sign <= 127);
323 return 1;
324 }
325
326 /* Calculate the padding needed. */
327 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)328 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
329 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
330 Py_ssize_t *n_total)
331 {
332 if (width >= 0) {
333 if (nchars > width)
334 *n_total = nchars;
335 else
336 *n_total = width;
337 }
338 else {
339 /* not specified, use all of the chars and no more */
340 *n_total = nchars;
341 }
342
343 /* Figure out how much leading space we need, based on the
344 aligning */
345 if (align == '>')
346 *n_lpadding = *n_total - nchars;
347 else if (align == '^')
348 *n_lpadding = (*n_total - nchars) / 2;
349 else if (align == '<' || align == '=')
350 *n_lpadding = 0;
351 else {
352 /* We should never have an unspecified alignment. */
353 *n_lpadding = 0;
354 assert(0);
355 }
356
357 *n_rpadding = *n_total - nchars - *n_lpadding;
358 }
359
360 /* Do the padding, and return a pointer to where the caller-supplied
361 content goes. */
362 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)363 fill_padding(_PyUnicodeWriter *writer,
364 Py_ssize_t nchars,
365 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
366 Py_ssize_t n_rpadding)
367 {
368 Py_ssize_t pos;
369
370 /* Pad on left. */
371 if (n_lpadding) {
372 pos = writer->pos;
373 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
374 }
375
376 /* Pad on right. */
377 if (n_rpadding) {
378 pos = writer->pos + nchars + n_lpadding;
379 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
380 }
381
382 /* Pointer to the user content. */
383 writer->pos += n_lpadding;
384 return 0;
385 }
386
387 /************************************************************************/
388 /*********** common routines for numeric formatting *********************/
389 /************************************************************************/
390
391 /* Locale info needed for formatting integers and the part of floats
392 before and including the decimal. Note that locales only support
393 8-bit chars, not unicode. */
394 typedef struct {
395 PyObject *decimal_point;
396 PyObject *thousands_sep;
397 const char *grouping;
398 } LocaleInfo;
399
400 #define STATIC_LOCALE_INFO_INIT {0, 0, 0}
401
402 /* describes the layout for an integer, see the comment in
403 calc_number_widths() for details */
404 typedef struct {
405 Py_ssize_t n_lpadding;
406 Py_ssize_t n_prefix;
407 Py_ssize_t n_spadding;
408 Py_ssize_t n_rpadding;
409 char sign;
410 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
411 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
412 any grouping chars. */
413 Py_ssize_t n_decimal; /* 0 if only an integer */
414 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
415 excluding the decimal itself, if
416 present. */
417
418 /* These 2 are not the widths of fields, but are needed by
419 STRINGLIB_GROUPING. */
420 Py_ssize_t n_digits; /* The number of digits before a decimal
421 or exponent. */
422 Py_ssize_t n_min_width; /* The min_width we used when we computed
423 the n_grouped_digits width. */
424 } NumberFieldWidths;
425
426
427 /* Given a number of the form:
428 digits[remainder]
429 where ptr points to the start and end points to the end, find where
430 the integer part ends. This could be a decimal, an exponent, both,
431 or neither.
432 If a decimal point is present, set *has_decimal and increment
433 remainder beyond it.
434 Results are undefined (but shouldn't crash) for improperly
435 formatted strings.
436 */
437 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)438 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
439 Py_ssize_t *n_remainder, int *has_decimal)
440 {
441 Py_ssize_t remainder;
442 int kind = PyUnicode_KIND(s);
443 void *data = PyUnicode_DATA(s);
444
445 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
446 ++pos;
447 remainder = pos;
448
449 /* Does remainder start with a decimal point? */
450 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
451
452 /* Skip the decimal point. */
453 if (*has_decimal)
454 remainder++;
455
456 *n_remainder = end - remainder;
457 }
458
459 /* not all fields of format are used. for example, precision is
460 unused. should this take discrete params in order to be more clear
461 about what it does? or is passing a single format parameter easier
462 and more efficient enough to justify a little obfuscation? */
463 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,PyObject * number,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)464 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
465 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
466 Py_ssize_t n_end, Py_ssize_t n_remainder,
467 int has_decimal, const LocaleInfo *locale,
468 const InternalFormatSpec *format, Py_UCS4 *maxchar)
469 {
470 Py_ssize_t n_non_digit_non_padding;
471 Py_ssize_t n_padding;
472
473 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
474 spec->n_lpadding = 0;
475 spec->n_prefix = n_prefix;
476 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
477 spec->n_remainder = n_remainder;
478 spec->n_spadding = 0;
479 spec->n_rpadding = 0;
480 spec->sign = '\0';
481 spec->n_sign = 0;
482
483 /* the output will look like:
484 | |
485 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
486 | |
487
488 sign is computed from format->sign and the actual
489 sign of the number
490
491 prefix is given (it's for the '0x' prefix)
492
493 digits is already known
494
495 the total width is either given, or computed from the
496 actual digits
497
498 only one of lpadding, spadding, and rpadding can be non-zero,
499 and it's calculated from the width and other fields
500 */
501
502 /* compute the various parts we're going to write */
503 switch (format->sign) {
504 case '+':
505 /* always put a + or - */
506 spec->n_sign = 1;
507 spec->sign = (sign_char == '-' ? '-' : '+');
508 break;
509 case ' ':
510 spec->n_sign = 1;
511 spec->sign = (sign_char == '-' ? '-' : ' ');
512 break;
513 default:
514 /* Not specified, or the default (-) */
515 if (sign_char == '-') {
516 spec->n_sign = 1;
517 spec->sign = '-';
518 }
519 }
520
521 /* The number of chars used for non-digits and non-padding. */
522 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
523 spec->n_remainder;
524
525 /* min_width can go negative, that's okay. format->width == -1 means
526 we don't care. */
527 if (format->fill_char == '0' && format->align == '=')
528 spec->n_min_width = format->width - n_non_digit_non_padding;
529 else
530 spec->n_min_width = 0;
531
532 if (spec->n_digits == 0)
533 /* This case only occurs when using 'c' formatting, we need
534 to special case it because the grouping code always wants
535 to have at least one character. */
536 spec->n_grouped_digits = 0;
537 else {
538 Py_UCS4 grouping_maxchar;
539 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
540 NULL, 0,
541 0, NULL,
542 spec->n_digits, spec->n_min_width,
543 locale->grouping, locale->thousands_sep, &grouping_maxchar);
544 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
545 }
546
547 /* Given the desired width and the total of digit and non-digit
548 space we consume, see if we need any padding. format->width can
549 be negative (meaning no padding), but this code still works in
550 that case. */
551 n_padding = format->width -
552 (n_non_digit_non_padding + spec->n_grouped_digits);
553 if (n_padding > 0) {
554 /* Some padding is needed. Determine if it's left, space, or right. */
555 switch (format->align) {
556 case '<':
557 spec->n_rpadding = n_padding;
558 break;
559 case '^':
560 spec->n_lpadding = n_padding / 2;
561 spec->n_rpadding = n_padding - spec->n_lpadding;
562 break;
563 case '=':
564 spec->n_spadding = n_padding;
565 break;
566 case '>':
567 spec->n_lpadding = n_padding;
568 break;
569 default:
570 /* Shouldn't get here, but treat it as '>' */
571 spec->n_lpadding = n_padding;
572 assert(0);
573 break;
574 }
575 }
576
577 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
578 *maxchar = Py_MAX(*maxchar, format->fill_char);
579
580 if (spec->n_decimal)
581 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
582
583 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
584 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
585 spec->n_remainder + spec->n_rpadding;
586 }
587
588 /* Fill in the digit parts of a numbers's string representation,
589 as determined in calc_number_widths().
590 Return -1 on error, or 0 on success. */
591 static int
fill_number(_PyUnicodeWriter * writer,const NumberFieldWidths * spec,PyObject * digits,Py_ssize_t d_start,Py_ssize_t d_end,PyObject * prefix,Py_ssize_t p_start,Py_UCS4 fill_char,LocaleInfo * locale,int toupper)592 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
593 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
594 PyObject *prefix, Py_ssize_t p_start,
595 Py_UCS4 fill_char,
596 LocaleInfo *locale, int toupper)
597 {
598 /* Used to keep track of digits, decimal, and remainder. */
599 Py_ssize_t d_pos = d_start;
600 const unsigned int kind = writer->kind;
601 const void *data = writer->data;
602 Py_ssize_t r;
603
604 if (spec->n_lpadding) {
605 _PyUnicode_FastFill(writer->buffer,
606 writer->pos, spec->n_lpadding, fill_char);
607 writer->pos += spec->n_lpadding;
608 }
609 if (spec->n_sign == 1) {
610 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
611 writer->pos++;
612 }
613 if (spec->n_prefix) {
614 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
615 prefix, p_start,
616 spec->n_prefix);
617 if (toupper) {
618 Py_ssize_t t;
619 for (t = 0; t < spec->n_prefix; t++) {
620 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
621 c = Py_TOUPPER(c);
622 assert (c <= 127);
623 PyUnicode_WRITE(kind, data, writer->pos + t, c);
624 }
625 }
626 writer->pos += spec->n_prefix;
627 }
628 if (spec->n_spadding) {
629 _PyUnicode_FastFill(writer->buffer,
630 writer->pos, spec->n_spadding, fill_char);
631 writer->pos += spec->n_spadding;
632 }
633
634 /* Only for type 'c' special case, it has no digits. */
635 if (spec->n_digits != 0) {
636 /* Fill the digits with InsertThousandsGrouping. */
637 char *pdigits;
638 if (PyUnicode_READY(digits))
639 return -1;
640 pdigits = PyUnicode_DATA(digits);
641 if (PyUnicode_KIND(digits) < kind) {
642 pdigits = _PyUnicode_AsKind(digits, kind);
643 if (pdigits == NULL)
644 return -1;
645 }
646 r = _PyUnicode_InsertThousandsGrouping(
647 writer->buffer, writer->pos,
648 spec->n_grouped_digits,
649 pdigits + kind * d_pos,
650 spec->n_digits, spec->n_min_width,
651 locale->grouping, locale->thousands_sep, NULL);
652 if (r == -1)
653 return -1;
654 assert(r == spec->n_grouped_digits);
655 if (PyUnicode_KIND(digits) < kind)
656 PyMem_Free(pdigits);
657 d_pos += spec->n_digits;
658 }
659 if (toupper) {
660 Py_ssize_t t;
661 for (t = 0; t < spec->n_grouped_digits; t++) {
662 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
663 c = Py_TOUPPER(c);
664 if (c > 127) {
665 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
666 return -1;
667 }
668 PyUnicode_WRITE(kind, data, writer->pos + t, c);
669 }
670 }
671 writer->pos += spec->n_grouped_digits;
672
673 if (spec->n_decimal) {
674 _PyUnicode_FastCopyCharacters(
675 writer->buffer, writer->pos,
676 locale->decimal_point, 0, spec->n_decimal);
677 writer->pos += spec->n_decimal;
678 d_pos += 1;
679 }
680
681 if (spec->n_remainder) {
682 _PyUnicode_FastCopyCharacters(
683 writer->buffer, writer->pos,
684 digits, d_pos, spec->n_remainder);
685 writer->pos += spec->n_remainder;
686 /* d_pos += spec->n_remainder; */
687 }
688
689 if (spec->n_rpadding) {
690 _PyUnicode_FastFill(writer->buffer,
691 writer->pos, spec->n_rpadding,
692 fill_char);
693 writer->pos += spec->n_rpadding;
694 }
695 return 0;
696 }
697
698 static const char no_grouping[1] = {CHAR_MAX};
699
700 /* Find the decimal point character(s?), thousands_separator(s?), and
701 grouping description, either for the current locale if type is
702 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
703 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
704 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)705 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
706 {
707 switch (type) {
708 case LT_CURRENT_LOCALE: {
709 struct lconv *locale_data = localeconv();
710 locale_info->decimal_point = PyUnicode_DecodeLocale(
711 locale_data->decimal_point,
712 NULL);
713 if (locale_info->decimal_point == NULL)
714 return -1;
715 locale_info->thousands_sep = PyUnicode_DecodeLocale(
716 locale_data->thousands_sep,
717 NULL);
718 if (locale_info->thousands_sep == NULL)
719 return -1;
720 locale_info->grouping = locale_data->grouping;
721 break;
722 }
723 case LT_DEFAULT_LOCALE:
724 case LT_UNDERSCORE_LOCALE:
725 case LT_UNDER_FOUR_LOCALE:
726 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
727 locale_info->thousands_sep = PyUnicode_FromOrdinal(
728 type == LT_DEFAULT_LOCALE ? ',' : '_');
729 if (!locale_info->decimal_point || !locale_info->thousands_sep)
730 return -1;
731 if (type != LT_UNDER_FOUR_LOCALE)
732 locale_info->grouping = "\3"; /* Group every 3 characters. The
733 (implicit) trailing 0 means repeat
734 infinitely. */
735 else
736 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
737 break;
738 case LT_NO_LOCALE:
739 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
740 locale_info->thousands_sep = PyUnicode_New(0, 0);
741 if (!locale_info->decimal_point || !locale_info->thousands_sep)
742 return -1;
743 locale_info->grouping = no_grouping;
744 break;
745 }
746 return 0;
747 }
748
749 static void
free_locale_info(LocaleInfo * locale_info)750 free_locale_info(LocaleInfo *locale_info)
751 {
752 Py_XDECREF(locale_info->decimal_point);
753 Py_XDECREF(locale_info->thousands_sep);
754 }
755
756 /************************************************************************/
757 /*********** string formatting ******************************************/
758 /************************************************************************/
759
760 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)761 format_string_internal(PyObject *value, const InternalFormatSpec *format,
762 _PyUnicodeWriter *writer)
763 {
764 Py_ssize_t lpad;
765 Py_ssize_t rpad;
766 Py_ssize_t total;
767 Py_ssize_t len;
768 int result = -1;
769 Py_UCS4 maxchar;
770
771 assert(PyUnicode_IS_READY(value));
772 len = PyUnicode_GET_LENGTH(value);
773
774 /* sign is not allowed on strings */
775 if (format->sign != '\0') {
776 PyErr_SetString(PyExc_ValueError,
777 "Sign not allowed in string format specifier");
778 goto done;
779 }
780
781 /* alternate is not allowed on strings */
782 if (format->alternate) {
783 PyErr_SetString(PyExc_ValueError,
784 "Alternate form (#) not allowed in string format "
785 "specifier");
786 goto done;
787 }
788
789 /* '=' alignment not allowed on strings */
790 if (format->align == '=') {
791 PyErr_SetString(PyExc_ValueError,
792 "'=' alignment not allowed "
793 "in string format specifier");
794 goto done;
795 }
796
797 if ((format->width == -1 || format->width <= len)
798 && (format->precision == -1 || format->precision >= len)) {
799 /* Fast path */
800 return _PyUnicodeWriter_WriteStr(writer, value);
801 }
802
803 /* if precision is specified, output no more that format.precision
804 characters */
805 if (format->precision >= 0 && len >= format->precision) {
806 len = format->precision;
807 }
808
809 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
810
811 maxchar = writer->maxchar;
812 if (lpad != 0 || rpad != 0)
813 maxchar = Py_MAX(maxchar, format->fill_char);
814 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
815 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
816 maxchar = Py_MAX(maxchar, valmaxchar);
817 }
818
819 /* allocate the resulting string */
820 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
821 goto done;
822
823 /* Write into that space. First the padding. */
824 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
825 if (result == -1)
826 goto done;
827
828 /* Then the source string. */
829 if (len) {
830 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
831 value, 0, len);
832 }
833 writer->pos += (len + rpad);
834 result = 0;
835
836 done:
837 return result;
838 }
839
840
841 /************************************************************************/
842 /*********** long formatting ********************************************/
843 /************************************************************************/
844
845 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)846 format_long_internal(PyObject *value, const InternalFormatSpec *format,
847 _PyUnicodeWriter *writer)
848 {
849 int result = -1;
850 Py_UCS4 maxchar = 127;
851 PyObject *tmp = NULL;
852 Py_ssize_t inumeric_chars;
853 Py_UCS4 sign_char = '\0';
854 Py_ssize_t n_digits; /* count of digits need from the computed
855 string */
856 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
857 produces non-digits */
858 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
859 Py_ssize_t n_total;
860 Py_ssize_t prefix = 0;
861 NumberFieldWidths spec;
862 long x;
863
864 /* Locale settings, either from the actual locale or
865 from a hard-code pseudo-locale */
866 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
867
868 /* no precision allowed on integers */
869 if (format->precision != -1) {
870 PyErr_SetString(PyExc_ValueError,
871 "Precision not allowed in integer format specifier");
872 goto done;
873 }
874
875 /* special case for character formatting */
876 if (format->type == 'c') {
877 /* error to specify a sign */
878 if (format->sign != '\0') {
879 PyErr_SetString(PyExc_ValueError,
880 "Sign not allowed with integer"
881 " format specifier 'c'");
882 goto done;
883 }
884 /* error to request alternate format */
885 if (format->alternate) {
886 PyErr_SetString(PyExc_ValueError,
887 "Alternate form (#) not allowed with integer"
888 " format specifier 'c'");
889 goto done;
890 }
891
892 /* taken from unicodeobject.c formatchar() */
893 /* Integer input truncated to a character */
894 x = PyLong_AsLong(value);
895 if (x == -1 && PyErr_Occurred())
896 goto done;
897 if (x < 0 || x > 0x10ffff) {
898 PyErr_SetString(PyExc_OverflowError,
899 "%c arg not in range(0x110000)");
900 goto done;
901 }
902 tmp = PyUnicode_FromOrdinal(x);
903 inumeric_chars = 0;
904 n_digits = 1;
905 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
906
907 /* As a sort-of hack, we tell calc_number_widths that we only
908 have "remainder" characters. calc_number_widths thinks
909 these are characters that don't get formatted, only copied
910 into the output string. We do this for 'c' formatting,
911 because the characters are likely to be non-digits. */
912 n_remainder = 1;
913 }
914 else {
915 int base;
916 int leading_chars_to_skip = 0; /* Number of characters added by
917 PyNumber_ToBase that we want to
918 skip over. */
919
920 /* Compute the base and how many characters will be added by
921 PyNumber_ToBase */
922 switch (format->type) {
923 case 'b':
924 base = 2;
925 leading_chars_to_skip = 2; /* 0b */
926 break;
927 case 'o':
928 base = 8;
929 leading_chars_to_skip = 2; /* 0o */
930 break;
931 case 'x':
932 case 'X':
933 base = 16;
934 leading_chars_to_skip = 2; /* 0x */
935 break;
936 default: /* shouldn't be needed, but stops a compiler warning */
937 case 'd':
938 case 'n':
939 base = 10;
940 break;
941 }
942
943 if (format->sign != '+' && format->sign != ' '
944 && format->width == -1
945 && format->type != 'X' && format->type != 'n'
946 && !format->thousands_separators
947 && PyLong_CheckExact(value))
948 {
949 /* Fast path */
950 return _PyLong_FormatWriter(writer, value, base, format->alternate);
951 }
952
953 /* The number of prefix chars is the same as the leading
954 chars to skip */
955 if (format->alternate)
956 n_prefix = leading_chars_to_skip;
957
958 /* Do the hard part, converting to a string in a given base */
959 tmp = _PyLong_Format(value, base);
960 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
961 goto done;
962
963 inumeric_chars = 0;
964 n_digits = PyUnicode_GET_LENGTH(tmp);
965
966 prefix = inumeric_chars;
967
968 /* Is a sign character present in the output? If so, remember it
969 and skip it */
970 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
971 sign_char = '-';
972 ++prefix;
973 ++leading_chars_to_skip;
974 }
975
976 /* Skip over the leading chars (0x, 0b, etc.) */
977 n_digits -= leading_chars_to_skip;
978 inumeric_chars += leading_chars_to_skip;
979 }
980
981 /* Determine the grouping, separator, and decimal point, if any. */
982 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
983 format->thousands_separators,
984 &locale) == -1)
985 goto done;
986
987 /* Calculate how much memory we'll need. */
988 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
989 inumeric_chars + n_digits, n_remainder, 0,
990 &locale, format, &maxchar);
991
992 /* Allocate the memory. */
993 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
994 goto done;
995
996 /* Populate the memory. */
997 result = fill_number(writer, &spec,
998 tmp, inumeric_chars, inumeric_chars + n_digits,
999 tmp, prefix, format->fill_char,
1000 &locale, format->type == 'X');
1001
1002 done:
1003 Py_XDECREF(tmp);
1004 free_locale_info(&locale);
1005 return result;
1006 }
1007
1008 /************************************************************************/
1009 /*********** float formatting *******************************************/
1010 /************************************************************************/
1011
1012 /* much of this is taken from unicodeobject.c */
1013 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1014 format_float_internal(PyObject *value,
1015 const InternalFormatSpec *format,
1016 _PyUnicodeWriter *writer)
1017 {
1018 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1019 Py_ssize_t n_digits;
1020 Py_ssize_t n_remainder;
1021 Py_ssize_t n_total;
1022 int has_decimal;
1023 double val;
1024 int precision, default_precision = 6;
1025 Py_UCS4 type = format->type;
1026 int add_pct = 0;
1027 Py_ssize_t index;
1028 NumberFieldWidths spec;
1029 int flags = 0;
1030 int result = -1;
1031 Py_UCS4 maxchar = 127;
1032 Py_UCS4 sign_char = '\0';
1033 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1034 PyObject *unicode_tmp = NULL;
1035
1036 /* Locale settings, either from the actual locale or
1037 from a hard-code pseudo-locale */
1038 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
1039
1040 if (format->precision > INT_MAX) {
1041 PyErr_SetString(PyExc_ValueError, "precision too big");
1042 goto done;
1043 }
1044 precision = (int)format->precision;
1045
1046 if (format->alternate)
1047 flags |= Py_DTSF_ALT;
1048
1049 if (type == '\0') {
1050 /* Omitted type specifier. Behaves in the same way as repr(x)
1051 and str(x) if no precision is given, else like 'g', but with
1052 at least one digit after the decimal point. */
1053 flags |= Py_DTSF_ADD_DOT_0;
1054 type = 'r';
1055 default_precision = 0;
1056 }
1057
1058 if (type == 'n')
1059 /* 'n' is the same as 'g', except for the locale used to
1060 format the result. We take care of that later. */
1061 type = 'g';
1062
1063 val = PyFloat_AsDouble(value);
1064 if (val == -1.0 && PyErr_Occurred())
1065 goto done;
1066
1067 if (type == '%') {
1068 type = 'f';
1069 val *= 100;
1070 add_pct = 1;
1071 }
1072
1073 if (precision < 0)
1074 precision = default_precision;
1075 else if (type == 'r')
1076 type = 'g';
1077
1078 /* Cast "type", because if we're in unicode we need to pass an
1079 8-bit char. This is safe, because we've restricted what "type"
1080 can be. */
1081 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1082 &float_type);
1083 if (buf == NULL)
1084 goto done;
1085 n_digits = strlen(buf);
1086
1087 if (add_pct) {
1088 /* We know that buf has a trailing zero (since we just called
1089 strlen() on it), and we don't use that fact any more. So we
1090 can just write over the trailing zero. */
1091 buf[n_digits] = '%';
1092 n_digits += 1;
1093 }
1094
1095 if (format->sign != '+' && format->sign != ' '
1096 && format->width == -1
1097 && format->type != 'n'
1098 && !format->thousands_separators)
1099 {
1100 /* Fast path */
1101 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1102 PyMem_Free(buf);
1103 return result;
1104 }
1105
1106 /* Since there is no unicode version of PyOS_double_to_string,
1107 just use the 8 bit version and then convert to unicode. */
1108 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1109 PyMem_Free(buf);
1110 if (unicode_tmp == NULL)
1111 goto done;
1112
1113 /* Is a sign character present in the output? If so, remember it
1114 and skip it */
1115 index = 0;
1116 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1117 sign_char = '-';
1118 ++index;
1119 --n_digits;
1120 }
1121
1122 /* Determine if we have any "remainder" (after the digits, might include
1123 decimal or exponent or both (or neither)) */
1124 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1125
1126 /* Determine the grouping, separator, and decimal point, if any. */
1127 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1128 format->thousands_separators,
1129 &locale) == -1)
1130 goto done;
1131
1132 /* Calculate how much memory we'll need. */
1133 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
1134 index + n_digits, n_remainder, has_decimal,
1135 &locale, format, &maxchar);
1136
1137 /* Allocate the memory. */
1138 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1139 goto done;
1140
1141 /* Populate the memory. */
1142 result = fill_number(writer, &spec,
1143 unicode_tmp, index, index + n_digits,
1144 NULL, 0, format->fill_char,
1145 &locale, 0);
1146
1147 done:
1148 Py_XDECREF(unicode_tmp);
1149 free_locale_info(&locale);
1150 return result;
1151 }
1152
1153 /************************************************************************/
1154 /*********** complex formatting *****************************************/
1155 /************************************************************************/
1156
1157 static int
format_complex_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1158 format_complex_internal(PyObject *value,
1159 const InternalFormatSpec *format,
1160 _PyUnicodeWriter *writer)
1161 {
1162 double re;
1163 double im;
1164 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1165 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1166
1167 InternalFormatSpec tmp_format = *format;
1168 Py_ssize_t n_re_digits;
1169 Py_ssize_t n_im_digits;
1170 Py_ssize_t n_re_remainder;
1171 Py_ssize_t n_im_remainder;
1172 Py_ssize_t n_re_total;
1173 Py_ssize_t n_im_total;
1174 int re_has_decimal;
1175 int im_has_decimal;
1176 int precision, default_precision = 6;
1177 Py_UCS4 type = format->type;
1178 Py_ssize_t i_re;
1179 Py_ssize_t i_im;
1180 NumberFieldWidths re_spec;
1181 NumberFieldWidths im_spec;
1182 int flags = 0;
1183 int result = -1;
1184 Py_UCS4 maxchar = 127;
1185 enum PyUnicode_Kind rkind;
1186 void *rdata;
1187 Py_UCS4 re_sign_char = '\0';
1188 Py_UCS4 im_sign_char = '\0';
1189 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1190 int im_float_type;
1191 int add_parens = 0;
1192 int skip_re = 0;
1193 Py_ssize_t lpad;
1194 Py_ssize_t rpad;
1195 Py_ssize_t total;
1196 PyObject *re_unicode_tmp = NULL;
1197 PyObject *im_unicode_tmp = NULL;
1198
1199 /* Locale settings, either from the actual locale or
1200 from a hard-code pseudo-locale */
1201 LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
1202
1203 if (format->precision > INT_MAX) {
1204 PyErr_SetString(PyExc_ValueError, "precision too big");
1205 goto done;
1206 }
1207 precision = (int)format->precision;
1208
1209 /* Zero padding is not allowed. */
1210 if (format->fill_char == '0') {
1211 PyErr_SetString(PyExc_ValueError,
1212 "Zero padding is not allowed in complex format "
1213 "specifier");
1214 goto done;
1215 }
1216
1217 /* Neither is '=' alignment . */
1218 if (format->align == '=') {
1219 PyErr_SetString(PyExc_ValueError,
1220 "'=' alignment flag is not allowed in complex format "
1221 "specifier");
1222 goto done;
1223 }
1224
1225 re = PyComplex_RealAsDouble(value);
1226 if (re == -1.0 && PyErr_Occurred())
1227 goto done;
1228 im = PyComplex_ImagAsDouble(value);
1229 if (im == -1.0 && PyErr_Occurred())
1230 goto done;
1231
1232 if (format->alternate)
1233 flags |= Py_DTSF_ALT;
1234
1235 if (type == '\0') {
1236 /* Omitted type specifier. Should be like str(self). */
1237 type = 'r';
1238 default_precision = 0;
1239 if (re == 0.0 && copysign(1.0, re) == 1.0)
1240 skip_re = 1;
1241 else
1242 add_parens = 1;
1243 }
1244
1245 if (type == 'n')
1246 /* 'n' is the same as 'g', except for the locale used to
1247 format the result. We take care of that later. */
1248 type = 'g';
1249
1250 if (precision < 0)
1251 precision = default_precision;
1252 else if (type == 'r')
1253 type = 'g';
1254
1255 /* Cast "type", because if we're in unicode we need to pass an
1256 8-bit char. This is safe, because we've restricted what "type"
1257 can be. */
1258 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1259 &re_float_type);
1260 if (re_buf == NULL)
1261 goto done;
1262 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1263 &im_float_type);
1264 if (im_buf == NULL)
1265 goto done;
1266
1267 n_re_digits = strlen(re_buf);
1268 n_im_digits = strlen(im_buf);
1269
1270 /* Since there is no unicode version of PyOS_double_to_string,
1271 just use the 8 bit version and then convert to unicode. */
1272 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1273 if (re_unicode_tmp == NULL)
1274 goto done;
1275 i_re = 0;
1276
1277 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1278 if (im_unicode_tmp == NULL)
1279 goto done;
1280 i_im = 0;
1281
1282 /* Is a sign character present in the output? If so, remember it
1283 and skip it */
1284 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1285 re_sign_char = '-';
1286 ++i_re;
1287 --n_re_digits;
1288 }
1289 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1290 im_sign_char = '-';
1291 ++i_im;
1292 --n_im_digits;
1293 }
1294
1295 /* Determine if we have any "remainder" (after the digits, might include
1296 decimal or exponent or both (or neither)) */
1297 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1298 &n_re_remainder, &re_has_decimal);
1299 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1300 &n_im_remainder, &im_has_decimal);
1301
1302 /* Determine the grouping, separator, and decimal point, if any. */
1303 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1304 format->thousands_separators,
1305 &locale) == -1)
1306 goto done;
1307
1308 /* Turn off any padding. We'll do it later after we've composed
1309 the numbers without padding. */
1310 tmp_format.fill_char = '\0';
1311 tmp_format.align = '<';
1312 tmp_format.width = -1;
1313
1314 /* Calculate how much memory we'll need. */
1315 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1316 i_re, i_re + n_re_digits, n_re_remainder,
1317 re_has_decimal, &locale, &tmp_format,
1318 &maxchar);
1319
1320 /* Same formatting, but always include a sign, unless the real part is
1321 * going to be omitted, in which case we use whatever sign convention was
1322 * requested by the original format. */
1323 if (!skip_re)
1324 tmp_format.sign = '+';
1325 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1326 i_im, i_im + n_im_digits, n_im_remainder,
1327 im_has_decimal, &locale, &tmp_format,
1328 &maxchar);
1329
1330 if (skip_re)
1331 n_re_total = 0;
1332
1333 /* Add 1 for the 'j', and optionally 2 for parens. */
1334 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1335 format->width, format->align, &lpad, &rpad, &total);
1336
1337 if (lpad || rpad)
1338 maxchar = Py_MAX(maxchar, format->fill_char);
1339
1340 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1341 goto done;
1342 rkind = writer->kind;
1343 rdata = writer->data;
1344
1345 /* Populate the memory. First, the padding. */
1346 result = fill_padding(writer,
1347 n_re_total + n_im_total + 1 + add_parens * 2,
1348 format->fill_char, lpad, rpad);
1349 if (result == -1)
1350 goto done;
1351
1352 if (add_parens) {
1353 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1354 writer->pos++;
1355 }
1356
1357 if (!skip_re) {
1358 result = fill_number(writer, &re_spec,
1359 re_unicode_tmp, i_re, i_re + n_re_digits,
1360 NULL, 0,
1361 0,
1362 &locale, 0);
1363 if (result == -1)
1364 goto done;
1365 }
1366 result = fill_number(writer, &im_spec,
1367 im_unicode_tmp, i_im, i_im + n_im_digits,
1368 NULL, 0,
1369 0,
1370 &locale, 0);
1371 if (result == -1)
1372 goto done;
1373 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1374 writer->pos++;
1375
1376 if (add_parens) {
1377 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1378 writer->pos++;
1379 }
1380
1381 writer->pos += rpad;
1382
1383 done:
1384 PyMem_Free(re_buf);
1385 PyMem_Free(im_buf);
1386 Py_XDECREF(re_unicode_tmp);
1387 Py_XDECREF(im_unicode_tmp);
1388 free_locale_info(&locale);
1389 return result;
1390 }
1391
1392 /************************************************************************/
1393 /*********** built in formatters ****************************************/
1394 /************************************************************************/
1395 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1396 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1397 {
1398 PyObject *str;
1399 int err;
1400
1401 str = PyObject_Str(obj);
1402 if (str == NULL)
1403 return -1;
1404 err = _PyUnicodeWriter_WriteStr(writer, str);
1405 Py_DECREF(str);
1406 return err;
1407 }
1408
1409 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1410 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1411 PyObject *obj,
1412 PyObject *format_spec,
1413 Py_ssize_t start, Py_ssize_t end)
1414 {
1415 InternalFormatSpec format;
1416
1417 assert(PyUnicode_Check(obj));
1418
1419 /* check for the special case of zero length format spec, make
1420 it equivalent to str(obj) */
1421 if (start == end) {
1422 if (PyUnicode_CheckExact(obj))
1423 return _PyUnicodeWriter_WriteStr(writer, obj);
1424 else
1425 return format_obj(obj, writer);
1426 }
1427
1428 /* parse the format_spec */
1429 if (!parse_internal_render_format_spec(format_spec, start, end,
1430 &format, 's', '<'))
1431 return -1;
1432
1433 /* type conversion? */
1434 switch (format.type) {
1435 case 's':
1436 /* no type conversion needed, already a string. do the formatting */
1437 return format_string_internal(obj, &format, writer);
1438 default:
1439 /* unknown */
1440 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1441 return -1;
1442 }
1443 }
1444
1445 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1446 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1447 PyObject *obj,
1448 PyObject *format_spec,
1449 Py_ssize_t start, Py_ssize_t end)
1450 {
1451 PyObject *tmp = NULL, *str = NULL;
1452 InternalFormatSpec format;
1453 int result = -1;
1454
1455 /* check for the special case of zero length format spec, make
1456 it equivalent to str(obj) */
1457 if (start == end) {
1458 if (PyLong_CheckExact(obj))
1459 return _PyLong_FormatWriter(writer, obj, 10, 0);
1460 else
1461 return format_obj(obj, writer);
1462 }
1463
1464 /* parse the format_spec */
1465 if (!parse_internal_render_format_spec(format_spec, start, end,
1466 &format, 'd', '>'))
1467 goto done;
1468
1469 /* type conversion? */
1470 switch (format.type) {
1471 case 'b':
1472 case 'c':
1473 case 'd':
1474 case 'o':
1475 case 'x':
1476 case 'X':
1477 case 'n':
1478 /* no type conversion needed, already an int. do the formatting */
1479 result = format_long_internal(obj, &format, writer);
1480 break;
1481
1482 case 'e':
1483 case 'E':
1484 case 'f':
1485 case 'F':
1486 case 'g':
1487 case 'G':
1488 case '%':
1489 /* convert to float */
1490 tmp = PyNumber_Float(obj);
1491 if (tmp == NULL)
1492 goto done;
1493 result = format_float_internal(tmp, &format, writer);
1494 break;
1495
1496 default:
1497 /* unknown */
1498 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1499 goto done;
1500 }
1501
1502 done:
1503 Py_XDECREF(tmp);
1504 Py_XDECREF(str);
1505 return result;
1506 }
1507
1508 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1509 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1510 PyObject *obj,
1511 PyObject *format_spec,
1512 Py_ssize_t start, Py_ssize_t end)
1513 {
1514 InternalFormatSpec format;
1515
1516 /* check for the special case of zero length format spec, make
1517 it equivalent to str(obj) */
1518 if (start == end)
1519 return format_obj(obj, writer);
1520
1521 /* parse the format_spec */
1522 if (!parse_internal_render_format_spec(format_spec, start, end,
1523 &format, '\0', '>'))
1524 return -1;
1525
1526 /* type conversion? */
1527 switch (format.type) {
1528 case '\0': /* No format code: like 'g', but with at least one decimal. */
1529 case 'e':
1530 case 'E':
1531 case 'f':
1532 case 'F':
1533 case 'g':
1534 case 'G':
1535 case 'n':
1536 case '%':
1537 /* no conversion, already a float. do the formatting */
1538 return format_float_internal(obj, &format, writer);
1539
1540 default:
1541 /* unknown */
1542 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1543 return -1;
1544 }
1545 }
1546
1547 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1548 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1549 PyObject *obj,
1550 PyObject *format_spec,
1551 Py_ssize_t start, Py_ssize_t end)
1552 {
1553 InternalFormatSpec format;
1554
1555 /* check for the special case of zero length format spec, make
1556 it equivalent to str(obj) */
1557 if (start == end)
1558 return format_obj(obj, writer);
1559
1560 /* parse the format_spec */
1561 if (!parse_internal_render_format_spec(format_spec, start, end,
1562 &format, '\0', '>'))
1563 return -1;
1564
1565 /* type conversion? */
1566 switch (format.type) {
1567 case '\0': /* No format code: like 'g', but with at least one decimal. */
1568 case 'e':
1569 case 'E':
1570 case 'f':
1571 case 'F':
1572 case 'g':
1573 case 'G':
1574 case 'n':
1575 /* no conversion, already a complex. do the formatting */
1576 return format_complex_internal(obj, &format, writer);
1577
1578 default:
1579 /* unknown */
1580 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1581 return -1;
1582 }
1583 }
1584