1 #include "Python.h"
2 #include "pycore_getopt.h"        // _PyOS_GetOpt()
3 #include "pycore_initconfig.h"    // _PyArgv
4 #include "pycore_pymem.h"         // _PyMem_GetAllocatorName()
5 #include "pycore_runtime.h"       // _PyRuntime_Initialize()
6 #include <locale.h>               // setlocale()
7 
8 
/* Build the PyStatus describing a failed decoding.
   NAME must be a string literal (it is concatenated to the message).
   LEN is the length reported by the decoder: -2 selects the
   "cannot decode NAME" error, any other value a memory error. */
#define DECODE_LOCALE_ERR(NAME, LEN) \
    (((LEN) != -2) \
     ? _PyStatus_NO_MEMORY() \
     : _PyStatus_ERR("cannot decode " NAME))
13 
14 
15 /* Forward declarations */
16 static void
17 preconfig_copy(PyPreConfig *config, const PyPreConfig *config2);
18 
19 
20 /* --- File system encoding/errors -------------------------------- */
21 
/* Global filesystem encoding and error handler names.

   The filesystem encoding is chosen by config_init_fs_encoding(),
   see also initfsencoding().

   Both strings are encoded to UTF-8. */
const char *Py_FileSystemDefaultEncoding = NULL;
const char *Py_FileSystemDefaultEncodeErrors = NULL;

/* Companion flags. While a flag is zero, _Py_ClearFileSystemEncoding()
   releases the matching string with PyMem_RawFree(). */
int Py_HasFileSystemDefaultEncoding = 0;
int _Py_HasFileSystemDefaultEncodeErrors = 0;
31 
32 void
_Py_ClearFileSystemEncoding(void)33 _Py_ClearFileSystemEncoding(void)
34 {
35     if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
36         PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
37         Py_FileSystemDefaultEncoding = NULL;
38     }
39     if (!_Py_HasFileSystemDefaultEncodeErrors && Py_FileSystemDefaultEncodeErrors) {
40         PyMem_RawFree((char*)Py_FileSystemDefaultEncodeErrors);
41         Py_FileSystemDefaultEncodeErrors = NULL;
42     }
43 }
44 
45 
46 /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
47    global configuration variables. */
48 int
_Py_SetFileSystemEncoding(const char * encoding,const char * errors)49 _Py_SetFileSystemEncoding(const char *encoding, const char *errors)
50 {
51     char *encoding2 = _PyMem_RawStrdup(encoding);
52     if (encoding2 == NULL) {
53         return -1;
54     }
55 
56     char *errors2 = _PyMem_RawStrdup(errors);
57     if (errors2 == NULL) {
58         PyMem_RawFree(encoding2);
59         return -1;
60     }
61 
62     _Py_ClearFileSystemEncoding();
63 
64     Py_FileSystemDefaultEncoding = encoding2;
65     Py_HasFileSystemDefaultEncoding = 0;
66 
67     Py_FileSystemDefaultEncodeErrors = errors2;
68     _Py_HasFileSystemDefaultEncodeErrors = 0;
69     return 0;
70 }
71 
72 
73 /* --- _PyArgv ---------------------------------------------------- */
74 
75 /* Decode bytes_argv using Py_DecodeLocale() */
76 PyStatus
_PyArgv_AsWstrList(const _PyArgv * args,PyWideStringList * list)77 _PyArgv_AsWstrList(const _PyArgv *args, PyWideStringList *list)
78 {
79     PyWideStringList wargv = _PyWideStringList_INIT;
80     if (args->use_bytes_argv) {
81         size_t size = sizeof(wchar_t*) * args->argc;
82         wargv.items = (wchar_t **)PyMem_RawMalloc(size);
83         if (wargv.items == NULL) {
84             return _PyStatus_NO_MEMORY();
85         }
86 
87         for (Py_ssize_t i = 0; i < args->argc; i++) {
88             size_t len;
89             wchar_t *arg = Py_DecodeLocale(args->bytes_argv[i], &len);
90             if (arg == NULL) {
91                 _PyWideStringList_Clear(&wargv);
92                 return DECODE_LOCALE_ERR("command line arguments",
93                                          (Py_ssize_t)len);
94             }
95             wargv.items[i] = arg;
96             wargv.length++;
97         }
98 
99         _PyWideStringList_Clear(list);
100         *list = wargv;
101     }
102     else {
103         wargv.length = args->argc;
104         wargv.items = (wchar_t **)args->wchar_argv;
105         if (_PyWideStringList_Copy(list, &wargv) < 0) {
106             return _PyStatus_NO_MEMORY();
107         }
108     }
109     return _PyStatus_OK();
110 }
111 
112 
113 /* --- _PyPreCmdline ------------------------------------------------- */
114 
115 void
_PyPreCmdline_Clear(_PyPreCmdline * cmdline)116 _PyPreCmdline_Clear(_PyPreCmdline *cmdline)
117 {
118     _PyWideStringList_Clear(&cmdline->argv);
119     _PyWideStringList_Clear(&cmdline->xoptions);
120 }
121 
122 
123 PyStatus
_PyPreCmdline_SetArgv(_PyPreCmdline * cmdline,const _PyArgv * args)124 _PyPreCmdline_SetArgv(_PyPreCmdline *cmdline, const _PyArgv *args)
125 {
126     return _PyArgv_AsWstrList(args, &cmdline->argv);
127 }
128 
129 
130 static void
precmdline_get_preconfig(_PyPreCmdline * cmdline,const PyPreConfig * config)131 precmdline_get_preconfig(_PyPreCmdline *cmdline, const PyPreConfig *config)
132 {
133 #define COPY_ATTR(ATTR) \
134     if (config->ATTR != -1) { \
135         cmdline->ATTR = config->ATTR; \
136     }
137 
138     COPY_ATTR(isolated);
139     COPY_ATTR(use_environment);
140     COPY_ATTR(dev_mode);
141 
142 #undef COPY_ATTR
143 }
144 
145 
146 static void
precmdline_set_preconfig(const _PyPreCmdline * cmdline,PyPreConfig * config)147 precmdline_set_preconfig(const _PyPreCmdline *cmdline, PyPreConfig *config)
148 {
149 #define COPY_ATTR(ATTR) \
150     config->ATTR = cmdline->ATTR
151 
152     COPY_ATTR(isolated);
153     COPY_ATTR(use_environment);
154     COPY_ATTR(dev_mode);
155 
156 #undef COPY_ATTR
157 }
158 
159 
160 PyStatus
_PyPreCmdline_SetConfig(const _PyPreCmdline * cmdline,PyConfig * config)161 _PyPreCmdline_SetConfig(const _PyPreCmdline *cmdline, PyConfig *config)
162 {
163 #define COPY_ATTR(ATTR) \
164     config->ATTR = cmdline->ATTR
165 
166     PyStatus status = _PyWideStringList_Extend(&config->xoptions, &cmdline->xoptions);
167     if (_PyStatus_EXCEPTION(status)) {
168         return status;
169     }
170 
171     COPY_ATTR(isolated);
172     COPY_ATTR(use_environment);
173     COPY_ATTR(dev_mode);
174     return _PyStatus_OK();
175 
176 #undef COPY_ATTR
177 }
178 
179 
180 /* Parse the command line arguments */
181 static PyStatus
precmdline_parse_cmdline(_PyPreCmdline * cmdline)182 precmdline_parse_cmdline(_PyPreCmdline *cmdline)
183 {
184     const PyWideStringList *argv = &cmdline->argv;
185 
186     _PyOS_ResetGetOpt();
187     /* Don't log parsing errors into stderr here: PyConfig_Read()
188        is responsible for that */
189     _PyOS_opterr = 0;
190     do {
191         int longindex = -1;
192         int c = _PyOS_GetOpt(argv->length, argv->items, &longindex);
193 
194         if (c == EOF || c == 'c' || c == 'm') {
195             break;
196         }
197 
198         switch (c) {
199         case 'E':
200             cmdline->use_environment = 0;
201             break;
202 
203         case 'I':
204             cmdline->isolated = 1;
205             break;
206 
207         case 'X':
208         {
209             PyStatus status = PyWideStringList_Append(&cmdline->xoptions,
210                                                       _PyOS_optarg);
211             if (_PyStatus_EXCEPTION(status)) {
212                 return status;
213             }
214             break;
215         }
216 
217         default:
218             /* ignore other argument:
219                handled by PyConfig_Read() */
220             break;
221         }
222     } while (1);
223 
224     return _PyStatus_OK();
225 }
226 
227 
228 PyStatus
_PyPreCmdline_Read(_PyPreCmdline * cmdline,const PyPreConfig * preconfig)229 _PyPreCmdline_Read(_PyPreCmdline *cmdline, const PyPreConfig *preconfig)
230 {
231     precmdline_get_preconfig(cmdline, preconfig);
232 
233     if (preconfig->parse_argv) {
234         PyStatus status = precmdline_parse_cmdline(cmdline);
235         if (_PyStatus_EXCEPTION(status)) {
236             return status;
237         }
238     }
239 
240     /* isolated, use_environment */
241     if (cmdline->isolated < 0) {
242         cmdline->isolated = 0;
243     }
244     if (cmdline->isolated > 0) {
245         cmdline->use_environment = 0;
246     }
247     if (cmdline->use_environment < 0) {
248         cmdline->use_environment = 0;
249     }
250 
251     /* dev_mode */
252     if ((cmdline->dev_mode < 0)
253         && (_Py_get_xoption(&cmdline->xoptions, L"dev")
254             || _Py_GetEnv(cmdline->use_environment, "PYTHONDEVMODE")))
255     {
256         cmdline->dev_mode = 1;
257     }
258     if (cmdline->dev_mode < 0) {
259         cmdline->dev_mode = 0;
260     }
261 
262     assert(cmdline->use_environment >= 0);
263     assert(cmdline->isolated >= 0);
264     assert(cmdline->dev_mode >= 0);
265 
266     return _PyStatus_OK();
267 }
268 
269 
270 /* --- PyPreConfig ----------------------------------------------- */
271 
272 
273 void
_PyPreConfig_InitCompatConfig(PyPreConfig * config)274 _PyPreConfig_InitCompatConfig(PyPreConfig *config)
275 {
276     memset(config, 0, sizeof(*config));
277 
278     config->_config_init = (int)_PyConfig_INIT_COMPAT;
279     config->parse_argv = 0;
280     config->isolated = -1;
281     config->use_environment = -1;
282     config->configure_locale = 1;
283 
284     /* bpo-36443: C locale coercion (PEP 538) and UTF-8 Mode (PEP 540)
285        are disabled by default using the Compat configuration.
286 
287        Py_UTF8Mode=1 enables the UTF-8 mode. PYTHONUTF8 environment variable
288        is ignored (even if use_environment=1). */
289     config->utf8_mode = 0;
290     config->coerce_c_locale = 0;
291     config->coerce_c_locale_warn = 0;
292 
293     config->dev_mode = -1;
294     config->allocator = PYMEM_ALLOCATOR_NOT_SET;
295 #ifdef MS_WINDOWS
296     config->legacy_windows_fs_encoding = -1;
297 #endif
298 }
299 
300 
301 void
PyPreConfig_InitPythonConfig(PyPreConfig * config)302 PyPreConfig_InitPythonConfig(PyPreConfig *config)
303 {
304     _PyPreConfig_InitCompatConfig(config);
305 
306     config->_config_init = (int)_PyConfig_INIT_PYTHON;
307     config->isolated = 0;
308     config->parse_argv = 1;
309     config->use_environment = 1;
310     /* Set to -1 to enable C locale coercion (PEP 538) and UTF-8 Mode (PEP 540)
311        depending on the LC_CTYPE locale, PYTHONUTF8 and PYTHONCOERCECLOCALE
312        environment variables. */
313     config->coerce_c_locale = -1;
314     config->coerce_c_locale_warn = -1;
315     config->utf8_mode = -1;
316 #ifdef MS_WINDOWS
317     config->legacy_windows_fs_encoding = 0;
318 #endif
319 }
320 
321 
322 void
PyPreConfig_InitIsolatedConfig(PyPreConfig * config)323 PyPreConfig_InitIsolatedConfig(PyPreConfig *config)
324 {
325     _PyPreConfig_InitCompatConfig(config);
326 
327     config->_config_init = (int)_PyConfig_INIT_ISOLATED;
328     config->configure_locale = 0;
329     config->isolated = 1;
330     config->use_environment = 0;
331     config->utf8_mode = 0;
332     config->dev_mode = 0;
333 #ifdef MS_WINDOWS
334     config->legacy_windows_fs_encoding = 0;
335 #endif
336 }
337 
338 
339 PyStatus
_PyPreConfig_InitFromPreConfig(PyPreConfig * config,const PyPreConfig * config2)340 _PyPreConfig_InitFromPreConfig(PyPreConfig *config,
341                                const PyPreConfig *config2)
342 {
343     PyPreConfig_InitPythonConfig(config);
344     preconfig_copy(config, config2);
345     return _PyStatus_OK();
346 }
347 
348 
349 void
_PyPreConfig_InitFromConfig(PyPreConfig * preconfig,const PyConfig * config)350 _PyPreConfig_InitFromConfig(PyPreConfig *preconfig, const PyConfig *config)
351 {
352     _PyConfigInitEnum config_init = (_PyConfigInitEnum)config->_config_init;
353     switch (config_init) {
354     case _PyConfig_INIT_PYTHON:
355         PyPreConfig_InitPythonConfig(preconfig);
356         break;
357     case _PyConfig_INIT_ISOLATED:
358         PyPreConfig_InitIsolatedConfig(preconfig);
359         break;
360     case _PyConfig_INIT_COMPAT:
361     default:
362         _PyPreConfig_InitCompatConfig(preconfig);
363     }
364 
365     _PyPreConfig_GetConfig(preconfig, config);
366 }
367 
368 
369 static void
preconfig_copy(PyPreConfig * config,const PyPreConfig * config2)370 preconfig_copy(PyPreConfig *config, const PyPreConfig *config2)
371 {
372 #define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
373 
374     COPY_ATTR(_config_init);
375     COPY_ATTR(parse_argv);
376     COPY_ATTR(isolated);
377     COPY_ATTR(use_environment);
378     COPY_ATTR(configure_locale);
379     COPY_ATTR(dev_mode);
380     COPY_ATTR(coerce_c_locale);
381     COPY_ATTR(coerce_c_locale_warn);
382     COPY_ATTR(utf8_mode);
383     COPY_ATTR(allocator);
384 #ifdef MS_WINDOWS
385     COPY_ATTR(legacy_windows_fs_encoding);
386 #endif
387 
388 #undef COPY_ATTR
389 }
390 
391 
392 PyObject*
_PyPreConfig_AsDict(const PyPreConfig * config)393 _PyPreConfig_AsDict(const PyPreConfig *config)
394 {
395     PyObject *dict;
396 
397     dict = PyDict_New();
398     if (dict == NULL) {
399         return NULL;
400     }
401 
402 #define SET_ITEM_INT(ATTR) \
403         do { \
404             PyObject *obj = PyLong_FromLong(config->ATTR); \
405             if (obj == NULL) { \
406                 goto fail; \
407             } \
408             int res = PyDict_SetItemString(dict, #ATTR, obj); \
409             Py_DECREF(obj); \
410             if (res < 0) { \
411                 goto fail; \
412             } \
413         } while (0)
414 
415     SET_ITEM_INT(_config_init);
416     SET_ITEM_INT(parse_argv);
417     SET_ITEM_INT(isolated);
418     SET_ITEM_INT(use_environment);
419     SET_ITEM_INT(configure_locale);
420     SET_ITEM_INT(coerce_c_locale);
421     SET_ITEM_INT(coerce_c_locale_warn);
422     SET_ITEM_INT(utf8_mode);
423 #ifdef MS_WINDOWS
424     SET_ITEM_INT(legacy_windows_fs_encoding);
425 #endif
426     SET_ITEM_INT(dev_mode);
427     SET_ITEM_INT(allocator);
428     return dict;
429 
430 fail:
431     Py_DECREF(dict);
432     return NULL;
433 
434 #undef SET_ITEM_INT
435 }
436 
437 
438 void
_PyPreConfig_GetConfig(PyPreConfig * preconfig,const PyConfig * config)439 _PyPreConfig_GetConfig(PyPreConfig *preconfig, const PyConfig *config)
440 {
441 #define COPY_ATTR(ATTR) \
442     if (config->ATTR != -1) { \
443         preconfig->ATTR = config->ATTR; \
444     }
445 
446     COPY_ATTR(parse_argv);
447     COPY_ATTR(isolated);
448     COPY_ATTR(use_environment);
449     COPY_ATTR(dev_mode);
450 
451 #undef COPY_ATTR
452 }
453 
454 
455 static void
preconfig_get_global_vars(PyPreConfig * config)456 preconfig_get_global_vars(PyPreConfig *config)
457 {
458     if (config->_config_init != _PyConfig_INIT_COMPAT) {
459         /* Python and Isolated configuration ignore global variables */
460         return;
461     }
462 
463 #define COPY_FLAG(ATTR, VALUE) \
464     if (config->ATTR < 0) { \
465         config->ATTR = VALUE; \
466     }
467 #define COPY_NOT_FLAG(ATTR, VALUE) \
468     if (config->ATTR < 0) { \
469         config->ATTR = !(VALUE); \
470     }
471 
472     COPY_FLAG(isolated, Py_IsolatedFlag);
473     COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag);
474     if (Py_UTF8Mode > 0) {
475         config->utf8_mode = Py_UTF8Mode;
476     }
477 #ifdef MS_WINDOWS
478     COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
479 #endif
480 
481 #undef COPY_FLAG
482 #undef COPY_NOT_FLAG
483 }
484 
485 
486 static void
preconfig_set_global_vars(const PyPreConfig * config)487 preconfig_set_global_vars(const PyPreConfig *config)
488 {
489 #define COPY_FLAG(ATTR, VAR) \
490     if (config->ATTR >= 0) { \
491         VAR = config->ATTR; \
492     }
493 #define COPY_NOT_FLAG(ATTR, VAR) \
494     if (config->ATTR >= 0) { \
495         VAR = !config->ATTR; \
496     }
497 
498     COPY_FLAG(isolated, Py_IsolatedFlag);
499     COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag);
500 #ifdef MS_WINDOWS
501     COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
502 #endif
503     COPY_FLAG(utf8_mode, Py_UTF8Mode);
504 
505 #undef COPY_FLAG
506 #undef COPY_NOT_FLAG
507 }
508 
509 
/* Get the value of the environment variable 'name'.

   Return NULL if use_environment is zero, if the variable is not set, or
   if it is set to an empty string. Otherwise, return the pointer
   returned by getenv(). use_environment must not be negative. */
const char*
_Py_GetEnv(int use_environment, const char *name)
{
    assert(use_environment >= 0);

    if (use_environment == 0) {
        return NULL;
    }

    const char *value = getenv(name);
    if (value == NULL || value[0] == '\0') {
        /* Unset and empty variables are handled the same way */
        return NULL;
    }
    return value;
}
527 
528 
/* Parse 'str' as a base 10 integer with strtol().

   On success, store the value into *result and return 0.
   Return -1 and leave *result unchanged if the number is followed by
   any other character, or if the value does not fit into an int. */
int
_Py_str_to_int(const char *str, int *result)
{
    char *end = (char *)str;

    errno = 0;
    long parsed = strtol(str, &end, 10);

    int trailing_chars = (*end != '\0');
    int long_overflow = (errno == ERANGE);
    if (trailing_chars || long_overflow) {
        return -1;
    }

    /* long can be wider than int */
    if (parsed > INT_MAX || parsed < INT_MIN) {
        return -1;
    }

    *result = (int)parsed;
    return 0;
}
545 
546 
/* Raise *flag to the value of the environment variable 'name'.

   Nothing is done if _Py_GetEnv() returns NULL for the variable.
   A value which is not an integer, or which is negative, counts as 1.
   *flag is only modified when the new value is greater than it. */
void
_Py_get_env_flag(int use_environment, int *flag, const char *name)
{
    const char *text = _Py_GetEnv(use_environment, name);
    if (text == NULL) {
        return;
    }

    /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */
    int level = 1;
    int parsed;
    if (_Py_str_to_int(text, &parsed) >= 0 && parsed >= 0) {
        level = parsed;
    }

    if (level > *flag) {
        *flag = level;
    }
}
563 
564 
565 const wchar_t*
_Py_get_xoption(const PyWideStringList * xoptions,const wchar_t * name)566 _Py_get_xoption(const PyWideStringList *xoptions, const wchar_t *name)
567 {
568     for (Py_ssize_t i=0; i < xoptions->length; i++) {
569         const wchar_t *option = xoptions->items[i];
570         size_t len;
571         wchar_t *sep = wcschr(option, L'=');
572         if (sep != NULL) {
573             len = (sep - option);
574         }
575         else {
576             len = wcslen(option);
577         }
578         if (wcsncmp(option, name, len) == 0 && name[len] == L'\0') {
579             return option;
580         }
581     }
582     return NULL;
583 }
584 
585 
586 static PyStatus
preconfig_init_utf8_mode(PyPreConfig * config,const _PyPreCmdline * cmdline)587 preconfig_init_utf8_mode(PyPreConfig *config, const _PyPreCmdline *cmdline)
588 {
589 #ifdef MS_WINDOWS
590     if (config->legacy_windows_fs_encoding) {
591         config->utf8_mode = 0;
592     }
593 #endif
594 
595     if (config->utf8_mode >= 0) {
596         return _PyStatus_OK();
597     }
598 
599     const wchar_t *xopt;
600     xopt = _Py_get_xoption(&cmdline->xoptions, L"utf8");
601     if (xopt) {
602         wchar_t *sep = wcschr(xopt, L'=');
603         if (sep) {
604             xopt = sep + 1;
605             if (wcscmp(xopt, L"1") == 0) {
606                 config->utf8_mode = 1;
607             }
608             else if (wcscmp(xopt, L"0") == 0) {
609                 config->utf8_mode = 0;
610             }
611             else {
612                 return _PyStatus_ERR("invalid -X utf8 option value");
613             }
614         }
615         else {
616             config->utf8_mode = 1;
617         }
618         return _PyStatus_OK();
619     }
620 
621     const char *opt = _Py_GetEnv(config->use_environment, "PYTHONUTF8");
622     if (opt) {
623         if (strcmp(opt, "1") == 0) {
624             config->utf8_mode = 1;
625         }
626         else if (strcmp(opt, "0") == 0) {
627             config->utf8_mode = 0;
628         }
629         else {
630             return _PyStatus_ERR("invalid PYTHONUTF8 environment "
631                                 "variable value");
632         }
633         return _PyStatus_OK();
634     }
635 
636 
637 #ifndef MS_WINDOWS
638     if (config->utf8_mode < 0) {
639         /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
640         const char *ctype_loc = setlocale(LC_CTYPE, NULL);
641         if (ctype_loc != NULL
642            && (strcmp(ctype_loc, "C") == 0
643                || strcmp(ctype_loc, "POSIX") == 0))
644         {
645             config->utf8_mode = 1;
646         }
647     }
648 #endif
649 
650     if (config->utf8_mode < 0) {
651         config->utf8_mode = 0;
652     }
653     return _PyStatus_OK();
654 }
655 
656 
657 static void
preconfig_init_coerce_c_locale(PyPreConfig * config)658 preconfig_init_coerce_c_locale(PyPreConfig *config)
659 {
660     if (!config->configure_locale) {
661         config->coerce_c_locale = 0;
662         config->coerce_c_locale_warn = 0;
663         return;
664     }
665 
666     const char *env = _Py_GetEnv(config->use_environment, "PYTHONCOERCECLOCALE");
667     if (env) {
668         if (strcmp(env, "0") == 0) {
669             if (config->coerce_c_locale < 0) {
670                 config->coerce_c_locale = 0;
671             }
672         }
673         else if (strcmp(env, "warn") == 0) {
674             if (config->coerce_c_locale_warn < 0) {
675                 config->coerce_c_locale_warn = 1;
676             }
677         }
678         else {
679             if (config->coerce_c_locale < 0) {
680                 config->coerce_c_locale = 1;
681             }
682         }
683     }
684 
685     /* Test if coerce_c_locale equals to -1 or equals to 1:
686        PYTHONCOERCECLOCALE=1 doesn't imply that the C locale is always coerced.
687        It is only coerced if if the LC_CTYPE locale is "C". */
688     if (config->coerce_c_locale < 0 || config->coerce_c_locale == 1) {
689         /* The C locale enables the C locale coercion (PEP 538) */
690         if (_Py_LegacyLocaleDetected(0)) {
691             config->coerce_c_locale = 2;
692         }
693         else {
694             config->coerce_c_locale = 0;
695         }
696     }
697 
698     if (config->coerce_c_locale_warn < 0) {
699         config->coerce_c_locale_warn = 0;
700     }
701 }
702 
703 
704 static PyStatus
preconfig_init_allocator(PyPreConfig * config)705 preconfig_init_allocator(PyPreConfig *config)
706 {
707     if (config->allocator == PYMEM_ALLOCATOR_NOT_SET) {
708         /* bpo-34247. The PYTHONMALLOC environment variable has the priority
709            over PYTHONDEV env var and "-X dev" command line option.
710            For example, PYTHONMALLOC=malloc PYTHONDEVMODE=1 sets the memory
711            allocators to "malloc" (and not to "debug"). */
712         const char *envvar = _Py_GetEnv(config->use_environment, "PYTHONMALLOC");
713         if (envvar) {
714             PyMemAllocatorName name;
715             if (_PyMem_GetAllocatorName(envvar, &name) < 0) {
716                 return _PyStatus_ERR("PYTHONMALLOC: unknown allocator");
717             }
718             config->allocator = (int)name;
719         }
720     }
721 
722     if (config->dev_mode && config->allocator == PYMEM_ALLOCATOR_NOT_SET) {
723         config->allocator = PYMEM_ALLOCATOR_DEBUG;
724     }
725     return _PyStatus_OK();
726 }
727 
728 
729 static PyStatus
preconfig_read(PyPreConfig * config,_PyPreCmdline * cmdline)730 preconfig_read(PyPreConfig *config, _PyPreCmdline *cmdline)
731 {
732     PyStatus status;
733 
734     status = _PyPreCmdline_Read(cmdline, config);
735     if (_PyStatus_EXCEPTION(status)) {
736         return status;
737     }
738 
739     precmdline_set_preconfig(cmdline, config);
740 
741     /* legacy_windows_fs_encoding, coerce_c_locale, utf8_mode */
742 #ifdef MS_WINDOWS
743     _Py_get_env_flag(config->use_environment,
744                      &config->legacy_windows_fs_encoding,
745                      "PYTHONLEGACYWINDOWSFSENCODING");
746 #endif
747 
748     preconfig_init_coerce_c_locale(config);
749 
750     status = preconfig_init_utf8_mode(config, cmdline);
751     if (_PyStatus_EXCEPTION(status)) {
752         return status;
753     }
754 
755     /* allocator */
756     status = preconfig_init_allocator(config);
757     if (_PyStatus_EXCEPTION(status)) {
758         return status;
759     }
760 
761     assert(config->coerce_c_locale >= 0);
762     assert(config->coerce_c_locale_warn >= 0);
763 #ifdef MS_WINDOWS
764     assert(config->legacy_windows_fs_encoding >= 0);
765 #endif
766     assert(config->utf8_mode >= 0);
767     assert(config->isolated >= 0);
768     assert(config->use_environment >= 0);
769     assert(config->dev_mode >= 0);
770 
771     return _PyStatus_OK();
772 }
773 
774 
775 /* Read the configuration from:
776 
777    - command line arguments
778    - environment variables
779    - Py_xxx global configuration variables
780    - the LC_CTYPE locale */
781 PyStatus
_PyPreConfig_Read(PyPreConfig * config,const _PyArgv * args)782 _PyPreConfig_Read(PyPreConfig *config, const _PyArgv *args)
783 {
784     PyStatus status;
785 
786     status = _PyRuntime_Initialize();
787     if (_PyStatus_EXCEPTION(status)) {
788         return status;
789     }
790 
791     preconfig_get_global_vars(config);
792 
793     /* Copy LC_CTYPE locale, since it's modified later */
794     const char *loc = setlocale(LC_CTYPE, NULL);
795     if (loc == NULL) {
796         return _PyStatus_ERR("failed to LC_CTYPE locale");
797     }
798     char *init_ctype_locale = _PyMem_RawStrdup(loc);
799     if (init_ctype_locale == NULL) {
800         return _PyStatus_NO_MEMORY();
801     }
802 
803     /* Save the config to be able to restore it if encodings change */
804     PyPreConfig save_config;
805 
806     status = _PyPreConfig_InitFromPreConfig(&save_config, config);
807     if (_PyStatus_EXCEPTION(status)) {
808         return status;
809     }
810 
811     /* Set LC_CTYPE to the user preferred locale */
812     if (config->configure_locale) {
813         _Py_SetLocaleFromEnv(LC_CTYPE);
814     }
815 
816     _PyPreCmdline cmdline = _PyPreCmdline_INIT;
817     int init_utf8_mode = Py_UTF8Mode;
818 #ifdef MS_WINDOWS
819     int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
820 #endif
821 
822     if (args) {
823         status = _PyPreCmdline_SetArgv(&cmdline, args);
824         if (_PyStatus_EXCEPTION(status)) {
825             goto done;
826         }
827     }
828 
829     int locale_coerced = 0;
830     int loops = 0;
831 
832     while (1) {
833         int utf8_mode = config->utf8_mode;
834 
835         /* Watchdog to prevent an infinite loop */
836         loops++;
837         if (loops == 3) {
838             status = _PyStatus_ERR("Encoding changed twice while "
839                                "reading the configuration");
840             goto done;
841         }
842 
843         /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
844            on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
845         Py_UTF8Mode = config->utf8_mode;
846 #ifdef MS_WINDOWS
847         Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
848 #endif
849 
850         status = preconfig_read(config, &cmdline);
851         if (_PyStatus_EXCEPTION(status)) {
852             goto done;
853         }
854 
855         /* The legacy C locale assumes ASCII as the default text encoding, which
856          * causes problems not only for the CPython runtime, but also other
857          * components like GNU readline.
858          *
859          * Accordingly, when the CLI detects it, it attempts to coerce it to a
860          * more capable UTF-8 based alternative.
861          *
862          * See the documentation of the PYTHONCOERCECLOCALE setting for more
863          * details.
864          */
865         int encoding_changed = 0;
866         if (config->coerce_c_locale && !locale_coerced) {
867             locale_coerced = 1;
868             _Py_CoerceLegacyLocale(0);
869             encoding_changed = 1;
870         }
871 
872         if (utf8_mode == -1) {
873             if (config->utf8_mode == 1) {
874                 /* UTF-8 Mode enabled */
875                 encoding_changed = 1;
876             }
877         }
878         else {
879             if (config->utf8_mode != utf8_mode) {
880                 encoding_changed = 1;
881             }
882         }
883 
884         if (!encoding_changed) {
885             break;
886         }
887 
888         /* Reset the configuration before reading again the configuration,
889            just keep UTF-8 Mode value. */
890         int new_utf8_mode = config->utf8_mode;
891         int new_coerce_c_locale = config->coerce_c_locale;
892         preconfig_copy(config, &save_config);
893         config->utf8_mode = new_utf8_mode;
894         config->coerce_c_locale = new_coerce_c_locale;
895 
896         /* The encoding changed: read again the configuration
897            with the new encoding */
898     }
899     status = _PyStatus_OK();
900 
901 done:
902     if (init_ctype_locale != NULL) {
903         setlocale(LC_CTYPE, init_ctype_locale);
904         PyMem_RawFree(init_ctype_locale);
905     }
906     Py_UTF8Mode = init_utf8_mode ;
907 #ifdef MS_WINDOWS
908     Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
909 #endif
910     _PyPreCmdline_Clear(&cmdline);
911     return status;
912 }
913 
914 
915 /* Write the pre-configuration:
916 
917    - set the memory allocators
918    - set Py_xxx global configuration variables
919    - set the LC_CTYPE locale (coerce C locale, PEP 538) and set the UTF-8 mode
920      (PEP 540)
921 
922    The applied configuration is written into _PyRuntime.preconfig.
923    If the C locale cannot be coerced, set coerce_c_locale to 0.
924 
925    Do nothing if called after Py_Initialize(): ignore the new
926    pre-configuration. */
927 PyStatus
_PyPreConfig_Write(const PyPreConfig * src_config)928 _PyPreConfig_Write(const PyPreConfig *src_config)
929 {
930     PyPreConfig config;
931 
932     PyStatus status = _PyPreConfig_InitFromPreConfig(&config, src_config);
933     if (_PyStatus_EXCEPTION(status)) {
934         return status;
935     }
936 
937     if (_PyRuntime.core_initialized) {
938         /* bpo-34008: Calling this functions after Py_Initialize() ignores
939            the new configuration. */
940         return _PyStatus_OK();
941     }
942 
943     PyMemAllocatorName name = (PyMemAllocatorName)config.allocator;
944     if (name != PYMEM_ALLOCATOR_NOT_SET) {
945         if (_PyMem_SetupAllocators(name) < 0) {
946             return _PyStatus_ERR("Unknown PYTHONMALLOC allocator");
947         }
948     }
949 
950     preconfig_set_global_vars(&config);
951 
952     if (config.configure_locale) {
953         if (config.coerce_c_locale) {
954             if (!_Py_CoerceLegacyLocale(config.coerce_c_locale_warn)) {
955                 /* C locale not coerced */
956                 config.coerce_c_locale = 0;
957             }
958         }
959 
960         /* Set LC_CTYPE to the user preferred locale */
961         _Py_SetLocaleFromEnv(LC_CTYPE);
962     }
963 
964     /* Write the new pre-configuration into _PyRuntime */
965     preconfig_copy(&_PyRuntime.preconfig, &config);
966 
967     return _PyStatus_OK();
968 }
969