Skip to content

Commit 3d17c04

Browse files
authored
bpo-40521: Add PyInterpreterState.unicode (GH-20081)
Move PyInterpreterState.fs_codec into a new PyInterpreterState.unicode structure. Give a name to the fs_codec structure and use this structure in unicodeobject.c.
1 parent 75cd8e4 commit 3d17c04

File tree

3 files changed

+48
-40
lines changed

3 files changed

+48
-40
lines changed

Include/internal/pycore_interp.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,19 @@ struct _ceval_state {
5151
#endif
5252
};
5353

54+
/* fs_codec.encoding is initialized to NULL.
55+
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
56+
struct _Py_unicode_fs_codec {
57+
char *encoding; // Filesystem encoding (encoded to UTF-8)
58+
int utf8; // encoding=="utf-8"?
59+
char *errors; // Filesystem errors (encoded to UTF-8)
60+
_Py_error_handler error_handler;
61+
};
62+
63+
struct _Py_unicode_state {
64+
struct _Py_unicode_fs_codec fs_codec;
65+
};
66+
5467

5568
/* interpreter state */
5669

@@ -97,14 +110,7 @@ struct _is {
97110
PyObject *codec_error_registry;
98111
int codecs_initialized;
99112

100-
/* fs_codec.encoding is initialized to NULL.
101-
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
102-
struct {
103-
char *encoding; /* Filesystem encoding (encoded to UTF-8) */
104-
int utf8; /* encoding=="utf-8"? */
105-
char *errors; /* Filesystem errors (encoded to UTF-8) */
106-
_Py_error_handler error_handler;
107-
} fs_codec;
113+
struct _Py_unicode_state unicode;
108114

109115
PyConfig config;
110116
#ifdef HAVE_DLOPEN

Modules/_io/textio.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ io_check_errors(PyObject *errors)
10071007

10081008
/* Avoid calling PyCodec_LookupError() before the codec registry is ready:
10091009
before _PyUnicode_InitEncodings() is called. */
1010-
if (!interp->fs_codec.encoding) {
1010+
if (!interp->unicode.fs_codec.encoding) {
10111011
return 0;
10121012
}
10131013

Objects/unicodeobject.c

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ unicode_check_encoding_errors(const char *encoding, const char *errors)
463463

464464
/* Avoid calling _PyCodec_Lookup() and PyCodec_LookupError() before the
465465
codec registry is ready: before _PyUnicode_InitEncodings() is called. */
466-
if (!interp->fs_codec.encoding) {
466+
if (!interp->unicode.fs_codec.encoding) {
467467
return 0;
468468
}
469469

@@ -3650,16 +3650,17 @@ PyObject *
36503650
PyUnicode_EncodeFSDefault(PyObject *unicode)
36513651
{
36523652
PyInterpreterState *interp = _PyInterpreterState_GET();
3653-
if (interp->fs_codec.utf8) {
3653+
struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
3654+
if (fs_codec->utf8) {
36543655
return unicode_encode_utf8(unicode,
3655-
interp->fs_codec.error_handler,
3656-
interp->fs_codec.errors);
3656+
fs_codec->error_handler,
3657+
fs_codec->errors);
36573658
}
36583659
#ifndef _Py_FORCE_UTF8_FS_ENCODING
3659-
else if (interp->fs_codec.encoding) {
3660+
else if (fs_codec->encoding) {
36603661
return PyUnicode_AsEncodedString(unicode,
3661-
interp->fs_codec.encoding,
3662-
interp->fs_codec.errors);
3662+
fs_codec->encoding,
3663+
fs_codec->errors);
36633664
}
36643665
#endif
36653666
else {
@@ -3886,17 +3887,18 @@ PyObject*
38863887
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
38873888
{
38883889
PyInterpreterState *interp = _PyInterpreterState_GET();
3889-
if (interp->fs_codec.utf8) {
3890+
struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
3891+
if (fs_codec->utf8) {
38903892
return unicode_decode_utf8(s, size,
3891-
interp->fs_codec.error_handler,
3892-
interp->fs_codec.errors,
3893+
fs_codec->error_handler,
3894+
fs_codec->errors,
38933895
NULL);
38943896
}
38953897
#ifndef _Py_FORCE_UTF8_FS_ENCODING
3896-
else if (interp->fs_codec.encoding) {
3898+
else if (fs_codec->encoding) {
38973899
return PyUnicode_Decode(s, size,
3898-
interp->fs_codec.encoding,
3899-
interp->fs_codec.errors);
3900+
fs_codec->encoding,
3901+
fs_codec->errors);
39003902
}
39013903
#endif
39023904
else {
@@ -16071,16 +16073,17 @@ init_fs_codec(PyInterpreterState *interp)
1607116073
return -1;
1607216074
}
1607316075

16074-
PyMem_RawFree(interp->fs_codec.encoding);
16075-
interp->fs_codec.encoding = encoding;
16076+
struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
16077+
PyMem_RawFree(fs_codec->encoding);
16078+
fs_codec->encoding = encoding;
1607616079
/* encoding has been normalized by init_fs_encoding() */
16077-
interp->fs_codec.utf8 = (strcmp(encoding, "utf-8") == 0);
16078-
PyMem_RawFree(interp->fs_codec.errors);
16079-
interp->fs_codec.errors = errors;
16080-
interp->fs_codec.error_handler = error_handler;
16080+
fs_codec->utf8 = (strcmp(encoding, "utf-8") == 0);
16081+
PyMem_RawFree(fs_codec->errors);
16082+
fs_codec->errors = errors;
16083+
fs_codec->error_handler = error_handler;
1608116084

1608216085
#ifdef _Py_FORCE_UTF8_FS_ENCODING
16083-
assert(interp->fs_codec.utf8 == 1);
16086+
assert(fs_codec->utf8 == 1);
1608416087
#endif
1608516088

1608616089
/* At this point, PyUnicode_EncodeFSDefault() and
@@ -16089,8 +16092,8 @@ init_fs_codec(PyInterpreterState *interp)
1608916092

1609016093
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
1609116094
global configuration variables. */
16092-
if (_Py_SetFileSystemEncoding(interp->fs_codec.encoding,
16093-
interp->fs_codec.errors) < 0) {
16095+
if (_Py_SetFileSystemEncoding(fs_codec->encoding,
16096+
fs_codec->errors) < 0) {
1609416097
PyErr_NoMemory();
1609516098
return -1;
1609616099
}
@@ -16133,15 +16136,14 @@ _PyUnicode_InitEncodings(PyThreadState *tstate)
1613316136

1613416137

1613516138
static void
16136-
_PyUnicode_FiniEncodings(PyThreadState *tstate)
16139+
_PyUnicode_FiniEncodings(struct _Py_unicode_fs_codec *fs_codec)
1613716140
{
16138-
PyInterpreterState *interp = tstate->interp;
16139-
PyMem_RawFree(interp->fs_codec.encoding);
16140-
interp->fs_codec.encoding = NULL;
16141-
interp->fs_codec.utf8 = 0;
16142-
PyMem_RawFree(interp->fs_codec.errors);
16143-
interp->fs_codec.errors = NULL;
16144-
interp->fs_codec.error_handler = _Py_ERROR_UNKNOWN;
16141+
PyMem_RawFree(fs_codec->encoding);
16142+
fs_codec->encoding = NULL;
16143+
fs_codec->utf8 = 0;
16144+
PyMem_RawFree(fs_codec->errors);
16145+
fs_codec->errors = NULL;
16146+
fs_codec->error_handler = _Py_ERROR_UNKNOWN;
1614516147
}
1614616148

1614716149

@@ -16199,7 +16201,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
1619916201
unicode_clear_static_strings();
1620016202
}
1620116203

16202-
_PyUnicode_FiniEncodings(tstate);
16204+
_PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
1620316205
}
1620416206

1620516207

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy