Skip to content

Commit 437bfd4

Browse files
committed
BUG#36570707: Collation set on connect using C-Extension is ignored
This patch fixes the issue where the collation passed through the connection option was getting ignored and default collation was being set by C API. Additionally, the default charset and collation now corresponds to charset ID 255 for MySQL Servers equal to or greater than 8.0. Change-Id: Ib7c876044a05b1e3d4aeb09092b0659984f2da98
1 parent 78ffff8 commit 437bfd4

14 files changed

+656
-114
lines changed

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ v9.0.0
1515
- WL#16318: Deprecate Cursors Prepared Raw and Named Tuple
1616
- WL#16283: Remove OpenTelemetry Bundled Installation
1717
- BUG#36611371: Update dnspython required versions to allow latest 2.6.1
18+
- BUG#36570707: Collation set on connect using C-Extension is ignored
1819
- BUG#36476195: Incorrect escaping in pure Python mode if sql_mode includes NO_BACKSLASH_ESCAPES
1920
- BUG#36289767: MySQLCursorBufferedRaw does not skip conversion
2021

mysql-connector-python/lib/mysql/connector/abstracts.py

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@
7272
from .constants import (
7373
CONN_ATTRS_DN,
7474
DEFAULT_CONFIGURATION,
75+
MYSQL_DEFAULT_CHARSET_ID_57,
76+
MYSQL_DEFAULT_CHARSET_ID_80,
7577
OPENSSL_CS_NAMES,
7678
TLS_CIPHER_SUITES,
7779
TLS_VERSIONS,
@@ -174,13 +176,20 @@ class MySQLConnectionAbstract(ABC):
174176

175177
def __init__(self) -> None:
176178
"""Initialize"""
177-
# opentelemetry related
178-
self._tracer: Any = None
179-
self._span: Any = None
180-
self.otel_context_propagation: bool = True
179+
# private (shouldn't be manipulated directly internally)
180+
self.__charset_id: Optional[int] = None
181+
"""It shouldn't be manipulated directly, even internally. If you need
182+
to manipulate the charset ID, use the property `_charset_id` (read & write)
183+
instead. Similarly, `_charset_id` shouldn't be manipulated externally,
184+
in this case, use property `charset_id` (read-only).
185+
"""
186+
187+
# protected (can be manipulated directly internally)
188+
self._tracer: Any = None # opentelemetry related
189+
self._span: Any = None # opentelemetry related
190+
self.otel_context_propagation: bool = True # opentelemetry related
181191

182192
self._client_flags: int = ClientFlag.get_default()
183-
self._charset_id: int = 45
184193
self._sql_mode: Optional[str] = None
185194
self._time_zone: Optional[str] = None
186195
self._autocommit: bool = False
@@ -1177,6 +1186,18 @@ def unread_result(self, value: bool) -> None:
11771186
raise ValueError("Expected a boolean type")
11781187
self._unread_result = value
11791188

1189+
@property
1190+
def collation(self) -> str:
1191+
"""Returns the collation for current connection.
1192+
1193+
This property returns the collation name of the current connection.
1194+
The server is queried when the connection is active. If not connected,
1195+
the configured collation name is returned.
1196+
1197+
Returns a string.
1198+
"""
1199+
return self._character_set.get_charset_info(self._charset_id)[2]
1200+
11801201
@property
11811202
def charset(self) -> str:
11821203
"""Returns the character set for current connection.
@@ -1189,6 +1210,41 @@ def charset(self) -> str:
11891210
"""
11901211
return self._character_set.get_info(self._charset_id)[0]
11911212

1213+
@property
1214+
def charset_id(self) -> int:
1215+
"""The charset ID utilized during the connection phase.
1216+
1217+
If the charset ID hasn't been set, the default charset ID is returned.
1218+
"""
1219+
return self._charset_id
1220+
1221+
@property
1222+
def _charset_id(self) -> int:
1223+
"""The charset ID utilized during the connection phase.
1224+
1225+
If the charset ID hasn't been set, the default charset ID is returned.
1226+
"""
1227+
if self.__charset_id is None:
1228+
if self._server_version is None:
1229+
# We mustn't set the private since we still don't know
1230+
# the server version. We temporarily return the default
1231+
# charset for undefined scenarios - eventually, the server
1232+
# info will be available and the private variable will be set.
1233+
return MYSQL_DEFAULT_CHARSET_ID_57
1234+
1235+
self.__charset_id = (
1236+
MYSQL_DEFAULT_CHARSET_ID_57
1237+
if self._server_version < (8, 0)
1238+
else MYSQL_DEFAULT_CHARSET_ID_80
1239+
)
1240+
1241+
return self.__charset_id
1242+
1243+
@_charset_id.setter
1244+
def _charset_id(self, value: int) -> None:
1245+
"""Sets the charset ID utilized during the connection phase."""
1246+
self.__charset_id = value
1247+
11921248
@property
11931249
def python_charset(self) -> str:
11941250
"""Returns the Python character set for current connection.
@@ -1269,28 +1325,9 @@ def set_charset_collation(
12691325

12701326
self._execute_query(f"SET NAMES '{charset_name}' COLLATE '{collation_name}'")
12711327

1272-
try:
1273-
# Required for C Extension
1274-
self.set_character_set_name(charset_name)
1275-
except AttributeError:
1276-
# Not required for pure Python connection
1277-
pass
1278-
12791328
if self.converter:
12801329
self.converter.set_charset(charset_name, character_set=self._character_set)
12811330

1282-
@property
1283-
def collation(self) -> str:
1284-
"""Returns the collation for current connection.
1285-
1286-
This property returns the collation name of the current connection.
1287-
The server is queried when the connection is active. If not connected,
1288-
the configured collation name is returned.
1289-
1290-
Returns a string.
1291-
"""
1292-
return self._character_set.get_charset_info(self._charset_id)[2]
1293-
12941331
@property
12951332
@abstractmethod
12961333
def connection_id(self) -> Optional[int]:
@@ -1311,7 +1348,7 @@ def _post_connection(self) -> None:
13111348
established. Some setting like autocommit, character set, and SQL mode
13121349
are set using this method.
13131350
"""
1314-
self.set_charset_collation(self._charset_id)
1351+
self.set_charset_collation(charset=self._charset_id)
13151352
self.autocommit = self._autocommit
13161353
if self._time_zone:
13171354
self.time_zone = self._time_zone

mysql-connector-python/lib/mysql/connector/aio/abstracts.py

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@
7575
from ..constants import (
7676
CONN_ATTRS_DN,
7777
DEFAULT_CONFIGURATION,
78+
MYSQL_DEFAULT_CHARSET_ID_57,
79+
MYSQL_DEFAULT_CHARSET_ID_80,
7880
OPENSSL_CS_NAMES,
7981
TLS_CIPHER_SUITES,
8082
TLS_VERSIONS,
@@ -201,6 +203,14 @@ def __init__(
201203
tls_ciphersuites: Optional[List[str]] = None,
202204
loop: Optional[asyncio.AbstractEventLoop] = None,
203205
):
206+
# private (shouldn't be manipulated directly internally)
207+
self.__charset: Optional[Charset] = None
208+
"""It shouldn't be manipulated directly, even internally. If you need
209+
to manipulate the charset object, use the property `_charset` (read & write)
210+
instead. Similarly, `_charset` shouldn't be manipulated externally.
211+
"""
212+
213+
# protected (can be manipulated directly internally)
204214
self._user: str = user
205215
self._password: str = password
206216
self._host: str = host
@@ -220,9 +230,10 @@ def __init__(
220230
self._init_command: Optional[str] = init_command
221231
self._protocol: MySQLProtocol = MySQLProtocol()
222232
self._socket: Optional[Union[MySQLTcpSocket, MySQLUnixSocket]] = None
223-
self._charset: Optional[Charset] = None
224233
self._charset_name: Optional[str] = charset
234+
"""Charset name provided by the user at connection time."""
225235
self._charset_collation: Optional[str] = collation
236+
"""Collation provided by the user at connection time."""
226237
self._ssl_active: bool = False
227238
self._ssl_disabled: bool = ssl_disabled
228239
self._ssl_ca: Optional[str] = ssl_ca
@@ -239,7 +250,7 @@ def __init__(
239250
loop or asyncio.get_event_loop()
240251
)
241252
self._client_flags: int = client_flags or ClientFlag.get_default()
242-
self._server_info: ServerInfo
253+
self._server_info: Optional[ServerInfo] = None
243254
self._cursors: weakref.WeakSet = weakref.WeakSet()
244255
self._query_attrs: Dict[str, BinaryProtocolType] = {}
245256
self._query_attrs_supported: int = False
@@ -893,6 +904,18 @@ def unread_result(self, value: bool) -> None:
893904
raise ValueError("Expected a boolean type")
894905
self._unread_result = value
895906

907+
@property
908+
def collation(self) -> str:
909+
"""Returns the collation for current connection.
910+
911+
This property returns the collation name of the current connection.
912+
The server is queried when the connection is active. If not connected,
913+
the configured collation name is returned.
914+
915+
Returns a string.
916+
"""
917+
return self._charset.collation
918+
896919
@property
897920
def charset(self) -> str:
898921
"""Return the character set for current connection.
@@ -901,7 +924,40 @@ def charset(self) -> str:
901924
The server is queried when the connection is active.
902925
If not connected, the configured character set name is returned.
903926
"""
904-
return self._charset.name if self._charset else "utf8"
927+
return self._charset.name
928+
929+
@property
930+
def charset_id(self) -> int:
931+
"""The charset ID utilized during the connection phase.
932+
933+
If the charset ID hasn't been set, the default charset ID is returned.
934+
"""
935+
return self._charset.charset_id
936+
937+
@property
938+
def _charset(self) -> Charset:
939+
"""The charset object encapsulates charset and collation information."""
940+
if self.__charset is None:
941+
if self._server_info is None:
942+
# We mustn't set `_charset` since we still don't know
943+
# the server version. We temporarily return the default
944+
# charset for undefined scenarios - eventually, the server
945+
# info will be available and `_charset` (data class) will be set.
946+
return charsets.get_by_id(MYSQL_DEFAULT_CHARSET_ID_57)
947+
948+
self.__charset = charsets.get_by_id(
949+
(
950+
MYSQL_DEFAULT_CHARSET_ID_57
951+
if self._server_info.version_tuple < (8, 0)
952+
else MYSQL_DEFAULT_CHARSET_ID_80
953+
)
954+
)
955+
return self.__charset
956+
957+
@_charset.setter
958+
def _charset(self, value: Charset) -> None:
959+
"""The charset object encapsulates charset and collation information."""
960+
self.__charset = value
905961

906962
@property
907963
def python_charset(self) -> str:
@@ -940,7 +996,7 @@ async def _post_connection(self) -> None:
940996
Some setting like autocommit, character set, and SQL mode are set using this
941997
method.
942998
"""
943-
await self.set_charset_collation(self._charset.charset_id)
999+
await self.set_charset_collation(charset=self._charset.charset_id)
9441000
await self.set_autocommit(self._autocommit)
9451001
if self._time_zone:
9461002
await self.set_time_zone(self._time_zone)
@@ -1000,18 +1056,9 @@ async def set_charset_collation(
10001056
charset = DEFAULT_CONFIGURATION["charset"]
10011057
self._charset = charsets.get_by_name(charset) # type: ignore[arg-type]
10021058

1003-
self._charset_name = self._charset.name
1004-
self._charset_collation = self._charset.collation
1005-
10061059
await self.cmd_query(
10071060
f"SET NAMES '{self._charset.name}' COLLATE '{self._charset.collation}'"
10081061
)
1009-
try:
1010-
# Required for C Extension
1011-
self.set_character_set_name(self._charset.name)
1012-
except AttributeError:
1013-
# Not required for pure Python connection
1014-
pass
10151062

10161063
if self.converter:
10171064
self.converter.set_charset(self._charset.name)
@@ -1048,7 +1095,9 @@ def get_server_version(self) -> Optional[Tuple[int, ...]]:
10481095
The MySQL server version as a tuple. If not previously connected, it will
10491096
return `None`.
10501097
"""
1051-
return self._server_info.version # type: ignore[return-value]
1098+
if self._server_info is not None:
1099+
return self._server_info.version # type: ignore[return-value]
1100+
return None
10521101

10531102
def get_server_info(self) -> Optional[str]:
10541103
"""Gets the original MySQL version information.

mysql-connector-python/lib/mysql/connector/aio/connection.py

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -238,17 +238,8 @@ async def _do_handshake(self) -> None:
238238
)
239239
elif self._charset_name:
240240
self._charset = charsets.get_by_name(self._charset_name)
241-
self._charset_collation = self._charset.collation
242241
elif self._charset_collation:
243242
self._charset = charsets.get_by_collation(self._charset_collation)
244-
self._charset_name = self._charset.name
245-
else:
246-
# The default charset from the server handshake should be used instead,
247-
# as `charsets.get_by_id(self._server_info.charset)`.
248-
# The charset id 45 is used to be aligned with the current implementation.
249-
self._charset = charsets.get_by_id(45)
250-
self._charset_name = self._charset.name
251-
self._charset_collation = self._charset.collation
252243

253244
if not self._handshake["capabilities"] & ClientFlag.SSL:
254245
if self._auth_plugin == "mysql_clear_password" and not self.is_secure:
@@ -1296,10 +1287,10 @@ async def cmd_ping(self) -> OkPacketType:
12961287

12971288
async def cmd_change_user(
12981289
self,
1299-
user: str = "",
1290+
username: str = "",
13001291
password: str = "",
13011292
database: str = "",
1302-
charset: int = 45,
1293+
charset: Optional[int] = None,
13031294
password1: str = "",
13041295
password2: str = "",
13051296
password3: str = "",
@@ -1311,13 +1302,17 @@ async def cmd_change_user(
13111302
This method allows to change the current logged in user information.
13121303
The result is a dictionary with OK packet information.
13131304
"""
1314-
if not isinstance(charset, int):
1315-
raise ValueError("charset must be an integer")
1316-
if charset < 0:
1317-
raise ValueError("charset should be either zero or a postive integer")
1305+
# If charset isn't defined, we use the same charset ID defined previously,
1306+
# otherwise, we run a verification and update the charset ID.
1307+
if charset is not None:
1308+
if not isinstance(charset, int):
1309+
raise ValueError("charset must be an integer")
1310+
if charset < 0:
1311+
raise ValueError("charset should be either zero or a postive integer")
1312+
self._charset = charsets.get_by_id(charset)
13181313

13191314
self._mfa_nfactor = 1
1320-
self._user = user
1315+
self._user = username
13211316
self._password = password
13221317
self._password1 = password1
13231318
self._password2 = password2
@@ -1341,7 +1336,7 @@ async def cmd_change_user(
13411336
username=self._user,
13421337
password=self._password,
13431338
database=self._database,
1344-
charset=charset,
1339+
charset=self._charset.charset_id,
13451340
client_flags=self._client_flags,
13461341
ssl_enabled=self._ssl_active,
13471342
auth_plugin=self._auth_plugin,
@@ -1356,8 +1351,5 @@ async def cmd_change_user(
13561351
if not (self._client_flags & ClientFlag.CONNECT_WITH_DB) and database:
13571352
await self.cmd_init_db(database)
13581353

1359-
self._charset = charsets.get_by_id(charset)
1360-
self._charset_name = self._charset.name
1361-
13621354
# return ok_pkt
13631355
return self._handle_ok(ok_packet)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy