Skip to content

Commit b3c259d

Browse files
fix: improve client-side regex statement parser (#1328)
* fix: improve client-side regex statement parser The client-side regex-based statement parser contained multiple minor errors, like: - BEGIN would match any string as BEGIN TRANSACTION (including stuff like `BEGIN foo`) - COMMIT and ROLLBACK had the same problem as BEGIN. - Mismatches were reported as UPDATE. They are now returned as UNKNOWN. - DLL missed the ANALYZE keyword * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 03400c4 commit b3c259d

File tree

7 files changed

+53
-24
lines changed

7 files changed

+53
-24
lines changed

google/cloud/spanner_dbapi/batch_dml_executor.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,12 @@ def execute_statement(self, parsed_statement: ParsedStatement):
5454
"""
5555
from google.cloud.spanner_dbapi import ProgrammingError
5656

57+
# Note: Let the server handle it if the client-side parser did not
58+
# recognize the type of statement.
5759
if (
5860
parsed_statement.statement_type != StatementType.UPDATE
5961
and parsed_statement.statement_type != StatementType.INSERT
62+
and parsed_statement.statement_type != StatementType.UNKNOWN
6063
):
6164
raise ProgrammingError("Only DML statements are allowed in batch DML mode.")
6265
self._statements.append(parsed_statement.statement)

google/cloud/spanner_dbapi/client_side_statement_parser.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,18 @@
2121
Statement,
2222
)
2323

24-
RE_BEGIN = re.compile(r"^\s*(BEGIN|START)(TRANSACTION)?", re.IGNORECASE)
25-
RE_COMMIT = re.compile(r"^\s*(COMMIT)(TRANSACTION)?", re.IGNORECASE)
26-
RE_ROLLBACK = re.compile(r"^\s*(ROLLBACK)(TRANSACTION)?", re.IGNORECASE)
24+
RE_BEGIN = re.compile(r"^\s*(BEGIN|START)(\s+TRANSACTION)?\s*$", re.IGNORECASE)
25+
RE_COMMIT = re.compile(r"^\s*(COMMIT)(\s+TRANSACTION)?\s*$", re.IGNORECASE)
26+
RE_ROLLBACK = re.compile(r"^\s*(ROLLBACK)(\s+TRANSACTION)?\s*$", re.IGNORECASE)
2727
RE_SHOW_COMMIT_TIMESTAMP = re.compile(
28-
r"^\s*(SHOW)\s+(VARIABLE)\s+(COMMIT_TIMESTAMP)", re.IGNORECASE
28+
r"^\s*(SHOW)\s+(VARIABLE)\s+(COMMIT_TIMESTAMP)\s*$", re.IGNORECASE
2929
)
3030
RE_SHOW_READ_TIMESTAMP = re.compile(
31-
r"^\s*(SHOW)\s+(VARIABLE)\s+(READ_TIMESTAMP)", re.IGNORECASE
31+
r"^\s*(SHOW)\s+(VARIABLE)\s+(READ_TIMESTAMP)\s*$", re.IGNORECASE
3232
)
33-
RE_START_BATCH_DML = re.compile(r"^\s*(START)\s+(BATCH)\s+(DML)", re.IGNORECASE)
34-
RE_RUN_BATCH = re.compile(r"^\s*(RUN)\s+(BATCH)", re.IGNORECASE)
35-
RE_ABORT_BATCH = re.compile(r"^\s*(ABORT)\s+(BATCH)", re.IGNORECASE)
33+
RE_START_BATCH_DML = re.compile(r"^\s*(START)\s+(BATCH)\s+(DML)\s*$", re.IGNORECASE)
34+
RE_RUN_BATCH = re.compile(r"^\s*(RUN)\s+(BATCH)\s*$", re.IGNORECASE)
35+
RE_ABORT_BATCH = re.compile(r"^\s*(ABORT)\s+(BATCH)\s*$", re.IGNORECASE)
3636
RE_PARTITION_QUERY = re.compile(r"^\s*(PARTITION)\s+(.+)", re.IGNORECASE)
3737
RE_RUN_PARTITION = re.compile(r"^\s*(RUN)\s+(PARTITION)\s+(.+)", re.IGNORECASE)
3838
RE_RUN_PARTITIONED_QUERY = re.compile(

google/cloud/spanner_dbapi/connection.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,7 @@
2020
from google.cloud import spanner_v1 as spanner
2121
from google.cloud.spanner_dbapi import partition_helper
2222
from google.cloud.spanner_dbapi.batch_dml_executor import BatchMode, BatchDmlExecutor
23-
from google.cloud.spanner_dbapi.parse_utils import _get_statement_type
24-
from google.cloud.spanner_dbapi.parsed_statement import (
25-
StatementType,
26-
AutocommitDmlMode,
27-
)
23+
from google.cloud.spanner_dbapi.parsed_statement import AutocommitDmlMode
2824
from google.cloud.spanner_dbapi.partition_helper import PartitionId
2925
from google.cloud.spanner_dbapi.parsed_statement import ParsedStatement, Statement
3026
from google.cloud.spanner_dbapi.transaction_helper import TransactionRetryHelper
@@ -702,10 +698,6 @@ def set_autocommit_dml_mode(
702698
self._autocommit_dml_mode = autocommit_dml_mode
703699

704700
def _partitioned_query_validation(self, partitioned_query, statement):
705-
if _get_statement_type(Statement(partitioned_query)) is not StatementType.QUERY:
706-
raise ProgrammingError(
707-
"Only queries can be partitioned. Invalid statement: " + statement.sql
708-
)
709701
if self.read_only is not True and self._client_transaction_started is True:
710702
raise ProgrammingError(
711703
"Partitioned query is not supported, because the connection is in a read/write transaction."

google/cloud/spanner_dbapi/cursor.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,9 +404,12 @@ def executemany(self, operation, seq_of_params):
404404
# For every operation, we've got to ensure that any prior DDL
405405
# statements were run.
406406
self.connection.run_prior_DDL_statements()
407+
# Treat UNKNOWN statements as if they are DML and let the server
408+
# determine what is wrong with it.
407409
if self._parsed_statement.statement_type in (
408410
StatementType.INSERT,
409411
StatementType.UPDATE,
412+
StatementType.UNKNOWN,
410413
):
411414
statements = []
412415
for params in seq_of_params:

google/cloud/spanner_dbapi/parse_utils.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,25 +155,30 @@
155155
STMT_INSERT = "INSERT"
156156

157157
# Heuristic for identifying statements that don't need to be run as updates.
158+
# TODO: This and the other regexes do not match statements that start with a hint.
158159
RE_NON_UPDATE = re.compile(r"^\W*(SELECT|GRAPH|FROM)", re.IGNORECASE)
159160

160161
RE_WITH = re.compile(r"^\s*(WITH)", re.IGNORECASE)
161162

162163
# DDL statements follow
163164
# https://cloud.google.com/spanner/docs/data-definition-language
164165
RE_DDL = re.compile(
165-
r"^\s*(CREATE|ALTER|DROP|GRANT|REVOKE|RENAME)", re.IGNORECASE | re.DOTALL
166+
r"^\s*(CREATE|ALTER|DROP|GRANT|REVOKE|RENAME|ANALYZE)", re.IGNORECASE | re.DOTALL
166167
)
167168

168-
RE_IS_INSERT = re.compile(r"^\s*(INSERT)", re.IGNORECASE | re.DOTALL)
169+
# TODO: These do not match statements that start with a hint.
170+
RE_IS_INSERT = re.compile(r"^\s*(INSERT\s+)", re.IGNORECASE | re.DOTALL)
171+
RE_IS_UPDATE = re.compile(r"^\s*(UPDATE\s+)", re.IGNORECASE | re.DOTALL)
172+
RE_IS_DELETE = re.compile(r"^\s*(DELETE\s+)", re.IGNORECASE | re.DOTALL)
169173

170174
RE_INSERT = re.compile(
171175
# Only match the `INSERT INTO <table_name> (columns...)
172176
# otherwise the rest of the statement could be a complex
173177
# operation.
174-
r"^\s*INSERT INTO (?P<table_name>[^\s\(\)]+)\s*\((?P<columns>[^\(\)]+)\)",
178+
r"^\s*INSERT(?:\s+INTO)?\s+(?P<table_name>[^\s\(\)]+)\s*\((?P<columns>[^\(\)]+)\)",
175179
re.IGNORECASE | re.DOTALL,
176180
)
181+
"""Deprecated: Use the RE_IS_INSERT, RE_IS_UPDATE, and RE_IS_DELETE regexes"""
177182

178183
RE_VALUES_TILL_END = re.compile(r"VALUES\s*\(.+$", re.IGNORECASE | re.DOTALL)
179184

@@ -259,8 +264,13 @@ def _get_statement_type(statement):
259264
# statements and doesn't yet support WITH for DML statements.
260265
return StatementType.QUERY
261266

262-
statement.sql = ensure_where_clause(query)
263-
return StatementType.UPDATE
267+
if RE_IS_UPDATE.match(query) or RE_IS_DELETE.match(query):
268+
# TODO: Remove this? It makes more sense to have this in SQLAlchemy and
269+
# Django than here.
270+
statement.sql = ensure_where_clause(query)
271+
return StatementType.UPDATE
272+
273+
return StatementType.UNKNOWN
264274

265275

266276
def sql_pyformat_args_to_spanner(sql, params):
@@ -355,7 +365,7 @@ def get_param_types(params):
355365
def ensure_where_clause(sql):
356366
"""
357367
Cloud Spanner requires a WHERE clause on UPDATE and DELETE statements.
358-
Add a dummy WHERE clause if non detected.
368+
Add a dummy WHERE clause if not detected.
359369
360370
:type sql: str
361371
:param sql: SQL code to check.

google/cloud/spanner_dbapi/parsed_statement.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818

1919
class StatementType(Enum):
20+
UNKNOWN = 0
2021
CLIENT_SIDE = 1
2122
DDL = 2
2223
QUERY = 3

tests/unit/spanner_dbapi/test_parse_utils.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,31 @@ def test_classify_stmt(self):
7474
("REVOKE SELECT ON TABLE Singers TO ROLE parent", StatementType.DDL),
7575
("GRANT ROLE parent TO ROLE child", StatementType.DDL),
7676
("INSERT INTO table (col1) VALUES (1)", StatementType.INSERT),
77+
("INSERT table (col1) VALUES (1)", StatementType.INSERT),
78+
("INSERT OR UPDATE table (col1) VALUES (1)", StatementType.INSERT),
79+
("INSERT OR IGNORE table (col1) VALUES (1)", StatementType.INSERT),
7780
("UPDATE table SET col1 = 1 WHERE col1 = NULL", StatementType.UPDATE),
81+
("delete from table WHERE col1 = 2", StatementType.UPDATE),
82+
("delete from table WHERE col1 in (select 1)", StatementType.UPDATE),
83+
("dlete from table where col1 = 2", StatementType.UNKNOWN),
84+
("udpate table set col2=1 where col1 = 2", StatementType.UNKNOWN),
85+
("begin foo", StatementType.UNKNOWN),
86+
("begin transaction foo", StatementType.UNKNOWN),
87+
("commit foo", StatementType.UNKNOWN),
88+
("commit transaction foo", StatementType.UNKNOWN),
89+
("rollback foo", StatementType.UNKNOWN),
90+
("rollback transaction foo", StatementType.UNKNOWN),
91+
("show variable", StatementType.UNKNOWN),
92+
("show variable read_timestamp foo", StatementType.UNKNOWN),
93+
("INSERTs INTO table (col1) VALUES (1)", StatementType.UNKNOWN),
94+
("UPDATEs table SET col1 = 1 WHERE col1 = NULL", StatementType.UNKNOWN),
95+
("DELETEs from table WHERE col1 = 2", StatementType.UNKNOWN),
7896
)
7997

8098
for query, want_class in cases:
81-
self.assertEqual(classify_statement(query).statement_type, want_class)
99+
self.assertEqual(
100+
classify_statement(query).statement_type, want_class, query
101+
)
82102

83103
def test_partition_query_classify_stmt(self):
84104
parsed_statement = classify_statement(

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy