From 1d393fedf3be8b36c91d0f52a5f23cfa5c05f835 Mon Sep 17 00:00:00 2001
From: Emmanuel T Odeke
Date: Thu, 5 Dec 2024 23:04:03 -0800
Subject: [PATCH 01/19] fix(tracing): only set span.status=OK if UNSET (#1248)

Recent OpenTelemetry-Python only allows a span's status to be updated
while the current status is UNSET or ERROR, yet the code in trace_call
was accidentally setting the status to OK unconditionally whenever no
exception was raised. This change only sets OK when the status is still
UNSET, and adds tests to lock this behavior in.

Fixes #1246

Co-authored-by: Sri Harsha CH <57220027+harshachinta@users.noreply.github.com>
---
 .../spanner_v1/_opentelemetry_tracing.py      |  8 +++-
 tests/unit/test__opentelemetry_tracing.py     | 37 +++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/google/cloud/spanner_v1/_opentelemetry_tracing.py b/google/cloud/spanner_v1/_opentelemetry_tracing.py
index efbeea05e7..e5aad08c05 100644
--- a/google/cloud/spanner_v1/_opentelemetry_tracing.py
+++ b/google/cloud/spanner_v1/_opentelemetry_tracing.py
@@ -109,4 +109,10 @@ def trace_call(name, session, extra_attributes=None, observability_options=None)
             span.record_exception(error)
             raise
         else:
-            span.set_status(Status(StatusCode.OK))
+            if (not span._status) or span._status.status_code == StatusCode.UNSET:
+                # OpenTelemetry-Python only allows a status change
+                # if the current code is UNSET or ERROR. At the end
+                # of the generator's consumption, only set it to OK
+                # if it wasn't previously set otherwise.
+                # https://github.com/googleapis/python-spanner/issues/1246
+                span.set_status(Status(StatusCode.OK))
diff --git a/tests/unit/test__opentelemetry_tracing.py b/tests/unit/test__opentelemetry_tracing.py
index 20e31d9ea6..1150ce7778 100644
--- a/tests/unit/test__opentelemetry_tracing.py
+++ b/tests/unit/test__opentelemetry_tracing.py
@@ -158,3 +158,40 @@ def test_trace_codeless_error(self):
         self.assertEqual(len(span_list), 1)
         span = span_list[0]
         self.assertEqual(span.status.status_code, StatusCode.ERROR)
+
+    def test_trace_call_terminal_span_status(self):
+        # Verify that we don't unconditionally set the terminal span status to
+        # SpanStatus.OK per https://github.com/googleapis/python-spanner/issues/1246
+        from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+        from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+            InMemorySpanExporter,
+        )
+        from opentelemetry.trace.status import Status, StatusCode
+        from opentelemetry.sdk.trace import TracerProvider
+        from opentelemetry.sdk.trace.sampling import ALWAYS_ON
+
+        tracer_provider = TracerProvider(sampler=ALWAYS_ON)
+        trace_exporter = InMemorySpanExporter()
+        tracer_provider.add_span_processor(SimpleSpanProcessor(trace_exporter))
+        observability_options = dict(tracer_provider=tracer_provider)
+
+        session = _make_session()
+        with _opentelemetry_tracing.trace_call(
+            "VerifyTerminalSpanStatus",
+            session,
+            observability_options=observability_options,
+        ) as span:
+            span.set_status(Status(StatusCode.ERROR, "Our error exhibit"))
+
+        span_list = trace_exporter.get_finished_spans()
+        got_statuses = []
+
+        for span in span_list:
+            got_statuses.append(
+                (span.name, span.status.status_code, span.status.description)
+            )
+
+        want_statuses = [
+            ("VerifyTerminalSpanStatus", StatusCode.ERROR, "Our error exhibit"),
+        ]
+        assert got_statuses == want_statuses
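
For illustration, a minimal standalone sketch of the behavior this patch
locks in, mirroring the unit test above. The fake session built with
SimpleNamespace and the database name string are assumptions for the
sketch: at this point trace_call only reads session._database.name.

    from types import SimpleNamespace

    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
        InMemorySpanExporter,
    )
    from opentelemetry.trace.status import Status, StatusCode

    from google.cloud.spanner_v1 import _opentelemetry_tracing

    # Route finished spans into memory so the terminal status can be checked.
    tracer_provider = TracerProvider()
    exporter = InMemorySpanExporter()
    tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))

    # Fake session (an assumption for this sketch): trace_call only needs
    # an object whose _database attribute has a name.
    session = SimpleNamespace(
        _database=SimpleNamespace(name="projects/p/instances/i/databases/d")
    )

    with _opentelemetry_tracing.trace_call(
        "Demo",
        session,
        observability_options=dict(tracer_provider=tracer_provider),
    ) as span:
        # A status set inside the block must survive the block's exit...
        span.set_status(Status(StatusCode.ERROR, "simulated failure"))

    (finished,) = exporter.get_finished_spans()
    # ...instead of being overwritten with OK, as it was before this fix.
    assert finished.status.status_code == StatusCode.ERROR
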
From a6811afefa6739caa20203048635d94f9b85c4c8 Mon Sep 17 00:00:00 2001
From: Emmanuel T Odeke
Date: Fri, 6 Dec 2024 02:01:15 -0800
Subject: [PATCH 02/19] observability: annotate Session+SessionPool events (#1207)

This change adds span annotations for session and session pool events,
to aid customers in debugging latency issues caused by session pool
misbehavior, and to help maintainers figure out which session pool type
is the most appropriate.

Updates #1170
---
 google/cloud/spanner_v1/_helpers.py           |   4 +
 .../spanner_v1/_opentelemetry_tracing.py      |  19 +-
 google/cloud/spanner_v1/database.py           |  12 +
 google/cloud/spanner_v1/pool.py               | 173 ++++++-
 google/cloud/spanner_v1/session.py            |  28 +-
 google/cloud/spanner_v1/transaction.py        |  32 +-
 tests/_helpers.py                             |  39 +-
 tests/unit/test_batch.py                      |   4 +
 tests/unit/test_database.py                   |   4 +
 tests/unit/test_pool.py                       | 438 ++++++++++--------
 tests/unit/test_session.py                    |  38 ++
 tests/unit/test_snapshot.py                   |   4 +
 tests/unit/test_spanner.py                    |   4 +
 tests/unit/test_transaction.py                |   4 +
 14 files changed, 602 insertions(+), 201 deletions(-)

diff --git a/google/cloud/spanner_v1/_helpers.py b/google/cloud/spanner_v1/_helpers.py
index a4d66fc20f..29bd604e7b 100644
--- a/google/cloud/spanner_v1/_helpers.py
+++ b/google/cloud/spanner_v1/_helpers.py
@@ -463,6 +463,7 @@ def _retry(
     retry_count=5,
     delay=2,
     allowed_exceptions=None,
+    beforeNextRetry=None,
 ):
     """
     Retry a function with a specified number of retries, delay between retries, and list of allowed exceptions.
@@ -479,6 +480,9 @@
     """
     retries = 0
     while retries <= retry_count:
+        if retries > 0 and beforeNextRetry:
+            beforeNextRetry(retries, delay)
+
         try:
             return func()
         except Exception as exc:
diff --git a/google/cloud/spanner_v1/_opentelemetry_tracing.py b/google/cloud/spanner_v1/_opentelemetry_tracing.py
index e5aad08c05..1caac59ecd 100644
--- a/google/cloud/spanner_v1/_opentelemetry_tracing.py
+++ b/google/cloud/spanner_v1/_opentelemetry_tracing.py
@@ -81,10 +81,11 @@ def trace_call(name, session, extra_attributes=None, observability_options=None)
     tracer = get_tracer(tracer_provider)
 
     # Set base attributes that we know for every trace created
+    db = session._database
     attributes = {
         "db.type": "spanner",
         "db.url": SpannerClient.DEFAULT_ENDPOINT,
-        "db.instance": session._database.name,
+        "db.instance": "" if not db else db.name,
         "net.host.name": SpannerClient.DEFAULT_ENDPOINT,
         OTEL_SCOPE_NAME: TRACER_NAME,
         OTEL_SCOPE_VERSION: TRACER_VERSION,
@@ -106,7 +107,10 @@ def trace_call(name, session, extra_attributes=None, observability_options=None)
             yield span
         except Exception as error:
             span.set_status(Status(StatusCode.ERROR, str(error)))
-            span.record_exception(error)
+            # OpenTelemetry-Python already invokes span.record_exception on
+            # __exit__ for any exception, so invoking .record_exception here
+            # as well would record the exception twice. We should file a bug
+            # with them to only record it if not already recorded.
             raise
         else:
             if (not span._status) or span._status.status_code == StatusCode.UNSET:
                 # OpenTelemetry-Python only allows a status change
                 # if the current code is UNSET or ERROR. At the end
                 # of the generator's consumption, only set it to OK
                 # if it wasn't previously set otherwise.
# https://github.com/googleapis/python-spanner/issues/1246 span.set_status(Status(StatusCode.OK)) + + +def get_current_span(): + if not HAS_OPENTELEMETRY_INSTALLED: + return None + return trace.get_current_span() + + +def add_span_event(span, event_name, event_attributes=None): + if span: + span.add_event(event_name, event_attributes) diff --git a/google/cloud/spanner_v1/database.py b/google/cloud/spanner_v1/database.py index 1e10e1df73..c8230ab503 100644 --- a/google/cloud/spanner_v1/database.py +++ b/google/cloud/spanner_v1/database.py @@ -67,6 +67,10 @@ SpannerGrpcTransport, ) from google.cloud.spanner_v1.table import Table +from google.cloud.spanner_v1._opentelemetry_tracing import ( + add_span_event, + get_current_span, +) SPANNER_DATA_SCOPE = "https://www.googleapis.com/auth/spanner.data" @@ -1164,7 +1168,9 @@ def __init__( def __enter__(self): """Begin ``with`` block.""" + current_span = get_current_span() session = self._session = self._database._pool.get() + add_span_event(current_span, "Using session", {"id": session.session_id}) batch = self._batch = Batch(session) if self._request_options.transaction_tag: batch.transaction_tag = self._request_options.transaction_tag @@ -1187,6 +1193,12 @@ def __exit__(self, exc_type, exc_val, exc_tb): extra={"commit_stats": self._batch.commit_stats}, ) self._database._pool.put(self._session) + current_span = get_current_span() + add_span_event( + current_span, + "Returned session to pool", + {"id": self._session.session_id}, + ) class MutationGroupsCheckout(object): diff --git a/google/cloud/spanner_v1/pool.py b/google/cloud/spanner_v1/pool.py index c95ef7a7b9..4f90196b4a 100644 --- a/google/cloud/spanner_v1/pool.py +++ b/google/cloud/spanner_v1/pool.py @@ -16,6 +16,7 @@ import datetime import queue +import time from google.cloud.exceptions import NotFound from google.cloud.spanner_v1 import BatchCreateSessionsRequest @@ -24,6 +25,10 @@ _metadata_with_prefix, _metadata_with_leader_aware_routing, ) +from google.cloud.spanner_v1._opentelemetry_tracing import ( + add_span_event, + get_current_span, +) from warnings import warn _NOW = datetime.datetime.utcnow # unit tests may replace @@ -196,6 +201,18 @@ def bind(self, database): when needed. 
""" self._database = database + requested_session_count = self.size - self._sessions.qsize() + span = get_current_span() + span_event_attributes = {"kind": type(self).__name__} + + if requested_session_count <= 0: + add_span_event( + span, + f"Invalid session pool size({requested_session_count}) <= 0", + span_event_attributes, + ) + return + api = database.spanner_api metadata = _metadata_with_prefix(database.name) if database._route_to_leader_enabled: @@ -203,13 +220,31 @@ def bind(self, database): _metadata_with_leader_aware_routing(database._route_to_leader_enabled) ) self._database_role = self._database_role or self._database.database_role + if requested_session_count > 0: + add_span_event( + span, + f"Requesting {requested_session_count} sessions", + span_event_attributes, + ) + + if self._sessions.full(): + add_span_event(span, "Session pool is already full", span_event_attributes) + return + request = BatchCreateSessionsRequest( database=database.name, - session_count=self.size - self._sessions.qsize(), + session_count=requested_session_count, session_template=Session(creator_role=self.database_role), ) + returned_session_count = 0 while not self._sessions.full(): + request.session_count = requested_session_count - self._sessions.qsize() + add_span_event( + span, + f"Creating {request.session_count} sessions", + span_event_attributes, + ) resp = api.batch_create_sessions( request=request, metadata=metadata, @@ -218,6 +253,13 @@ def bind(self, database): session = self._new_session() session._session_id = session_pb.name.split("/")[-1] self._sessions.put(session) + returned_session_count += 1 + + add_span_event( + span, + f"Requested for {requested_session_count} sessions, returned {returned_session_count}", + span_event_attributes, + ) def get(self, timeout=None): """Check a session out from the pool. @@ -233,12 +275,43 @@ def get(self, timeout=None): if timeout is None: timeout = self.default_timeout - session = self._sessions.get(block=True, timeout=timeout) - age = _NOW() - session.last_use_time + start_time = time.time() + current_span = get_current_span() + span_event_attributes = {"kind": type(self).__name__} + add_span_event(current_span, "Acquiring session", span_event_attributes) - if age >= self._max_age and not session.exists(): - session = self._database.session() - session.create() + session = None + try: + add_span_event( + current_span, + "Waiting for a session to become available", + span_event_attributes, + ) + + session = self._sessions.get(block=True, timeout=timeout) + age = _NOW() - session.last_use_time + + if age >= self._max_age and not session.exists(): + if not session.exists(): + add_span_event( + current_span, + "Session is not valid, recreating it", + span_event_attributes, + ) + session = self._database.session() + session.create() + # Replacing with the updated session.id. + span_event_attributes["session.id"] = session._session_id + + span_event_attributes["session.id"] = session._session_id + span_event_attributes["time.elapsed"] = time.time() - start_time + add_span_event(current_span, "Acquired session", span_event_attributes) + + except queue.Empty as e: + add_span_event( + current_span, "No sessions available in the pool", span_event_attributes + ) + raise e return session @@ -312,13 +385,32 @@ def get(self): :returns: an existing session from the pool, or a newly-created session. 
""" + current_span = get_current_span() + span_event_attributes = {"kind": type(self).__name__} + add_span_event(current_span, "Acquiring session", span_event_attributes) + try: + add_span_event( + current_span, + "Waiting for a session to become available", + span_event_attributes, + ) session = self._sessions.get_nowait() except queue.Empty: + add_span_event( + current_span, + "No sessions available in pool. Creating session", + span_event_attributes, + ) session = self._new_session() session.create() else: if not session.exists(): + add_span_event( + current_span, + "Session is not valid, recreating it", + span_event_attributes, + ) session = self._new_session() session.create() return session @@ -427,6 +519,38 @@ def bind(self, database): session_template=Session(creator_role=self.database_role), ) + span_event_attributes = {"kind": type(self).__name__} + current_span = get_current_span() + requested_session_count = request.session_count + if requested_session_count <= 0: + add_span_event( + current_span, + f"Invalid session pool size({requested_session_count}) <= 0", + span_event_attributes, + ) + return + + add_span_event( + current_span, + f"Requesting {requested_session_count} sessions", + span_event_attributes, + ) + + if created_session_count >= self.size: + add_span_event( + current_span, + "Created no new sessions as sessionPool is full", + span_event_attributes, + ) + return + + add_span_event( + current_span, + f"Creating {request.session_count} sessions", + span_event_attributes, + ) + + returned_session_count = 0 while created_session_count < self.size: resp = api.batch_create_sessions( request=request, @@ -436,8 +560,16 @@ def bind(self, database): session = self._new_session() session._session_id = session_pb.name.split("/")[-1] self.put(session) + returned_session_count += 1 + created_session_count += len(resp.session) + add_span_event( + current_span, + f"Requested for {requested_session_count} sessions, return {returned_session_count}", + span_event_attributes, + ) + def get(self, timeout=None): """Check a session out from the pool. @@ -452,7 +584,26 @@ def get(self, timeout=None): if timeout is None: timeout = self.default_timeout - ping_after, session = self._sessions.get(block=True, timeout=timeout) + start_time = time.time() + span_event_attributes = {"kind": type(self).__name__} + current_span = get_current_span() + add_span_event( + current_span, + "Waiting for a session to become available", + span_event_attributes, + ) + + ping_after = None + session = None + try: + ping_after, session = self._sessions.get(block=True, timeout=timeout) + except queue.Empty as e: + add_span_event( + current_span, + "No sessions available in the pool within the specified timeout", + span_event_attributes, + ) + raise e if _NOW() > ping_after: # Using session.exists() guarantees the returned session exists. 
@@ -462,6 +613,14 @@ def get(self, timeout=None): session = self._new_session() session.create() + span_event_attributes.update( + { + "time.elapsed": time.time() - start_time, + "session.id": session._session_id, + "kind": "pinging_pool", + } + ) + add_span_event(current_span, "Acquired session", span_event_attributes) return session def put(self, session): diff --git a/google/cloud/spanner_v1/session.py b/google/cloud/spanner_v1/session.py index 539f36af2b..166d5488c6 100644 --- a/google/cloud/spanner_v1/session.py +++ b/google/cloud/spanner_v1/session.py @@ -31,7 +31,11 @@ _metadata_with_prefix, _metadata_with_leader_aware_routing, ) -from google.cloud.spanner_v1._opentelemetry_tracing import trace_call +from google.cloud.spanner_v1._opentelemetry_tracing import ( + add_span_event, + get_current_span, + trace_call, +) from google.cloud.spanner_v1.batch import Batch from google.cloud.spanner_v1.snapshot import Snapshot from google.cloud.spanner_v1.transaction import Transaction @@ -134,6 +138,9 @@ def create(self): :raises ValueError: if :attr:`session_id` is already set. """ + current_span = get_current_span() + add_span_event(current_span, "Creating Session") + if self._session_id is not None: raise ValueError("Session ID already set by back-end") api = self._database.spanner_api @@ -174,8 +181,18 @@ def exists(self): :rtype: bool :returns: True if the session exists on the back-end, else False. """ + current_span = get_current_span() if self._session_id is None: + add_span_event( + current_span, + "Checking session existence: Session does not exist as it has not been created yet", + ) return False + + add_span_event( + current_span, "Checking if Session exists", {"session.id": self._session_id} + ) + api = self._database.spanner_api metadata = _metadata_with_prefix(self._database.name) if self._database._route_to_leader_enabled: @@ -209,8 +226,17 @@ def delete(self): :raises ValueError: if :attr:`session_id` is not already set. 
:raises NotFound: if the session does not exist """ + current_span = get_current_span() if self._session_id is None: + add_span_event( + current_span, "Deleting Session failed due to unset session_id" + ) raise ValueError("Session ID not set by back-end") + + add_span_event( + current_span, "Deleting Session", {"session.id": self._session_id} + ) + api = self._database.spanner_api metadata = _metadata_with_prefix(self._database.name) observability_options = getattr(self._database, "observability_options", None) diff --git a/google/cloud/spanner_v1/transaction.py b/google/cloud/spanner_v1/transaction.py index d99c4fde2f..fa8e5121ff 100644 --- a/google/cloud/spanner_v1/transaction.py +++ b/google/cloud/spanner_v1/transaction.py @@ -32,7 +32,7 @@ from google.cloud.spanner_v1 import TransactionOptions from google.cloud.spanner_v1.snapshot import _SnapshotBase from google.cloud.spanner_v1.batch import _BatchBase -from google.cloud.spanner_v1._opentelemetry_tracing import trace_call +from google.cloud.spanner_v1._opentelemetry_tracing import add_span_event, trace_call from google.cloud.spanner_v1 import RequestOptions from google.api_core import gapic_v1 from google.api_core.exceptions import InternalServerError @@ -160,16 +160,25 @@ def begin(self): "CloudSpanner.BeginTransaction", self._session, observability_options=observability_options, - ): + ) as span: method = functools.partial( api.begin_transaction, session=self._session.name, options=txn_options, metadata=metadata, ) + + def beforeNextRetry(nthRetry, delayInSeconds): + add_span_event( + span, + "Transaction Begin Attempt Failed. Retrying", + {"attempt": nthRetry, "sleep_seconds": delayInSeconds}, + ) + response = _retry( method, allowed_exceptions={InternalServerError: _check_rst_stream_error}, + beforeNextRetry=beforeNextRetry, ) self._transaction_id = response.id return self._transaction_id @@ -246,7 +255,6 @@ def commit( metadata.append( _metadata_with_leader_aware_routing(database._route_to_leader_enabled) ) - trace_attributes = {"num_mutations": len(self._mutations)} if request_options is None: request_options = RequestOptions() @@ -266,22 +274,38 @@ def commit( max_commit_delay=max_commit_delay, request_options=request_options, ) + + trace_attributes = {"num_mutations": len(self._mutations)} observability_options = getattr(database, "observability_options", None) with trace_call( "CloudSpanner.Commit", self._session, trace_attributes, observability_options, - ): + ) as span: + add_span_event(span, "Starting Commit") + method = functools.partial( api.commit, request=request, metadata=metadata, ) + + def beforeNextRetry(nthRetry, delayInSeconds): + add_span_event( + span, + "Transaction Commit Attempt Failed. 
Retrying", + {"attempt": nthRetry, "sleep_seconds": delayInSeconds}, + ) + response = _retry( method, allowed_exceptions={InternalServerError: _check_rst_stream_error}, + beforeNextRetry=beforeNextRetry, ) + + add_span_event(span, "Commit Done") + self.committed = response.commit_timestamp if return_commit_stats: self.commit_stats = response.commit_stats diff --git a/tests/_helpers.py b/tests/_helpers.py index 5e514f2586..81787c5a86 100644 --- a/tests/_helpers.py +++ b/tests/_helpers.py @@ -16,10 +16,11 @@ OTEL_SCOPE_NAME, OTEL_SCOPE_VERSION, ) + from opentelemetry.sdk.trace.sampling import TraceIdRatioBased from opentelemetry.trace.status import StatusCode - trace.set_tracer_provider(TracerProvider()) + trace.set_tracer_provider(TracerProvider(sampler=TraceIdRatioBased(1.0))) HAS_OPENTELEMETRY_INSTALLED = True except ImportError: @@ -86,9 +87,43 @@ def assertSpanAttributes( if HAS_OPENTELEMETRY_INSTALLED: if not span: span_list = self.ot_exporter.get_finished_spans() - self.assertEqual(len(span_list), 1) + self.assertEqual(len(span_list) > 0, True) span = span_list[0] self.assertEqual(span.name, name) self.assertEqual(span.status.status_code, status) self.assertEqual(dict(span.attributes), attributes) + + def assertSpanEvents(self, name, wantEventNames=[], span=None): + if not HAS_OPENTELEMETRY_INSTALLED: + return + + if not span: + span_list = self.ot_exporter.get_finished_spans() + self.assertEqual(len(span_list) > 0, True) + span = span_list[0] + + self.assertEqual(span.name, name) + actualEventNames = [] + for event in span.events: + actualEventNames.append(event.name) + self.assertEqual(actualEventNames, wantEventNames) + + def assertSpanNames(self, want_span_names): + if not HAS_OPENTELEMETRY_INSTALLED: + return + + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + self.assertEqual(got_span_names, want_span_names) + + def get_finished_spans(self): + if HAS_OPENTELEMETRY_INSTALLED: + return list( + filter( + lambda span: span and span.name, + self.ot_exporter.get_finished_spans(), + ) + ) + else: + return [] diff --git a/tests/unit/test_batch.py b/tests/unit/test_batch.py index 2f6b5e4ae9..a7f7a6f970 100644 --- a/tests/unit/test_batch.py +++ b/tests/unit/test_batch.py @@ -611,6 +611,10 @@ def __init__(self, database=None, name=TestBatch.SESSION_NAME): self._database = database self.name = name + @property + def session_id(self): + return self.name + class _Database(object): name = "testing" diff --git a/tests/unit/test_database.py b/tests/unit/test_database.py index 90fa0c269f..6e29255fb7 100644 --- a/tests/unit/test_database.py +++ b/tests/unit/test_database.py @@ -3188,6 +3188,10 @@ def run_in_transaction(self, func, *args, **kw): self._retried = (func, args, kw) return self._committed + @property + def session_id(self): + return self.name + class _MockIterator(object): def __init__(self, *values, **kw): diff --git a/tests/unit/test_pool.py b/tests/unit/test_pool.py index 2e3b46fa73..fbb35201eb 100644 --- a/tests/unit/test_pool.py +++ b/tests/unit/test_pool.py @@ -14,10 +14,17 @@ from functools import total_ordering +import time import unittest from datetime import datetime, timedelta import mock +from google.cloud.spanner_v1._opentelemetry_tracing import trace_call +from tests._helpers import ( + OpenTelemetryBase, + StatusCode, + enrich_with_otel_scope, +) def _make_database(name="name"): @@ -133,7 +140,15 @@ def test_session_w_kwargs(self): self.assertEqual(checkout._kwargs, {"foo": "bar"}) -class TestFixedSizePool(unittest.TestCase): 
+class TestFixedSizePool(OpenTelemetryBase): + BASE_ATTRIBUTES = { + "db.type": "spanner", + "db.url": "spanner.googleapis.com", + "db.instance": "name", + "net.host.name": "spanner.googleapis.com", + } + enrich_with_otel_scope(BASE_ATTRIBUTES) + def _getTargetClass(self): from google.cloud.spanner_v1.pool import FixedSizePool @@ -216,6 +231,93 @@ def test_get_non_expired(self): self.assertTrue(session._exists_checked) self.assertFalse(pool._sessions.full()) + def test_spans_bind_get(self): + # This tests retrieving 1 out of 4 sessions from the session pool. + pool = self._make_one(size=4) + database = _Database("name") + SESSIONS = sorted([_Session(database) for i in range(0, 4)]) + database._sessions.extend(SESSIONS) + pool.bind(database) + + with trace_call("pool.Get", SESSIONS[0]) as span: + pool.get() + wantEventNames = [ + "Acquiring session", + "Waiting for a session to become available", + "Acquired session", + ] + self.assertSpanEvents("pool.Get", wantEventNames, span) + + # Check for the overall spans too. + self.assertSpanAttributes( + "pool.Get", + attributes=TestFixedSizePool.BASE_ATTRIBUTES, + ) + + wantEventNames = [ + "Acquiring session", + "Waiting for a session to become available", + "Acquired session", + ] + self.assertSpanEvents("pool.Get", wantEventNames) + + def test_spans_bind_get_empty_pool(self): + # Tests trying to invoke pool.get() from an empty pool. + pool = self._make_one(size=0) + database = _Database("name") + session1 = _Session(database) + with trace_call("pool.Get", session1): + try: + pool.bind(database) + database._sessions = database._sessions[:0] + pool.get() + except Exception: + pass + + wantEventNames = [ + "Invalid session pool size(0) <= 0", + "Acquiring session", + "Waiting for a session to become available", + "No sessions available in the pool", + ] + self.assertSpanEvents("pool.Get", wantEventNames) + + # Check for the overall spans too. + self.assertSpanNames(["pool.Get"]) + self.assertSpanAttributes( + "pool.Get", + attributes=TestFixedSizePool.BASE_ATTRIBUTES, + ) + + def test_spans_pool_bind(self): + # Tests the exception generated from invoking pool.bind when + # you have an empty pool. + pool = self._make_one(size=1) + database = _Database("name") + SESSIONS = [] + database._sessions.extend(SESSIONS) + fauxSession = mock.Mock() + setattr(fauxSession, "_database", database) + try: + with trace_call("testBind", fauxSession): + pool.bind(database) + except Exception: + pass + + wantEventNames = [ + "Requesting 1 sessions", + "Creating 1 sessions", + "exception", + ] + self.assertSpanEvents("testBind", wantEventNames) + + # Check for the overall spans. 
+ self.assertSpanAttributes( + "testBind", + status=StatusCode.ERROR, + attributes=TestFixedSizePool.BASE_ATTRIBUTES, + ) + def test_get_expired(self): pool = self._make_one(size=4) database = _Database("name") @@ -299,7 +401,15 @@ def test_clear(self): self.assertTrue(session._deleted) -class TestBurstyPool(unittest.TestCase): +class TestBurstyPool(OpenTelemetryBase): + BASE_ATTRIBUTES = { + "db.type": "spanner", + "db.url": "spanner.googleapis.com", + "db.instance": "name", + "net.host.name": "spanner.googleapis.com", + } + enrich_with_otel_scope(BASE_ATTRIBUTES) + def _getTargetClass(self): from google.cloud.spanner_v1.pool import BurstyPool @@ -347,6 +457,34 @@ def test_get_empty(self): session.create.assert_called() self.assertTrue(pool._sessions.empty()) + def test_spans_get_empty_pool(self): + # This scenario tests a pool that hasn't been filled up + # and pool.get() acquires from a pool, waiting for a session + # to become available. + pool = self._make_one() + database = _Database("name") + session1 = _Session(database) + database._sessions.append(session1) + pool.bind(database) + + with trace_call("pool.Get", session1): + session = pool.get() + self.assertIsInstance(session, _Session) + self.assertIs(session._database, database) + session.create.assert_called() + self.assertTrue(pool._sessions.empty()) + + self.assertSpanAttributes( + "pool.Get", + attributes=TestBurstyPool.BASE_ATTRIBUTES, + ) + wantEventNames = [ + "Acquiring session", + "Waiting for a session to become available", + "No sessions available in pool. Creating session", + ] + self.assertSpanEvents("pool.Get", wantEventNames) + def test_get_non_empty_session_exists(self): pool = self._make_one() database = _Database("name") @@ -361,6 +499,30 @@ def test_get_non_empty_session_exists(self): self.assertTrue(session._exists_checked) self.assertTrue(pool._sessions.empty()) + def test_spans_get_non_empty_session_exists(self): + # Tests the spans produces when you invoke pool.bind + # and then insert a session into the pool. + pool = self._make_one() + database = _Database("name") + previous = _Session(database) + pool.bind(database) + with trace_call("pool.Get", previous): + pool.put(previous) + session = pool.get() + self.assertIs(session, previous) + session.create.assert_not_called() + self.assertTrue(session._exists_checked) + self.assertTrue(pool._sessions.empty()) + + self.assertSpanAttributes( + "pool.Get", + attributes=TestBurstyPool.BASE_ATTRIBUTES, + ) + self.assertSpanEvents( + "pool.Get", + ["Acquiring session", "Waiting for a session to become available"], + ) + def test_get_non_empty_session_expired(self): pool = self._make_one() database = _Database("name") @@ -388,6 +550,22 @@ def test_put_empty(self): self.assertFalse(pool._sessions.empty()) + def test_spans_put_empty(self): + # Tests the spans produced when you put sessions into an empty pool. + pool = self._make_one() + database = _Database("name") + pool.bind(database) + session = _Session(database) + + with trace_call("pool.put", session): + pool.put(session) + self.assertFalse(pool._sessions.empty()) + + self.assertSpanAttributes( + "pool.put", + attributes=TestBurstyPool.BASE_ATTRIBUTES, + ) + def test_put_full(self): pool = self._make_one(target_size=1) database = _Database("name") @@ -402,6 +580,28 @@ def test_put_full(self): self.assertTrue(younger._deleted) self.assertIs(pool.get(), older) + def test_spans_put_full(self): + # This scenario tests the spans produced from putting an older + # session into a pool that is already full. 
+ pool = self._make_one(target_size=1) + database = _Database("name") + pool.bind(database) + older = _Session(database) + with trace_call("pool.put", older): + pool.put(older) + self.assertFalse(pool._sessions.empty()) + + younger = _Session(database) + pool.put(younger) # discarded silently + + self.assertTrue(younger._deleted) + self.assertIs(pool.get(), older) + + self.assertSpanAttributes( + "pool.put", + attributes=TestBurstyPool.BASE_ATTRIBUTES, + ) + def test_put_full_expired(self): pool = self._make_one(target_size=1) database = _Database("name") @@ -426,9 +626,18 @@ def test_clear(self): pool.clear() self.assertTrue(previous._deleted) + self.assertNoSpans() + +class TestPingingPool(OpenTelemetryBase): + BASE_ATTRIBUTES = { + "db.type": "spanner", + "db.url": "spanner.googleapis.com", + "db.instance": "name", + "net.host.name": "spanner.googleapis.com", + } + enrich_with_otel_scope(BASE_ATTRIBUTES) -class TestPingingPool(unittest.TestCase): def _getTargetClass(self): from google.cloud.spanner_v1.pool import PingingPool @@ -505,6 +714,7 @@ def test_get_hit_no_ping(self): self.assertIs(session, SESSIONS[0]) self.assertFalse(session._exists_checked) self.assertFalse(pool._sessions.full()) + self.assertNoSpans() def test_get_hit_w_ping(self): import datetime @@ -526,6 +736,7 @@ def test_get_hit_w_ping(self): self.assertIs(session, SESSIONS[0]) self.assertTrue(session._exists_checked) self.assertFalse(pool._sessions.full()) + self.assertNoSpans() def test_get_hit_w_ping_expired(self): import datetime @@ -549,6 +760,7 @@ def test_get_hit_w_ping_expired(self): session.create.assert_called() self.assertTrue(SESSIONS[0]._exists_checked) self.assertFalse(pool._sessions.full()) + self.assertNoSpans() def test_get_empty_default_timeout(self): import queue @@ -560,6 +772,7 @@ def test_get_empty_default_timeout(self): pool.get() self.assertEqual(session_queue._got, {"block": True, "timeout": 10}) + self.assertNoSpans() def test_get_empty_explicit_timeout(self): import queue @@ -571,6 +784,7 @@ def test_get_empty_explicit_timeout(self): pool.get(timeout=1) self.assertEqual(session_queue._got, {"block": True, "timeout": 1}) + self.assertNoSpans() def test_put_full(self): import queue @@ -585,6 +799,7 @@ def test_put_full(self): pool.put(_Session(database)) self.assertTrue(pool._sessions.full()) + self.assertNoSpans() def test_put_non_full(self): import datetime @@ -605,6 +820,7 @@ def test_put_non_full(self): ping_after, queued = session_queue._items[0] self.assertEqual(ping_after, now + datetime.timedelta(seconds=3000)) self.assertIs(queued, session) + self.assertNoSpans() def test_clear(self): pool = self._make_one() @@ -623,10 +839,12 @@ def test_clear(self): for session in SESSIONS: self.assertTrue(session._deleted) + self.assertNoSpans() def test_ping_empty(self): pool = self._make_one(size=1) pool.ping() # Does not raise 'Empty' + self.assertNoSpans() def test_ping_oldest_fresh(self): pool = self._make_one(size=1) @@ -638,6 +856,7 @@ def test_ping_oldest_fresh(self): pool.ping() self.assertFalse(SESSIONS[0]._pinged) + self.assertNoSpans() def test_ping_oldest_stale_but_exists(self): import datetime @@ -674,193 +893,36 @@ def test_ping_oldest_stale_and_not_exists(self): self.assertTrue(SESSIONS[0]._pinged) SESSIONS[1].create.assert_called() + self.assertNoSpans() - -class TestTransactionPingingPool(unittest.TestCase): - def _getTargetClass(self): - from google.cloud.spanner_v1.pool import TransactionPingingPool - - return TransactionPingingPool - - def _make_one(self, *args, **kwargs): - 
return self._getTargetClass()(*args, **kwargs) - - def test_ctor_defaults(self): - pool = self._make_one() - self.assertIsNone(pool._database) - self.assertEqual(pool.size, 10) - self.assertEqual(pool.default_timeout, 10) - self.assertEqual(pool._delta.seconds, 3000) - self.assertTrue(pool._sessions.empty()) - self.assertTrue(pool._pending_sessions.empty()) - self.assertEqual(pool.labels, {}) - self.assertIsNone(pool.database_role) - - def test_ctor_explicit(self): - labels = {"foo": "bar"} - database_role = "dummy-role" - pool = self._make_one( - size=4, - default_timeout=30, - ping_interval=1800, - labels=labels, - database_role=database_role, - ) - self.assertIsNone(pool._database) - self.assertEqual(pool.size, 4) - self.assertEqual(pool.default_timeout, 30) - self.assertEqual(pool._delta.seconds, 1800) - self.assertTrue(pool._sessions.empty()) - self.assertTrue(pool._pending_sessions.empty()) - self.assertEqual(pool.labels, labels) - self.assertEqual(pool.database_role, database_role) - - def test_ctor_explicit_w_database_role_in_db(self): - database_role = "dummy-role" - pool = self._make_one() - database = pool._database = _Database("name") - SESSIONS = [_Session(database)] * 10 - database._sessions.extend(SESSIONS) - database._database_role = database_role - pool.bind(database) - self.assertEqual(pool.database_role, database_role) - - def test_bind(self): + def test_spans_get_and_leave_empty_pool(self): + # This scenario tests the spans generated from pulling a span + # out the pool and leaving it empty. pool = self._make_one() database = _Database("name") - SESSIONS = [_Session(database) for _ in range(10)] - database._sessions.extend(SESSIONS) - pool.bind(database) - - self.assertIs(pool._database, database) - self.assertEqual(pool.size, 10) - self.assertEqual(pool.default_timeout, 10) - self.assertEqual(pool._delta.seconds, 3000) - self.assertTrue(pool._sessions.full()) - - api = database.spanner_api - self.assertEqual(api.batch_create_sessions.call_count, 5) - for session in SESSIONS: - session.create.assert_not_called() - txn = session._transaction - txn.begin.assert_not_called() - - self.assertTrue(pool._pending_sessions.empty()) - - def test_bind_w_timestamp_race(self): - import datetime - from google.cloud._testing import _Monkey - from google.cloud.spanner_v1 import pool as MUT - - NOW = datetime.datetime.utcnow() - pool = self._make_one() - database = _Database("name") - SESSIONS = [_Session(database) for _ in range(10)] - database._sessions.extend(SESSIONS) - - with _Monkey(MUT, _NOW=lambda: NOW): + session1 = _Session(database) + database._sessions.append(session1) + try: pool.bind(database) + except Exception: + pass - self.assertIs(pool._database, database) - self.assertEqual(pool.size, 10) - self.assertEqual(pool.default_timeout, 10) - self.assertEqual(pool._delta.seconds, 3000) - self.assertTrue(pool._sessions.full()) - - api = database.spanner_api - self.assertEqual(api.batch_create_sessions.call_count, 5) - for session in SESSIONS: - session.create.assert_not_called() - txn = session._transaction - txn.begin.assert_not_called() - - self.assertTrue(pool._pending_sessions.empty()) - - def test_put_full(self): - import queue - - pool = self._make_one(size=4) - database = _Database("name") - SESSIONS = [_Session(database) for _ in range(4)] - database._sessions.extend(SESSIONS) - pool.bind(database) - - with self.assertRaises(queue.Full): - pool.put(_Session(database)) - - self.assertTrue(pool._sessions.full()) - - def test_put_non_full_w_active_txn(self): - pool = 
self._make_one(size=1) - session_queue = pool._sessions = _Queue() - pending = pool._pending_sessions = _Queue() - database = _Database("name") - session = _Session(database) - txn = session.transaction() - - pool.put(session) - - self.assertEqual(len(session_queue._items), 1) - _, queued = session_queue._items[0] - self.assertIs(queued, session) - - self.assertEqual(len(pending._items), 0) - txn.begin.assert_not_called() - - def test_put_non_full_w_committed_txn(self): - pool = self._make_one(size=1) - session_queue = pool._sessions = _Queue() - pending = pool._pending_sessions = _Queue() - database = _Database("name") - session = _Session(database) - committed = session.transaction() - committed.committed = True - - pool.put(session) - - self.assertEqual(len(session_queue._items), 0) - - self.assertEqual(len(pending._items), 1) - self.assertIs(pending._items[0], session) - self.assertIsNot(session._transaction, committed) - session._transaction.begin.assert_not_called() - - def test_put_non_full(self): - pool = self._make_one(size=1) - session_queue = pool._sessions = _Queue() - pending = pool._pending_sessions = _Queue() - database = _Database("name") - session = _Session(database) - - pool.put(session) - - self.assertEqual(len(session_queue._items), 0) - self.assertEqual(len(pending._items), 1) - self.assertIs(pending._items[0], session) - - self.assertFalse(pending.empty()) - - def test_begin_pending_transactions_empty(self): - pool = self._make_one(size=1) - pool.begin_pending_transactions() # no raise - - def test_begin_pending_transactions_non_empty(self): - pool = self._make_one(size=1) - pool._sessions = _Queue() - - database = _Database("name") - TRANSACTIONS = [_make_transaction(object())] - PENDING_SESSIONS = [_Session(database, transaction=txn) for txn in TRANSACTIONS] - - pending = pool._pending_sessions = _Queue(*PENDING_SESSIONS) - self.assertFalse(pending.empty()) - - pool.begin_pending_transactions() # no raise - - for txn in TRANSACTIONS: - txn.begin.assert_not_called() - - self.assertTrue(pending.empty()) + with trace_call("pool.Get", session1): + session = pool.get() + self.assertIsInstance(session, _Session) + self.assertIs(session._database, database) + # session.create.assert_called() + self.assertTrue(pool._sessions.empty()) + + self.assertSpanAttributes( + "pool.Get", + attributes=TestPingingPool.BASE_ATTRIBUTES, + ) + wantEventNames = [ + "Waiting for a session to become available", + "Acquired session", + ] + self.assertSpanEvents("pool.Get", wantEventNames) class TestSessionCheckout(unittest.TestCase): @@ -945,6 +1007,8 @@ def __init__( self._deleted = False self._transaction = transaction self._last_use_time = last_use_time + # Generate a faux id. 
+ self._session_id = f"{time.time()}" def __lt__(self, other): return id(self) < id(other) @@ -975,6 +1039,10 @@ def transaction(self): txn = self._transaction = _make_transaction(self) return txn + @property + def session_id(self): + return self._session_id + class _Database(object): def __init__(self, name): diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 2ae0cb94b8..966adadcbd 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -15,6 +15,7 @@ import google.api_core.gapic_v1.method from google.cloud.spanner_v1 import RequestOptions +from google.cloud.spanner_v1._opentelemetry_tracing import trace_call import mock from tests._helpers import ( OpenTelemetryBase, @@ -174,6 +175,43 @@ def test_create_w_database_role(self): "CloudSpanner.CreateSession", attributes=TestSession.BASE_ATTRIBUTES ) + def test_create_session_span_annotations(self): + from google.cloud.spanner_v1 import CreateSessionRequest + from google.cloud.spanner_v1 import Session as SessionRequestProto + + session_pb = self._make_session_pb( + self.SESSION_NAME, database_role=self.DATABASE_ROLE + ) + + gax_api = self._make_spanner_api() + gax_api.create_session.return_value = session_pb + database = self._make_database(database_role=self.DATABASE_ROLE) + database.spanner_api = gax_api + session = self._make_one(database, database_role=self.DATABASE_ROLE) + + with trace_call("TestSessionSpan", session) as span: + session.create() + + self.assertEqual(session.session_id, self.SESSION_ID) + self.assertEqual(session.database_role, self.DATABASE_ROLE) + session_template = SessionRequestProto(creator_role=self.DATABASE_ROLE) + + request = CreateSessionRequest( + database=database.name, + session=session_template, + ) + + gax_api.create_session.assert_called_once_with( + request=request, + metadata=[ + ("google-cloud-resource-prefix", database.name), + ("x-goog-spanner-route-to-leader", "true"), + ], + ) + + wantEventNames = ["Creating Session"] + self.assertSpanEvents("TestSessionSpan", wantEventNames, span) + def test_create_wo_database_role(self): from google.cloud.spanner_v1 import CreateSessionRequest diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py index bf7363fef2..479a0d62e9 100644 --- a/tests/unit/test_snapshot.py +++ b/tests/unit/test_snapshot.py @@ -1822,6 +1822,10 @@ def __init__(self, database=None, name=TestSnapshot.SESSION_NAME): self._database = database self.name = name + @property + def session_id(self): + return self.name + class _MockIterator(object): def __init__(self, *values, **kw): diff --git a/tests/unit/test_spanner.py b/tests/unit/test_spanner.py index ab5479eb3c..ff34a109af 100644 --- a/tests/unit/test_spanner.py +++ b/tests/unit/test_spanner.py @@ -1082,6 +1082,10 @@ def __init__(self, database=None, name=TestTransaction.SESSION_NAME): self._database = database self.name = name + @property + def session_id(self): + return self.name + class _MockIterator(object): def __init__(self, *values, **kw): diff --git a/tests/unit/test_transaction.py b/tests/unit/test_transaction.py index d52fb61db1..e426f912b2 100644 --- a/tests/unit/test_transaction.py +++ b/tests/unit/test_transaction.py @@ -939,6 +939,10 @@ def __init__(self, database=None, name=TestTransaction.SESSION_NAME): self._database = database self.name = name + @property + def session_id(self): + return self.name + class _FauxSpannerAPI(object): _committed = None From 259a78baeeeb90011be1eb5e3bb01ea95c896bcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Knut=20Olav=20L=C3=B8ite?= Date: 
Mon, 16 Dec 2024 11:32:03 +0100 Subject: [PATCH 03/19] test: add test to verify that transactions are retried (#1267) --- .../cloud/spanner_v1/testing/mock_spanner.py | 13 +++++ .../mockserver_tests/mock_server_test_base.py | 31 ++++++++++++ .../test_aborted_transaction.py | 50 +++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 tests/mockserver_tests/test_aborted_transaction.py diff --git a/google/cloud/spanner_v1/testing/mock_spanner.py b/google/cloud/spanner_v1/testing/mock_spanner.py index d01c63aff5..1f37ff2a03 100644 --- a/google/cloud/spanner_v1/testing/mock_spanner.py +++ b/google/cloud/spanner_v1/testing/mock_spanner.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import base64 +import inspect import grpc from concurrent import futures from google.protobuf import empty_pb2 +from grpc_status.rpc_status import _Status from google.cloud.spanner_v1.testing.mock_database_admin import DatabaseAdminServicer import google.cloud.spanner_v1.testing.spanner_database_admin_pb2_grpc as database_admin_grpc import google.cloud.spanner_v1.testing.spanner_pb2_grpc as spanner_grpc @@ -28,6 +30,7 @@ class MockSpanner: def __init__(self): self.results = {} + self.errors = {} def add_result(self, sql: str, result: result_set.ResultSet): self.results[sql.lower().strip()] = result @@ -38,6 +41,15 @@ def get_result(self, sql: str) -> result_set.ResultSet: raise ValueError(f"No result found for {sql}") return result + def add_error(self, method: str, error: _Status): + self.errors[method] = error + + def pop_error(self, context): + name = inspect.currentframe().f_back.f_code.co_name + error: _Status | None = self.errors.pop(name, None) + if error: + context.abort_with_status(error) + def get_result_as_partial_result_sets( self, sql: str ) -> [result_set.PartialResultSet]: @@ -174,6 +186,7 @@ def __create_transaction( def Commit(self, request, context): self._requests.append(request) + self.mock_spanner.pop_error(context) tx = self.transactions[request.transaction_id] if tx is None: raise ValueError(f"Transaction not found: {request.transaction_id}") diff --git a/tests/mockserver_tests/mock_server_test_base.py b/tests/mockserver_tests/mock_server_test_base.py index 1cd7656297..12c98bc51b 100644 --- a/tests/mockserver_tests/mock_server_test_base.py +++ b/tests/mockserver_tests/mock_server_test_base.py @@ -28,6 +28,37 @@ from google.cloud.spanner_v1.database import Database from google.cloud.spanner_v1.instance import Instance import grpc +from google.rpc import code_pb2 +from google.rpc import status_pb2 +from google.rpc.error_details_pb2 import RetryInfo +from google.protobuf.duration_pb2 import Duration +from grpc_status._common import code_to_grpc_status_code +from grpc_status.rpc_status import _Status + + +# Creates an aborted status with the smallest possible retry delay. 
+def aborted_status() -> _Status: + error = status_pb2.Status( + code=code_pb2.ABORTED, + message="Transaction was aborted.", + ) + retry_info = RetryInfo(retry_delay=Duration(seconds=0, nanos=1)) + status = _Status( + code=code_to_grpc_status_code(error.code), + details=error.message, + trailing_metadata=( + ("grpc-status-details-bin", error.SerializeToString()), + ( + "google.rpc.retryinfo-bin", + retry_info.SerializeToString(), + ), + ), + ) + return status + + +def add_error(method: str, error: status_pb2.Status): + MockServerTestBase.spanner_service.mock_spanner.add_error(method, error) def add_result(sql: str, result: result_set.ResultSet): diff --git a/tests/mockserver_tests/test_aborted_transaction.py b/tests/mockserver_tests/test_aborted_transaction.py new file mode 100644 index 0000000000..ede2675ce6 --- /dev/null +++ b/tests/mockserver_tests/test_aborted_transaction.py @@ -0,0 +1,50 @@ +# Copyright 2024 Google LLC All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud.spanner_v1 import ( + BatchCreateSessionsRequest, + BeginTransactionRequest, + CommitRequest, +) +from google.cloud.spanner_v1.testing.mock_spanner import SpannerServicer +from google.cloud.spanner_v1.transaction import Transaction +from tests.mockserver_tests.mock_server_test_base import ( + MockServerTestBase, + add_error, + aborted_status, +) + + +class TestAbortedTransaction(MockServerTestBase): + def test_run_in_transaction_commit_aborted(self): + # Add an Aborted error for the Commit method on the mock server. + add_error(SpannerServicer.Commit.__name__, aborted_status()) + # Run a transaction. The Commit method will return Aborted the first + # time that the transaction tries to commit. It will then be retried + # and succeed. + self.database.run_in_transaction(_insert_mutations) + + # Verify that the transaction was retried. + requests = self.spanner_service.requests + self.assertEqual(5, len(requests), msg=requests) + self.assertTrue(isinstance(requests[0], BatchCreateSessionsRequest)) + self.assertTrue(isinstance(requests[1], BeginTransactionRequest)) + self.assertTrue(isinstance(requests[2], CommitRequest)) + # The transaction is aborted and retried. + self.assertTrue(isinstance(requests[3], BeginTransactionRequest)) + self.assertTrue(isinstance(requests[4], CommitRequest)) + + +def _insert_mutations(transaction: Transaction): + transaction.insert("my_table", ["col1", "col2"], ["value1", "value2"]) From ad69c48f01b09cbc5270b9cefde23715d5ac54b6 Mon Sep 17 00:00:00 2001 From: Emmanuel T Odeke Date: Mon, 16 Dec 2024 21:53:30 -0800 Subject: [PATCH 04/19] feat: add updated span events + trace more methods (#1259) * observability: add updated span events + traace more methods This change carves out parts of PR #1241 in smaller pieces to ease with smaller reviews. 
This change adds more span events, updates important spans to make them more distinct like changing: "CloudSpanner.ReadWriteTransaction" to more direct and more pointed spans like: * CloudSpanner.Transaction.execute_streaming_sql Also added important spans: * CloudSpanner.Database.run_in_transaction * CloudSpanner.Session.run_in_transaction * all: update review comments + show type for BeginTransaction + remove prints * Remove requested span event "Using Transaction" * Move attempts into try block * Transform Session.run_in_transaction retry exceptions into events * More comprehensive test for events and attributes for pool.get * Add test guards against Python3.7 for which OpenTelemetry is unavailable + address test feedback * Remove span event per mutation in favour of future TODO Referencing issue #1269, this update removes adding a span event per mutation, in favour of a future TODO. * Sort system-test.test_transaction_abort_then_retry_spans spans by create time * Delint tests --- .../spanner_v1/_opentelemetry_tracing.py | 12 +- google/cloud/spanner_v1/batch.py | 12 +- google/cloud/spanner_v1/database.py | 42 +++-- google/cloud/spanner_v1/pool.py | 90 ++++++---- google/cloud/spanner_v1/session.py | 135 +++++++++----- google/cloud/spanner_v1/snapshot.py | 10 +- google/cloud/spanner_v1/transaction.py | 10 +- tests/_helpers.py | 9 +- tests/system/_helpers.py | 18 ++ tests/system/test_observability_options.py | 143 ++++++++++++++- tests/system/test_session_api.py | 76 ++++---- tests/unit/test_batch.py | 11 +- tests/unit/test_pool.py | 170 ++++++++++++++++-- tests/unit/test_session.py | 15 +- tests/unit/test_snapshot.py | 18 +- tests/unit/test_transaction.py | 13 +- 16 files changed, 601 insertions(+), 183 deletions(-) diff --git a/google/cloud/spanner_v1/_opentelemetry_tracing.py b/google/cloud/spanner_v1/_opentelemetry_tracing.py index 1caac59ecd..6f3997069e 100644 --- a/google/cloud/spanner_v1/_opentelemetry_tracing.py +++ b/google/cloud/spanner_v1/_opentelemetry_tracing.py @@ -56,11 +56,11 @@ def get_tracer(tracer_provider=None): @contextmanager -def trace_call(name, session, extra_attributes=None, observability_options=None): +def trace_call(name, session=None, extra_attributes=None, observability_options=None): if session: session._last_use_time = datetime.now() - if not HAS_OPENTELEMETRY_INSTALLED or not session: + if not (HAS_OPENTELEMETRY_INSTALLED and name): # Empty context manager. Users will have to check if the generated value is None or a span yield None return @@ -72,20 +72,24 @@ def trace_call(name, session, extra_attributes=None, observability_options=None) # on by default. 
enable_extended_tracing = True + db_name = "" + if session and getattr(session, "_database", None): + db_name = session._database.name + if isinstance(observability_options, dict): # Avoid false positives with mock.Mock tracer_provider = observability_options.get("tracer_provider", None) enable_extended_tracing = observability_options.get( "enable_extended_tracing", enable_extended_tracing ) + db_name = observability_options.get("db_name", db_name) tracer = get_tracer(tracer_provider) # Set base attributes that we know for every trace created - db = session._database attributes = { "db.type": "spanner", "db.url": SpannerClient.DEFAULT_ENDPOINT, - "db.instance": "" if not db else db.name, + "db.instance": db_name, "net.host.name": SpannerClient.DEFAULT_ENDPOINT, OTEL_SCOPE_NAME: TRACER_NAME, OTEL_SCOPE_VERSION: TRACER_VERSION, diff --git a/google/cloud/spanner_v1/batch.py b/google/cloud/spanner_v1/batch.py index 948740d7d4..8d62ac0883 100644 --- a/google/cloud/spanner_v1/batch.py +++ b/google/cloud/spanner_v1/batch.py @@ -70,6 +70,8 @@ def insert(self, table, columns, values): :param values: Values to be modified. """ self._mutations.append(Mutation(insert=_make_write_pb(table, columns, values))) + # TODO: Decide if we should add a span event per mutation: + # https://github.com/googleapis/python-spanner/issues/1269 def update(self, table, columns, values): """Update one or more existing table rows. @@ -84,6 +86,8 @@ def update(self, table, columns, values): :param values: Values to be modified. """ self._mutations.append(Mutation(update=_make_write_pb(table, columns, values))) + # TODO: Decide if we should add a span event per mutation: + # https://github.com/googleapis/python-spanner/issues/1269 def insert_or_update(self, table, columns, values): """Insert/update one or more table rows. @@ -100,6 +104,8 @@ def insert_or_update(self, table, columns, values): self._mutations.append( Mutation(insert_or_update=_make_write_pb(table, columns, values)) ) + # TODO: Decide if we should add a span event per mutation: + # https://github.com/googleapis/python-spanner/issues/1269 def replace(self, table, columns, values): """Replace one or more table rows. @@ -114,6 +120,8 @@ def replace(self, table, columns, values): :param values: Values to be modified. """ self._mutations.append(Mutation(replace=_make_write_pb(table, columns, values))) + # TODO: Decide if we should add a span event per mutation: + # https://github.com/googleapis/python-spanner/issues/1269 def delete(self, table, keyset): """Delete one or more table rows. 
@@ -126,6 +134,8 @@ def delete(self, table, keyset): """ delete = Mutation.Delete(table=table, key_set=keyset._to_pb()) self._mutations.append(Mutation(delete=delete)) + # TODO: Decide if we should add a span event per mutation: + # https://github.com/googleapis/python-spanner/issues/1269 class Batch(_BatchBase): @@ -207,7 +217,7 @@ def commit( ) observability_options = getattr(database, "observability_options", None) with trace_call( - "CloudSpanner.Commit", + f"CloudSpanner.{type(self).__name__}.commit", self._session, trace_attributes, observability_options=observability_options, diff --git a/google/cloud/spanner_v1/database.py b/google/cloud/spanner_v1/database.py index c8230ab503..88d2bb60f7 100644 --- a/google/cloud/spanner_v1/database.py +++ b/google/cloud/spanner_v1/database.py @@ -70,6 +70,7 @@ from google.cloud.spanner_v1._opentelemetry_tracing import ( add_span_event, get_current_span, + trace_call, ) @@ -720,6 +721,7 @@ def execute_pdml(): iterator = _restart_on_unavailable( method=method, + trace_name="CloudSpanner.ExecuteStreamingSql", request=request, transaction_selector=txn_selector, observability_options=self.observability_options, @@ -881,20 +883,25 @@ def run_in_transaction(self, func, *args, **kw): :raises Exception: reraises any non-ABORT exceptions raised by ``func``. """ - # Sanity check: Is there a transaction already running? - # If there is, then raise a red flag. Otherwise, mark that this one - # is running. - if getattr(self._local, "transaction_running", False): - raise RuntimeError("Spanner does not support nested transactions.") - self._local.transaction_running = True - - # Check out a session and run the function in a transaction; once - # done, flip the sanity check bit back. - try: - with SessionCheckout(self._pool) as session: - return session.run_in_transaction(func, *args, **kw) - finally: - self._local.transaction_running = False + observability_options = getattr(self, "observability_options", None) + with trace_call( + "CloudSpanner.Database.run_in_transaction", + observability_options=observability_options, + ): + # Sanity check: Is there a transaction already running? + # If there is, then raise a red flag. Otherwise, mark that this one + # is running. + if getattr(self._local, "transaction_running", False): + raise RuntimeError("Spanner does not support nested transactions.") + self._local.transaction_running = True + + # Check out a session and run the function in a transaction; once + # done, flip the sanity check bit back. + try: + with SessionCheckout(self._pool) as session: + return session.run_in_transaction(func, *args, **kw) + finally: + self._local.transaction_running = False def restore(self, source): """Restore from a backup to this database. 
@@ -1120,7 +1127,12 @@ def observability_options(self): if not (self._instance and self._instance._client): return None - return getattr(self._instance._client, "observability_options", None) + opts = getattr(self._instance._client, "observability_options", None) + if not opts: + opts = dict() + + opts["db_name"] = self.name + return opts class BatchCheckout(object): diff --git a/google/cloud/spanner_v1/pool.py b/google/cloud/spanner_v1/pool.py index 4f90196b4a..03bff81b52 100644 --- a/google/cloud/spanner_v1/pool.py +++ b/google/cloud/spanner_v1/pool.py @@ -28,6 +28,7 @@ from google.cloud.spanner_v1._opentelemetry_tracing import ( add_span_event, get_current_span, + trace_call, ) from warnings import warn @@ -237,29 +238,41 @@ def bind(self, database): session_template=Session(creator_role=self.database_role), ) - returned_session_count = 0 - while not self._sessions.full(): - request.session_count = requested_session_count - self._sessions.qsize() + observability_options = getattr(self._database, "observability_options", None) + with trace_call( + "CloudSpanner.FixedPool.BatchCreateSessions", + observability_options=observability_options, + ) as span: + returned_session_count = 0 + while not self._sessions.full(): + request.session_count = requested_session_count - self._sessions.qsize() + add_span_event( + span, + f"Creating {request.session_count} sessions", + span_event_attributes, + ) + resp = api.batch_create_sessions( + request=request, + metadata=metadata, + ) + + add_span_event( + span, + "Created sessions", + dict(count=len(resp.session)), + ) + + for session_pb in resp.session: + session = self._new_session() + session._session_id = session_pb.name.split("/")[-1] + self._sessions.put(session) + returned_session_count += 1 + add_span_event( span, - f"Creating {request.session_count} sessions", + f"Requested for {requested_session_count} sessions, returned {returned_session_count}", span_event_attributes, ) - resp = api.batch_create_sessions( - request=request, - metadata=metadata, - ) - for session_pb in resp.session: - session = self._new_session() - session._session_id = session_pb.name.split("/")[-1] - self._sessions.put(session) - returned_session_count += 1 - - add_span_event( - span, - f"Requested for {requested_session_count} sessions, returned {returned_session_count}", - span_event_attributes, - ) def get(self, timeout=None): """Check a session out from the pool. 
@@ -550,25 +563,30 @@ def bind(self, database): span_event_attributes, ) - returned_session_count = 0 - while created_session_count < self.size: - resp = api.batch_create_sessions( - request=request, - metadata=metadata, - ) - for session_pb in resp.session: - session = self._new_session() - session._session_id = session_pb.name.split("/")[-1] - self.put(session) - returned_session_count += 1 + observability_options = getattr(self._database, "observability_options", None) + with trace_call( + "CloudSpanner.PingingPool.BatchCreateSessions", + observability_options=observability_options, + ) as span: + returned_session_count = 0 + while created_session_count < self.size: + resp = api.batch_create_sessions( + request=request, + metadata=metadata, + ) + for session_pb in resp.session: + session = self._new_session() + session._session_id = session_pb.name.split("/")[-1] + self.put(session) + returned_session_count += 1 - created_session_count += len(resp.session) + created_session_count += len(resp.session) - add_span_event( - current_span, - f"Requested for {requested_session_count} sessions, return {returned_session_count}", - span_event_attributes, - ) + add_span_event( + span, + f"Requested for {requested_session_count} sessions, returned {returned_session_count}", + span_event_attributes, + ) def get(self, timeout=None): """Check a session out from the pool. diff --git a/google/cloud/spanner_v1/session.py b/google/cloud/spanner_v1/session.py index 166d5488c6..d73a8cc2b5 100644 --- a/google/cloud/spanner_v1/session.py +++ b/google/cloud/spanner_v1/session.py @@ -243,6 +243,10 @@ def delete(self): with trace_call( "CloudSpanner.DeleteSession", self, + extra_attributes={ + "session.id": self._session_id, + "session.name": self.name, + }, observability_options=observability_options, ): api.delete_session(name=self.name, metadata=metadata) @@ -458,47 +462,98 @@ def run_in_transaction(self, func, *args, **kw): ) attempts = 0 - while True: - if self._transaction is None: - txn = self.transaction() - txn.transaction_tag = transaction_tag - txn.exclude_txn_from_change_streams = exclude_txn_from_change_streams - else: - txn = self._transaction - - try: - attempts += 1 - return_value = func(txn, *args, **kw) - except Aborted as exc: - del self._transaction - _delay_until_retry(exc, deadline, attempts) - continue - except GoogleAPICallError: - del self._transaction - raise - except Exception: - txn.rollback() - raise - - try: - txn.commit( - return_commit_stats=self._database.log_commit_stats, - request_options=commit_request_options, - max_commit_delay=max_commit_delay, - ) - except Aborted as exc: - del self._transaction - _delay_until_retry(exc, deadline, attempts) - except GoogleAPICallError: - del self._transaction - raise - else: - if self._database.log_commit_stats and txn.commit_stats: - self._database.logger.info( - "CommitStats: {}".format(txn.commit_stats), - extra={"commit_stats": txn.commit_stats}, + observability_options = getattr(self._database, "observability_options", None) + with trace_call( + "CloudSpanner.Session.run_in_transaction", + self, + observability_options=observability_options, + ) as span: + while True: + if self._transaction is None: + txn = self.transaction() + txn.transaction_tag = transaction_tag + txn.exclude_txn_from_change_streams = ( + exclude_txn_from_change_streams + ) + else: + txn = self._transaction + + span_attributes = dict() + + try: + attempts += 1 + span_attributes["attempt"] = attempts + txn_id = getattr(txn, "_transaction_id", "") or "" + if txn_id: 
+ span_attributes["transaction.id"] = txn_id + + return_value = func(txn, *args, **kw) + + except Aborted as exc: + del self._transaction + if span: + delay_seconds = _get_retry_delay(exc.errors[0], attempts) + attributes = dict(delay_seconds=delay_seconds, cause=str(exc)) + attributes.update(span_attributes) + add_span_event( + span, + "Transaction was aborted in user operation, retrying", + attributes, + ) + + _delay_until_retry(exc, deadline, attempts) + continue + except GoogleAPICallError: + del self._transaction + add_span_event( + span, + "User operation failed due to GoogleAPICallError, not retrying", + span_attributes, + ) + raise + except Exception: + add_span_event( + span, + "User operation failed. Invoking Transaction.rollback(), not retrying", + span_attributes, + ) + txn.rollback() + raise + + try: + txn.commit( + return_commit_stats=self._database.log_commit_stats, + request_options=commit_request_options, + max_commit_delay=max_commit_delay, + ) + except Aborted as exc: + del self._transaction + if span: + delay_seconds = _get_retry_delay(exc.errors[0], attempts) + attributes = dict(delay_seconds=delay_seconds) + attributes.update(span_attributes) + add_span_event( + span, + "Transaction got aborted during commit, retrying afresh", + attributes, + ) + + _delay_until_retry(exc, deadline, attempts) + except GoogleAPICallError: + del self._transaction + add_span_event( + span, + "Transaction.commit failed due to GoogleAPICallError, not retrying", + span_attributes, ) - return return_value + raise + else: + if self._database.log_commit_stats and txn.commit_stats: + self._database.logger.info( + "CommitStats: {}".format(txn.commit_stats), + extra={"commit_stats": txn.commit_stats}, + ) + return return_value # Rational: this function factors out complex shared deadline / retry diff --git a/google/cloud/spanner_v1/snapshot.py b/google/cloud/spanner_v1/snapshot.py index 89b5094706..6234c96435 100644 --- a/google/cloud/spanner_v1/snapshot.py +++ b/google/cloud/spanner_v1/snapshot.py @@ -335,7 +335,7 @@ def read( iterator = _restart_on_unavailable( restart, request, - "CloudSpanner.ReadOnlyTransaction", + f"CloudSpanner.{type(self).__name__}.read", self._session, trace_attributes, transaction=self, @@ -357,7 +357,7 @@ def read( iterator = _restart_on_unavailable( restart, request, - "CloudSpanner.ReadOnlyTransaction", + f"CloudSpanner.{type(self).__name__}.read", self._session, trace_attributes, transaction=self, @@ -578,7 +578,7 @@ def _get_streamed_result_set( iterator = _restart_on_unavailable( restart, request, - "CloudSpanner.ReadWriteTransaction", + f"CloudSpanner.{type(self).__name__}.execute_streaming_sql", self._session, trace_attributes, transaction=self, @@ -676,7 +676,7 @@ def partition_read( trace_attributes = {"table_id": table, "columns": columns} with trace_call( - "CloudSpanner.PartitionReadOnlyTransaction", + f"CloudSpanner.{type(self).__name__}.partition_read", self._session, trace_attributes, observability_options=getattr(database, "observability_options", None), @@ -926,7 +926,7 @@ def begin(self): ) txn_selector = self._make_txn_selector() with trace_call( - "CloudSpanner.BeginTransaction", + f"CloudSpanner.{type(self).__name__}.begin", self._session, observability_options=getattr(database, "observability_options", None), ): diff --git a/google/cloud/spanner_v1/transaction.py b/google/cloud/spanner_v1/transaction.py index fa8e5121ff..a8aef7f470 100644 --- a/google/cloud/spanner_v1/transaction.py +++ b/google/cloud/spanner_v1/transaction.py @@ -157,7 +157,7 @@ 
def begin(self): ) observability_options = getattr(database, "observability_options", None) with trace_call( - "CloudSpanner.BeginTransaction", + f"CloudSpanner.{type(self).__name__}.begin", self._session, observability_options=observability_options, ) as span: @@ -199,7 +199,7 @@ def rollback(self): ) observability_options = getattr(database, "observability_options", None) with trace_call( - "CloudSpanner.Rollback", + f"CloudSpanner.{type(self).__name__}.rollback", self._session, observability_options=observability_options, ): @@ -278,7 +278,7 @@ def commit( trace_attributes = {"num_mutations": len(self._mutations)} observability_options = getattr(database, "observability_options", None) with trace_call( - "CloudSpanner.Commit", + f"CloudSpanner.{type(self).__name__}.commit", self._session, trace_attributes, observability_options, @@ -447,7 +447,7 @@ def execute_update( response = self._execute_request( method, request, - "CloudSpanner.ReadWriteTransaction", + f"CloudSpanner.{type(self).__name__}.execute_update", self._session, trace_attributes, observability_options=observability_options, @@ -464,7 +464,7 @@ def execute_update( response = self._execute_request( method, request, - "CloudSpanner.ReadWriteTransaction", + f"CloudSpanner.{type(self).__name__}.execute_update", self._session, trace_attributes, observability_options=observability_options, diff --git a/tests/_helpers.py b/tests/_helpers.py index 81787c5a86..c7b1665e89 100644 --- a/tests/_helpers.py +++ b/tests/_helpers.py @@ -78,7 +78,7 @@ def tearDown(self): def assertNoSpans(self): if HAS_OPENTELEMETRY_INSTALLED: - span_list = self.ot_exporter.get_finished_spans() + span_list = self.get_finished_spans() self.assertEqual(len(span_list), 0) def assertSpanAttributes( @@ -119,11 +119,16 @@ def assertSpanNames(self, want_span_names): def get_finished_spans(self): if HAS_OPENTELEMETRY_INSTALLED: - return list( + span_list = list( filter( lambda span: span and span.name, self.ot_exporter.get_finished_spans(), ) ) + # Sort the spans by their start time in the hierarchy. 
+            return sorted(span_list, key=lambda span: span.start_time)
     else:
         return []
+
+    def reset(self):
+        self.tearDown()
diff --git a/tests/system/_helpers.py b/tests/system/_helpers.py
index b62d453512..f157a8ee59 100644
--- a/tests/system/_helpers.py
+++ b/tests/system/_helpers.py
@@ -137,3 +137,21 @@ def cleanup_old_instances(spanner_client):
 
 def unique_id(prefix, separator="-"):
     return f"{prefix}{system.unique_resource_id(separator)}"
+
+
+class FauxCall:
+    def __init__(self, code, details="FauxCall"):
+        self._code = code
+        self._details = details
+
+    def initial_metadata(self):
+        return {}
+
+    def trailing_metadata(self):
+        return {}
+
+    def code(self):
+        return self._code
+
+    def details(self):
+        return self._details
diff --git a/tests/system/test_observability_options.py b/tests/system/test_observability_options.py
index 8382255c15..42ce0de7fe 100644
--- a/tests/system/test_observability_options.py
+++ b/tests/system/test_observability_options.py
@@ -105,7 +105,10 @@ def test_propagation(enable_extended_tracing):
         len(from_inject_spans) >= 2
     )  # "Expecting at least 2 spans from the injected trace exporter"
     gotNames = [span.name for span in from_inject_spans]
-    wantNames = ["CloudSpanner.CreateSession", "CloudSpanner.ReadWriteTransaction"]
+    wantNames = [
+        "CloudSpanner.CreateSession",
+        "CloudSpanner.Snapshot.execute_streaming_sql",
+    ]
     assert gotNames == wantNames
 
     # Check for conformance of enable_extended_tracing
@@ -128,6 +131,144 @@ def test_propagation(enable_extended_tracing):
     test_propagation(False)
 
 
+@pytest.mark.skipif(
+    not _helpers.USE_EMULATOR,
+    reason="Emulator needed to run these tests",
+)
+@pytest.mark.skipif(
+    not HAS_OTEL_INSTALLED,
+    reason="Tracing requires OpenTelemetry",
+)
+def test_transaction_abort_then_retry_spans():
+    from google.auth.credentials import AnonymousCredentials
+    from google.api_core.exceptions import Aborted
+    from google.rpc import code_pb2
+    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+    from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+        InMemorySpanExporter,
+    )
+    from opentelemetry.trace.status import StatusCode
+    from opentelemetry.sdk.trace import TracerProvider
+    from opentelemetry.sdk.trace.sampling import ALWAYS_ON
+
+    PROJECT = _helpers.EMULATOR_PROJECT
+    CONFIGURATION_NAME = "config-name"
+    INSTANCE_ID = _helpers.INSTANCE_ID
+    DISPLAY_NAME = "display-name"
+    DATABASE_ID = _helpers.unique_id("temp_db")
+    NODE_COUNT = 5
+    LABELS = {"test": "true"}
+
+    counters = dict(aborted=0)
+
+    def select_in_txn(txn):
+        results = txn.execute_sql("SELECT 1")
+        for row in results:
+            _ = row
+
+        if counters["aborted"] == 0:
+            counters["aborted"] = 1
+            raise Aborted(
+                "Thrown from ClientInterceptor for testing",
+                errors=[_helpers.FauxCall(code_pb2.ABORTED)],
+            )
+
+    tracer_provider = TracerProvider(sampler=ALWAYS_ON)
+    trace_exporter = InMemorySpanExporter()
+    tracer_provider.add_span_processor(SimpleSpanProcessor(trace_exporter))
+    observability_options = dict(
+        tracer_provider=tracer_provider,
+        enable_extended_tracing=True,
+    )
+
+    client = Client(
+        project=PROJECT,
+        observability_options=observability_options,
+        credentials=AnonymousCredentials(),
+    )
+
+    instance = client.instance(
+        INSTANCE_ID,
+        CONFIGURATION_NAME,
+        display_name=DISPLAY_NAME,
+        node_count=NODE_COUNT,
+        labels=LABELS,
+    )
+
+    try:
+        instance.create()
+    except Exception:
+        pass
+
+    db = instance.database(DATABASE_ID)
+    try:
+        db.create()
+    except Exception:
+        pass
+
+    db.run_in_transaction(select_in_txn)
+
+    span_list = trace_exporter.get_finished_spans()
+    # Sort the spans by their start time in the hierarchy.
+    span_list = sorted(span_list, key=lambda span: span.start_time)
+    got_span_names = [span.name for span in span_list]
+    want_span_names = [
+        "CloudSpanner.Database.run_in_transaction",
+        "CloudSpanner.CreateSession",
+        "CloudSpanner.Session.run_in_transaction",
+        "CloudSpanner.Transaction.execute_streaming_sql",
+        "CloudSpanner.Transaction.execute_streaming_sql",
+        "CloudSpanner.Transaction.commit",
+    ]
+
+    assert got_span_names == want_span_names
+
+    got_events = []
+    got_statuses = []
+
+    # Some event attributes are noisy/highly ephemeral
+    # and can't be directly compared against.
+    imprecise_event_attributes = ["exception.stacktrace", "delay_seconds", "cause"]
+    for span in span_list:
+        got_statuses.append(
+            (span.name, span.status.status_code, span.status.description)
+        )
+        for event in span.events:
+            evt_attributes = event.attributes.copy()
+            for attr_name in imprecise_event_attributes:
+                if attr_name in evt_attributes:
+                    evt_attributes[attr_name] = "EPHEMERAL"
+
+            got_events.append((event.name, evt_attributes))
+
+    # Check for the series of events
+    want_events = [
+        ("Acquiring session", {"kind": "BurstyPool"}),
+        ("Waiting for a session to become available", {"kind": "BurstyPool"}),
+        ("No sessions available in pool. Creating session", {"kind": "BurstyPool"}),
+        ("Creating Session", {}),
+        (
+            "Transaction was aborted in user operation, retrying",
+            {"delay_seconds": "EPHEMERAL", "cause": "EPHEMERAL", "attempt": 1},
+        ),
+        ("Starting Commit", {}),
+        ("Commit Done", {}),
+    ]
+    assert got_events == want_events
+
+    # Check for the statuses.
+    codes = StatusCode
+    want_statuses = [
+        ("CloudSpanner.Database.run_in_transaction", codes.OK, None),
+        ("CloudSpanner.CreateSession", codes.OK, None),
+        ("CloudSpanner.Session.run_in_transaction", codes.OK, None),
+        ("CloudSpanner.Transaction.execute_streaming_sql", codes.OK, None),
+        ("CloudSpanner.Transaction.execute_streaming_sql", codes.OK, None),
+        ("CloudSpanner.Transaction.commit", codes.OK, None),
+    ]
+    assert got_statuses == want_statuses
+
+
 def _make_credentials():
     from google.auth.credentials import AnonymousCredentials
 
diff --git a/tests/system/test_session_api.py b/tests/system/test_session_api.py
index b7337cb258..4e80657584 100644
--- a/tests/system/test_session_api.py
+++ b/tests/system/test_session_api.py
@@ -447,7 +447,7 @@ def test_batch_insert_then_read(sessions_database, ot_exporter):
         )
         assert_span_attributes(
             ot_exporter,
-            "CloudSpanner.Commit",
+            "CloudSpanner.Batch.commit",
             attributes=_make_attributes(db_name, num_mutations=2),
             span=span_list[1],
         )
@@ -459,7 +459,7 @@ def test_batch_insert_then_read(sessions_database, ot_exporter):
         )
         assert_span_attributes(
             ot_exporter,
-            "CloudSpanner.ReadOnlyTransaction",
+            "CloudSpanner.Snapshot.read",
             attributes=_make_attributes(db_name, columns=sd.COLUMNS, table_id=sd.TABLE),
             span=span_list[3],
         )
@@ -608,7 +608,18 @@ def test_transaction_read_and_insert_then_rollback(
 
     if ot_exporter is not None:
         span_list = ot_exporter.get_finished_spans()
-        assert len(span_list) == 8
+        got_span_names = [span.name for span in span_list]
+        want_span_names = [
+            "CloudSpanner.CreateSession",
+            "CloudSpanner.GetSession",
+            "CloudSpanner.Batch.commit",
+            "CloudSpanner.Transaction.begin",
+            "CloudSpanner.Transaction.read",
+            "CloudSpanner.Transaction.read",
+            "CloudSpanner.Transaction.rollback",
+            "CloudSpanner.Snapshot.read",
+        ]
+        assert got_span_names == want_span_names
 
         assert_span_attributes(
ot_exporter, @@ -624,19 +635,19 @@ def test_transaction_read_and_insert_then_rollback( ) assert_span_attributes( ot_exporter, - "CloudSpanner.Commit", + "CloudSpanner.Batch.commit", attributes=_make_attributes(db_name, num_mutations=1), span=span_list[2], ) assert_span_attributes( ot_exporter, - "CloudSpanner.BeginTransaction", + "CloudSpanner.Transaction.begin", attributes=_make_attributes(db_name), span=span_list[3], ) assert_span_attributes( ot_exporter, - "CloudSpanner.ReadOnlyTransaction", + "CloudSpanner.Transaction.read", attributes=_make_attributes( db_name, table_id=sd.TABLE, @@ -646,7 +657,7 @@ def test_transaction_read_and_insert_then_rollback( ) assert_span_attributes( ot_exporter, - "CloudSpanner.ReadOnlyTransaction", + "CloudSpanner.Transaction.read", attributes=_make_attributes( db_name, table_id=sd.TABLE, @@ -656,13 +667,13 @@ def test_transaction_read_and_insert_then_rollback( ) assert_span_attributes( ot_exporter, - "CloudSpanner.Rollback", + "CloudSpanner.Transaction.rollback", attributes=_make_attributes(db_name), span=span_list[6], ) assert_span_attributes( ot_exporter, - "CloudSpanner.ReadOnlyTransaction", + "CloudSpanner.Snapshot.read", attributes=_make_attributes( db_name, table_id=sd.TABLE, @@ -1183,18 +1194,29 @@ def unit_of_work(transaction): session.run_in_transaction(unit_of_work) span_list = ot_exporter.get_finished_spans() - assert len(span_list) == 5 - expected_span_names = [ + got_span_names = [span.name for span in span_list] + want_span_names = [ "CloudSpanner.CreateSession", - "CloudSpanner.Commit", + "CloudSpanner.Batch.commit", "CloudSpanner.DMLTransaction", - "CloudSpanner.Commit", + "CloudSpanner.Transaction.commit", + "CloudSpanner.Session.run_in_transaction", "Test Span", ] - assert [span.name for span in span_list] == expected_span_names - for span in span_list[2:-1]: - assert span.context.trace_id == span_list[-1].context.trace_id - assert span.parent.span_id == span_list[-1].context.span_id + assert got_span_names == want_span_names + + def assert_parent_hierarchy(parent, children): + for child in children: + assert child.context.trace_id == parent.context.trace_id + assert child.parent.span_id == parent.context.span_id + + test_span = span_list[-1] + test_span_children = [span_list[-2]] + assert_parent_hierarchy(test_span, test_span_children) + + session_run_in_txn = span_list[-2] + session_run_in_txn_children = span_list[2:-2] + assert_parent_hierarchy(session_run_in_txn, session_run_in_txn_children) def test_execute_partitioned_dml( @@ -2844,31 +2866,13 @@ def test_mutation_groups_insert_or_update_then_query(not_emulator, sessions_data sd._check_rows_data(rows, sd.BATCH_WRITE_ROW_DATA) -class FauxCall: - def __init__(self, code, details="FauxCall"): - self._code = code - self._details = details - - def initial_metadata(self): - return {} - - def trailing_metadata(self): - return {} - - def code(self): - return self._code - - def details(self): - return self._details - - def _check_batch_status(status_code, expected=code_pb2.OK): if status_code != expected: _status_code_to_grpc_status_code = { member.value[0]: member for member in grpc.StatusCode } grpc_status_code = _status_code_to_grpc_status_code[status_code] - call = FauxCall(status_code) + call = _helpers.FauxCall(status_code) raise exceptions.from_grpc_status( grpc_status_code, "batch_update failed", errors=[call] ) diff --git a/tests/unit/test_batch.py b/tests/unit/test_batch.py index a7f7a6f970..a43678f3b9 100644 --- a/tests/unit/test_batch.py +++ b/tests/unit/test_batch.py @@ -212,7 
+212,7 @@ def test_commit_grpc_error(self): batch.commit() self.assertSpanAttributes( - "CloudSpanner.Commit", + "CloudSpanner.Batch.commit", status=StatusCode.ERROR, attributes=dict(BASE_ATTRIBUTES, num_mutations=1), ) @@ -261,7 +261,8 @@ def test_commit_ok(self): self.assertEqual(max_commit_delay, None) self.assertSpanAttributes( - "CloudSpanner.Commit", attributes=dict(BASE_ATTRIBUTES, num_mutations=1) + "CloudSpanner.Batch.commit", + attributes=dict(BASE_ATTRIBUTES, num_mutations=1), ) def _test_commit_with_options( @@ -327,7 +328,8 @@ def _test_commit_with_options( self.assertEqual(actual_request_options, expected_request_options) self.assertSpanAttributes( - "CloudSpanner.Commit", attributes=dict(BASE_ATTRIBUTES, num_mutations=1) + "CloudSpanner.Batch.commit", + attributes=dict(BASE_ATTRIBUTES, num_mutations=1), ) self.assertEqual(max_commit_delay_in, max_commit_delay) @@ -438,7 +440,8 @@ def test_context_mgr_success(self): self.assertEqual(request_options, RequestOptions()) self.assertSpanAttributes( - "CloudSpanner.Commit", attributes=dict(BASE_ATTRIBUTES, num_mutations=1) + "CloudSpanner.Batch.commit", + attributes=dict(BASE_ATTRIBUTES, num_mutations=1), ) def test_context_mgr_failure(self): diff --git a/tests/unit/test_pool.py b/tests/unit/test_pool.py index fbb35201eb..89715c741d 100644 --- a/tests/unit/test_pool.py +++ b/tests/unit/test_pool.py @@ -24,6 +24,7 @@ OpenTelemetryBase, StatusCode, enrich_with_otel_scope, + HAS_OPENTELEMETRY_INSTALLED, ) @@ -232,6 +233,9 @@ def test_get_non_expired(self): self.assertFalse(pool._sessions.full()) def test_spans_bind_get(self): + if not HAS_OPENTELEMETRY_INSTALLED: + return + # This tests retrieving 1 out of 4 sessions from the session pool. pool = self._make_one(size=4) database = _Database("name") @@ -239,29 +243,41 @@ def test_spans_bind_get(self): database._sessions.extend(SESSIONS) pool.bind(database) - with trace_call("pool.Get", SESSIONS[0]) as span: + with trace_call("pool.Get", SESSIONS[0]): pool.get() - wantEventNames = [ - "Acquiring session", - "Waiting for a session to become available", - "Acquired session", - ] - self.assertSpanEvents("pool.Get", wantEventNames, span) - # Check for the overall spans too. + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + want_span_names = ["CloudSpanner.FixedPool.BatchCreateSessions", "pool.Get"] + assert got_span_names == want_span_names + + attrs = TestFixedSizePool.BASE_ATTRIBUTES.copy() + + # Check for the overall spans. + self.assertSpanAttributes( + "CloudSpanner.FixedPool.BatchCreateSessions", + status=StatusCode.OK, + attributes=attrs, + span=span_list[0], + ) + self.assertSpanAttributes( "pool.Get", + status=StatusCode.OK, attributes=TestFixedSizePool.BASE_ATTRIBUTES, + span=span_list[-1], ) - wantEventNames = [ "Acquiring session", "Waiting for a session to become available", "Acquired session", ] - self.assertSpanEvents("pool.Get", wantEventNames) + self.assertSpanEvents("pool.Get", wantEventNames, span_list[-1]) def test_spans_bind_get_empty_pool(self): + if not HAS_OPENTELEMETRY_INSTALLED: + return + # Tests trying to invoke pool.get() from an empty pool. 
pool = self._make_one(size=0) database = _Database("name") @@ -289,7 +305,23 @@ def test_spans_bind_get_empty_pool(self): attributes=TestFixedSizePool.BASE_ATTRIBUTES, ) + span_list = self.get_finished_spans() + got_all_events = [] + for span in span_list: + for event in span.events: + got_all_events.append((event.name, event.attributes)) + want_all_events = [ + ("Invalid session pool size(0) <= 0", {"kind": "FixedSizePool"}), + ("Acquiring session", {"kind": "FixedSizePool"}), + ("Waiting for a session to become available", {"kind": "FixedSizePool"}), + ("No sessions available in the pool", {"kind": "FixedSizePool"}), + ] + assert got_all_events == want_all_events + def test_spans_pool_bind(self): + if not HAS_OPENTELEMETRY_INSTALLED: + return + # Tests the exception generated from invoking pool.bind when # you have an empty pool. pool = self._make_one(size=1) @@ -304,20 +336,63 @@ def test_spans_pool_bind(self): except Exception: pass + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + want_span_names = ["testBind", "CloudSpanner.FixedPool.BatchCreateSessions"] + assert got_span_names == want_span_names + wantEventNames = [ "Requesting 1 sessions", - "Creating 1 sessions", "exception", ] - self.assertSpanEvents("testBind", wantEventNames) + self.assertSpanEvents("testBind", wantEventNames, span_list[0]) - # Check for the overall spans. self.assertSpanAttributes( "testBind", status=StatusCode.ERROR, attributes=TestFixedSizePool.BASE_ATTRIBUTES, + span=span_list[0], ) + got_all_events = [] + + # Some event attributes are noisy/highly ephemeral + # and can't be directly compared against. + imprecise_event_attributes = ["exception.stacktrace", "delay_seconds", "cause"] + for span in span_list: + for event in span.events: + evt_attributes = event.attributes.copy() + for attr_name in imprecise_event_attributes: + if attr_name in evt_attributes: + evt_attributes[attr_name] = "EPHEMERAL" + + got_all_events.append((event.name, evt_attributes)) + + want_all_events = [ + ("Requesting 1 sessions", {"kind": "FixedSizePool"}), + ( + "exception", + { + "exception.type": "IndexError", + "exception.message": "pop from empty list", + "exception.stacktrace": "EPHEMERAL", + "exception.escaped": "False", + }, + ), + ("Creating 1 sessions", {"kind": "FixedSizePool"}), + ("Created sessions", {"count": 1}), + ( + "exception", + { + "exception.type": "IndexError", + "exception.message": "pop from empty list", + "exception.stacktrace": "EPHEMERAL", + "exception.escaped": "False", + }, + ), + ] + assert got_all_events == want_all_events + def test_get_expired(self): pool = self._make_one(size=4) database = _Database("name") @@ -364,6 +439,7 @@ def test_put_full(self): SESSIONS = [_Session(database)] * 4 database._sessions.extend(SESSIONS) pool.bind(database) + self.reset() with self.assertRaises(queue.Full): pool.put(_Session(database)) @@ -458,6 +534,9 @@ def test_get_empty(self): self.assertTrue(pool._sessions.empty()) def test_spans_get_empty_pool(self): + if not HAS_OPENTELEMETRY_INSTALLED: + return + # This scenario tests a pool that hasn't been filled up # and pool.get() acquires from a pool, waiting for a session # to become available. 
@@ -474,16 +553,23 @@ def test_spans_get_empty_pool(self): session.create.assert_called() self.assertTrue(pool._sessions.empty()) + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + want_span_names = ["pool.Get"] + assert got_span_names == want_span_names + + create_span = span_list[-1] self.assertSpanAttributes( "pool.Get", attributes=TestBurstyPool.BASE_ATTRIBUTES, + span=create_span, ) wantEventNames = [ "Acquiring session", "Waiting for a session to become available", "No sessions available in pool. Creating session", ] - self.assertSpanEvents("pool.Get", wantEventNames) + self.assertSpanEvents("pool.Get", wantEventNames, span=create_span) def test_get_non_empty_session_exists(self): pool = self._make_one() @@ -708,6 +794,7 @@ def test_get_hit_no_ping(self): SESSIONS = [_Session(database)] * 4 database._sessions.extend(SESSIONS) pool.bind(database) + self.reset() session = pool.get() @@ -731,6 +818,8 @@ def test_get_hit_w_ping(self): with _Monkey(MUT, _NOW=lambda: sessions_created): pool.bind(database) + self.reset() + session = pool.get() self.assertIs(session, SESSIONS[0]) @@ -753,6 +842,7 @@ def test_get_hit_w_ping_expired(self): with _Monkey(MUT, _NOW=lambda: sessions_created): pool.bind(database) + self.reset() session = pool.get() @@ -799,7 +889,39 @@ def test_put_full(self): pool.put(_Session(database)) self.assertTrue(pool._sessions.full()) - self.assertNoSpans() + + def test_spans_put_full(self): + if not HAS_OPENTELEMETRY_INSTALLED: + return + + import queue + + pool = self._make_one(size=4) + database = _Database("name") + SESSIONS = [_Session(database)] * 4 + database._sessions.extend(SESSIONS) + pool.bind(database) + + with self.assertRaises(queue.Full): + pool.put(_Session(database)) + + self.assertTrue(pool._sessions.full()) + + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + want_span_names = ["CloudSpanner.PingingPool.BatchCreateSessions"] + assert got_span_names == want_span_names + + attrs = TestPingingPool.BASE_ATTRIBUTES.copy() + self.assertSpanAttributes( + "CloudSpanner.PingingPool.BatchCreateSessions", + attributes=attrs, + span=span_list[-1], + ) + wantEventNames = ["Requested for 4 sessions, returned 4"] + self.assertSpanEvents( + "CloudSpanner.PingingPool.BatchCreateSessions", wantEventNames + ) def test_put_non_full(self): import datetime @@ -828,6 +950,7 @@ def test_clear(self): SESSIONS = [_Session(database)] * 10 database._sessions.extend(SESSIONS) pool.bind(database) + self.reset() self.assertTrue(pool._sessions.full()) api = database.spanner_api @@ -852,6 +975,7 @@ def test_ping_oldest_fresh(self): SESSIONS = [_Session(database)] * 1 database._sessions.extend(SESSIONS) pool.bind(database) + self.reset() pool.ping() @@ -886,6 +1010,7 @@ def test_ping_oldest_stale_and_not_exists(self): SESSIONS[0]._exists = False database._sessions.extend(SESSIONS) pool.bind(database) + self.reset() later = datetime.datetime.utcnow() + datetime.timedelta(seconds=4000) with _Monkey(MUT, _NOW=lambda: later): @@ -896,6 +1021,9 @@ def test_ping_oldest_stale_and_not_exists(self): self.assertNoSpans() def test_spans_get_and_leave_empty_pool(self): + if not HAS_OPENTELEMETRY_INSTALLED: + return + # This scenario tests the spans generated from pulling a span # out the pool and leaving it empty. 
pool = self._make_one() @@ -914,15 +1042,21 @@ def test_spans_get_and_leave_empty_pool(self): # session.create.assert_called() self.assertTrue(pool._sessions.empty()) + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + want_span_names = ["CloudSpanner.PingingPool.BatchCreateSessions", "pool.Get"] + assert got_span_names == want_span_names + self.assertSpanAttributes( "pool.Get", attributes=TestPingingPool.BASE_ATTRIBUTES, + span=span_list[-1], ) wantEventNames = [ "Waiting for a session to become available", "Acquired session", ] - self.assertSpanEvents("pool.Get", wantEventNames) + self.assertSpanEvents("pool.Get", wantEventNames, span_list[-1]) class TestSessionCheckout(unittest.TestCase): @@ -1095,6 +1229,10 @@ def session(self, **kwargs): # sessions into pool (important for order tests) return self._sessions.pop(0) + @property + def observability_options(self): + return dict(db_name=self.name) + class _Queue(object): _size = 1 diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 966adadcbd..0d60e98cd0 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -558,8 +558,11 @@ def test_delete_hit(self): metadata=[("google-cloud-resource-prefix", database.name)], ) + attrs = {"session.id": session._session_id, "session.name": session.name} + attrs.update(TestSession.BASE_ATTRIBUTES) self.assertSpanAttributes( - "CloudSpanner.DeleteSession", attributes=TestSession.BASE_ATTRIBUTES + "CloudSpanner.DeleteSession", + attributes=attrs, ) def test_delete_miss(self): @@ -580,10 +583,13 @@ def test_delete_miss(self): metadata=[("google-cloud-resource-prefix", database.name)], ) + attrs = {"session.id": session._session_id, "session.name": session.name} + attrs.update(TestSession.BASE_ATTRIBUTES) + self.assertSpanAttributes( "CloudSpanner.DeleteSession", status=StatusCode.ERROR, - attributes=TestSession.BASE_ATTRIBUTES, + attributes=attrs, ) def test_delete_error(self): @@ -604,10 +610,13 @@ def test_delete_error(self): metadata=[("google-cloud-resource-prefix", database.name)], ) + attrs = {"session.id": session._session_id, "session.name": session.name} + attrs.update(TestSession.BASE_ATTRIBUTES) + self.assertSpanAttributes( "CloudSpanner.DeleteSession", status=StatusCode.ERROR, - attributes=TestSession.BASE_ATTRIBUTES, + attributes=attrs, ) def test_snapshot_not_created(self): diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py index 479a0d62e9..a4446a0d1e 100644 --- a/tests/unit/test_snapshot.py +++ b/tests/unit/test_snapshot.py @@ -616,7 +616,7 @@ def test_read_other_error(self): list(derived.read(TABLE_NAME, COLUMNS, keyset)) self.assertSpanAttributes( - "CloudSpanner.ReadOnlyTransaction", + "CloudSpanner._Derived.read", status=StatusCode.ERROR, attributes=dict( BASE_ATTRIBUTES, table_id=TABLE_NAME, columns=tuple(COLUMNS) @@ -773,7 +773,7 @@ def _read_helper( ) self.assertSpanAttributes( - "CloudSpanner.ReadOnlyTransaction", + "CloudSpanner._Derived.read", attributes=dict( BASE_ATTRIBUTES, table_id=TABLE_NAME, columns=tuple(COLUMNS) ), @@ -868,7 +868,7 @@ def test_execute_sql_other_error(self): self.assertEqual(derived._execute_sql_count, 1) self.assertSpanAttributes( - "CloudSpanner.ReadWriteTransaction", + "CloudSpanner._Derived.execute_streaming_sql", status=StatusCode.ERROR, attributes=dict(BASE_ATTRIBUTES, **{"db.statement": SQL_QUERY}), ) @@ -1024,7 +1024,7 @@ def _execute_sql_helper( self.assertEqual(derived._execute_sql_count, sql_count + 1) self.assertSpanAttributes( - 
"CloudSpanner.ReadWriteTransaction", + "CloudSpanner._Derived.execute_streaming_sql", status=StatusCode.OK, attributes=dict(BASE_ATTRIBUTES, **{"db.statement": SQL_QUERY_WITH_PARAM}), ) @@ -1195,7 +1195,7 @@ def _partition_read_helper( ) self.assertSpanAttributes( - "CloudSpanner.PartitionReadOnlyTransaction", + "CloudSpanner._Derived.partition_read", status=StatusCode.OK, attributes=dict( BASE_ATTRIBUTES, table_id=TABLE_NAME, columns=tuple(COLUMNS) @@ -1226,7 +1226,7 @@ def test_partition_read_other_error(self): list(derived.partition_read(TABLE_NAME, COLUMNS, keyset)) self.assertSpanAttributes( - "CloudSpanner.PartitionReadOnlyTransaction", + "CloudSpanner._Derived.partition_read", status=StatusCode.ERROR, attributes=dict( BASE_ATTRIBUTES, table_id=TABLE_NAME, columns=tuple(COLUMNS) @@ -1697,7 +1697,7 @@ def test_begin_w_other_error(self): snapshot.begin() self.assertSpanAttributes( - "CloudSpanner.BeginTransaction", + "CloudSpanner.Snapshot.begin", status=StatusCode.ERROR, attributes=BASE_ATTRIBUTES, ) @@ -1755,7 +1755,7 @@ def test_begin_ok_exact_staleness(self): ) self.assertSpanAttributes( - "CloudSpanner.BeginTransaction", + "CloudSpanner.Snapshot.begin", status=StatusCode.OK, attributes=BASE_ATTRIBUTES, ) @@ -1791,7 +1791,7 @@ def test_begin_ok_exact_strong(self): ) self.assertSpanAttributes( - "CloudSpanner.BeginTransaction", + "CloudSpanner.Snapshot.begin", status=StatusCode.OK, attributes=BASE_ATTRIBUTES, ) diff --git a/tests/unit/test_transaction.py b/tests/unit/test_transaction.py index e426f912b2..d3d7035854 100644 --- a/tests/unit/test_transaction.py +++ b/tests/unit/test_transaction.py @@ -162,7 +162,7 @@ def test_begin_w_other_error(self): transaction.begin() self.assertSpanAttributes( - "CloudSpanner.BeginTransaction", + "CloudSpanner.Transaction.begin", status=StatusCode.ERROR, attributes=TestTransaction.BASE_ATTRIBUTES, ) @@ -195,7 +195,7 @@ def test_begin_ok(self): ) self.assertSpanAttributes( - "CloudSpanner.BeginTransaction", attributes=TestTransaction.BASE_ATTRIBUTES + "CloudSpanner.Transaction.begin", attributes=TestTransaction.BASE_ATTRIBUTES ) def test_begin_w_retry(self): @@ -266,7 +266,7 @@ def test_rollback_w_other_error(self): self.assertFalse(transaction.rolled_back) self.assertSpanAttributes( - "CloudSpanner.Rollback", + "CloudSpanner.Transaction.rollback", status=StatusCode.ERROR, attributes=TestTransaction.BASE_ATTRIBUTES, ) @@ -299,7 +299,8 @@ def test_rollback_ok(self): ) self.assertSpanAttributes( - "CloudSpanner.Rollback", attributes=TestTransaction.BASE_ATTRIBUTES + "CloudSpanner.Transaction.rollback", + attributes=TestTransaction.BASE_ATTRIBUTES, ) def test_commit_not_begun(self): @@ -345,7 +346,7 @@ def test_commit_w_other_error(self): self.assertIsNone(transaction.committed) self.assertSpanAttributes( - "CloudSpanner.Commit", + "CloudSpanner.Transaction.commit", status=StatusCode.ERROR, attributes=dict(TestTransaction.BASE_ATTRIBUTES, num_mutations=1), ) @@ -427,7 +428,7 @@ def _commit_helper( self.assertEqual(transaction.commit_stats.mutation_count, 4) self.assertSpanAttributes( - "CloudSpanner.Commit", + "CloudSpanner.Transaction.commit", attributes=dict( TestTransaction.BASE_ATTRIBUTES, num_mutations=len(transaction._mutations), From f2483e11ba94f8bd1e142d1a85347d90104d1a19 Mon Sep 17 00:00:00 2001 From: Emmanuel T Odeke Date: Thu, 19 Dec 2024 12:35:24 -0800 Subject: [PATCH 05/19] feat(x-goog-spanner-request-id): introduce AtomicCounter (#1275) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 
feat(x-goog-spanner-request-id): introduce AtomicCounter

This change introduces AtomicCounter, a thread-safe counter to deal
with values that are mutated concurrently from multiple threads. It
permits the operations:
* atomic_counter += 1
* value = atomic_counter + 1
* atomic_counter.value

which will be essential for bringing in the x-goog-spanner-request-id
logic in much smaller changelists.

Updates #1261
Carved out from PR #1264

* Tests for with_request_id

* chore: remove sleep

* chore: remove unused import

---------

Co-authored-by: Knut Olav Løite
---
 google/cloud/spanner_v1/_helpers.py          | 44 +++++++++++
 google/cloud/spanner_v1/request_id_header.py | 42 +++++++++++
 tests/unit/test_atomic_counter.py            | 78 ++++++++++++++++++++
 3 files changed, 164 insertions(+)
 create mode 100644 google/cloud/spanner_v1/request_id_header.py
 create mode 100644 tests/unit/test_atomic_counter.py

diff --git a/google/cloud/spanner_v1/_helpers.py b/google/cloud/spanner_v1/_helpers.py
index 29bd604e7b..1f4bf5b174 100644
--- a/google/cloud/spanner_v1/_helpers.py
+++ b/google/cloud/spanner_v1/_helpers.py
@@ -19,6 +19,7 @@
 import math
 import time
 import base64
+import threading
 
 from google.protobuf.struct_pb2 import ListValue
 from google.protobuf.struct_pb2 import Value
@@ -30,6 +31,7 @@
 from google.cloud.spanner_v1 import TypeCode
 from google.cloud.spanner_v1 import ExecuteSqlRequest
 from google.cloud.spanner_v1 import JsonObject
+from google.cloud.spanner_v1.request_id_header import with_request_id
 
 # Validation error messages
 NUMERIC_MAX_SCALE_ERR_MSG = (
@@ -525,3 +527,45 @@ def _metadata_with_leader_aware_routing(value, **kw):
         List[Tuple[str, str]]: RPC metadata with leader aware routing header
     """
     return ("x-goog-spanner-route-to-leader", str(value).lower())
+
+
+class AtomicCounter:
+    def __init__(self, start_value=0):
+        self.__lock = threading.Lock()
+        self.__value = start_value
+
+    @property
+    def value(self):
+        with self.__lock:
+            return self.__value
+
+    def increment(self, n=1):
+        with self.__lock:
+            self.__value += n
+            return self.__value
+
+    def __iadd__(self, n):
+        """
+        Defines the inplace += operator result.
+        """
+        with self.__lock:
+            self.__value += n
+        return self
+
+    def __add__(self, n):
+        """
+        Defines the result of invoking: value = AtomicCounter + addable
+        """
+        with self.__lock:
+            n += self.__value
+            return n
+
+    def __radd__(self, n):
+        """
+        Defines the result of invoking: value = addable + AtomicCounter
+        """
+        return self.__add__(n)
+
+
+def _metadata_with_request_id(*args, **kwargs):
+    return with_request_id(*args, **kwargs)
diff --git a/google/cloud/spanner_v1/request_id_header.py b/google/cloud/spanner_v1/request_id_header.py
new file mode 100644
index 0000000000..8376778273
--- /dev/null
+++ b/google/cloud/spanner_v1/request_id_header.py
@@ -0,0 +1,42 @@
+# Copyright 2024 Google LLC All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+REQ_ID_VERSION = 1  # The version of the x-goog-spanner-request-id spec.
+REQ_ID_HEADER_KEY = "x-goog-spanner-request-id"
+
+
+def generate_rand_uint64():
+    b = os.urandom(8)
+    return (
+        b[7] & 0xFF
+        | (b[6] & 0xFF) << 8
+        | (b[5] & 0xFF) << 16
+        | (b[4] & 0xFF) << 24
+        | (b[3] & 0xFF) << 32
+        | (b[2] & 0xFF) << 40
+        | (b[1] & 0xFF) << 48
+        | (b[0] & 0xFF) << 56
+    )
+
+
+REQ_RAND_PROCESS_ID = generate_rand_uint64()
+
+
+def with_request_id(client_id, channel_id, nth_request, attempt, other_metadata=[]):
+    req_id = f"{REQ_ID_VERSION}.{REQ_RAND_PROCESS_ID}.{client_id}.{channel_id}.{nth_request}.{attempt}"
+    all_metadata = other_metadata.copy()
+    all_metadata.append((REQ_ID_HEADER_KEY, req_id))
+    return all_metadata
diff --git a/tests/unit/test_atomic_counter.py b/tests/unit/test_atomic_counter.py
new file mode 100644
index 0000000000..92d10cac79
--- /dev/null
+++ b/tests/unit/test_atomic_counter.py
@@ -0,0 +1,78 @@
+# Copyright 2024 Google LLC All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+import threading
+import unittest
+from google.cloud.spanner_v1._helpers import AtomicCounter
+
+
+class TestAtomicCounter(unittest.TestCase):
+    def test_initialization(self):
+        ac_default = AtomicCounter()
+        assert ac_default.value == 0
+
+        ac_1 = AtomicCounter(1)
+        assert ac_1.value == 1
+
+        ac_negative_1 = AtomicCounter(-1)
+        assert ac_negative_1.value == -1
+
+    def test_increment(self):
+        ac = AtomicCounter()
+        result_default = ac.increment()
+        assert result_default == 1
+        assert ac.value == 1
+
+        result_with_value = ac.increment(2)
+        assert result_with_value == 3
+        assert ac.value == 3
+        result_plus_100 = ac.increment(100)
+        assert result_plus_100 == 103
+
+    def test_plus_call(self):
+        ac = AtomicCounter()
+        ac += 1
+        assert ac.value == 1
+
+        n = ac + 2
+        assert n == 3
+        assert ac.value == 1
+
+        n = 200 + ac
+        assert n == 201
+        assert ac.value == 1
+
+    def test_multiple_threads_incrementing(self):
+        ac = AtomicCounter()
+        n = 200
+        m = 10
+
+        def do_work():
+            for i in range(m):
+                ac.increment()
+
+        threads = []
+        for i in range(n):
+            th = threading.Thread(target=do_work)
+            threads.append(th)
+            th.start()
+
+        random.shuffle(threads)
+        for th in threads:
+            th.join()
+            assert not th.is_alive()
+
+        # Finally the result should be n*m
+        assert ac.value == n * m
From 6352dd2f84c64ff39806862b9e245fdc6d34d6bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Knut=20Olav=20L=C3=B8ite?=
Date: Fri, 20 Dec 2024 15:29:20 +0100
Subject: [PATCH 06/19] test: support inline-begin in mock server (#1271)

---
 .../cloud/spanner_v1/testing/mock_spanner.py  | 46 ++++++++++---
 .../test_aborted_transaction.py               | 69 +++++++++++++++++++
 2 files changed, 105 insertions(+), 10 deletions(-)

diff --git a/google/cloud/spanner_v1/testing/mock_spanner.py b/google/cloud/spanner_v1/testing/mock_spanner.py
index 1f37ff2a03..6b50d9a6d1 100644
--- a/google/cloud/spanner_v1/testing/mock_spanner.py
+++ b/google/cloud/spanner_v1/testing/mock_spanner.py
@@ -18,6 +18,13 @@
 from google.protobuf import empty_pb2
 from grpc_status.rpc_status import _Status
+
+from google.cloud.spanner_v1 import
( + TransactionOptions, + ResultSetMetadata, + ExecuteSqlRequest, + ExecuteBatchDmlRequest, +) from google.cloud.spanner_v1.testing.mock_database_admin import DatabaseAdminServicer import google.cloud.spanner_v1.testing.spanner_database_admin_pb2_grpc as database_admin_grpc import google.cloud.spanner_v1.testing.spanner_pb2_grpc as spanner_grpc @@ -51,23 +58,25 @@ def pop_error(self, context): context.abort_with_status(error) def get_result_as_partial_result_sets( - self, sql: str + self, sql: str, started_transaction: transaction.Transaction ) -> [result_set.PartialResultSet]: result: result_set.ResultSet = self.get_result(sql) partials = [] first = True if len(result.rows) == 0: partial = result_set.PartialResultSet() - partial.metadata = result.metadata + partial.metadata = ResultSetMetadata(result.metadata) partials.append(partial) else: for row in result.rows: partial = result_set.PartialResultSet() if first: - partial.metadata = result.metadata + partial.metadata = ResultSetMetadata(result.metadata) partial.values.extend(row) partials.append(partial) partials[len(partials) - 1].stats = result.stats + if started_transaction: + partials[0].metadata.transaction = started_transaction return partials @@ -129,22 +138,29 @@ def DeleteSession(self, request, context): def ExecuteSql(self, request, context): self._requests.append(request) - return result_set.ResultSet() + self.mock_spanner.pop_error(context) + started_transaction = self.__maybe_create_transaction(request) + result: result_set.ResultSet = self.mock_spanner.get_result(request.sql) + if started_transaction: + result.metadata = ResultSetMetadata(result.metadata) + result.metadata.transaction = started_transaction + return result def ExecuteStreamingSql(self, request, context): self._requests.append(request) - partials = self.mock_spanner.get_result_as_partial_result_sets(request.sql) + self.mock_spanner.pop_error(context) + started_transaction = self.__maybe_create_transaction(request) + partials = self.mock_spanner.get_result_as_partial_result_sets( + request.sql, started_transaction + ) for result in partials: yield result def ExecuteBatchDml(self, request, context): self._requests.append(request) + self.mock_spanner.pop_error(context) response = spanner.ExecuteBatchDmlResponse() - started_transaction = None - if not request.transaction.begin == transaction.TransactionOptions(): - started_transaction = self.__create_transaction( - request.session, request.transaction.begin - ) + started_transaction = self.__maybe_create_transaction(request) first = True for statement in request.statements: result = self.mock_spanner.get_result(statement.sql) @@ -170,6 +186,16 @@ def BeginTransaction(self, request, context): self._requests.append(request) return self.__create_transaction(request.session, request.options) + def __maybe_create_transaction( + self, request: ExecuteSqlRequest | ExecuteBatchDmlRequest + ): + started_transaction = None + if not request.transaction.begin == TransactionOptions(): + started_transaction = self.__create_transaction( + request.session, request.transaction.begin + ) + return started_transaction + def __create_transaction( self, session: str, options: transaction.TransactionOptions ) -> transaction.Transaction: diff --git a/tests/mockserver_tests/test_aborted_transaction.py b/tests/mockserver_tests/test_aborted_transaction.py index ede2675ce6..89b30a0875 100644 --- a/tests/mockserver_tests/test_aborted_transaction.py +++ b/tests/mockserver_tests/test_aborted_transaction.py @@ -16,6 +16,9 @@ 
BatchCreateSessionsRequest, BeginTransactionRequest, CommitRequest, + ExecuteSqlRequest, + TypeCode, + ExecuteBatchDmlRequest, ) from google.cloud.spanner_v1.testing.mock_spanner import SpannerServicer from google.cloud.spanner_v1.transaction import Transaction @@ -23,6 +26,8 @@ MockServerTestBase, add_error, aborted_status, + add_update_count, + add_single_result, ) @@ -45,6 +50,70 @@ def test_run_in_transaction_commit_aborted(self): self.assertTrue(isinstance(requests[3], BeginTransactionRequest)) self.assertTrue(isinstance(requests[4], CommitRequest)) + def test_run_in_transaction_update_aborted(self): + add_update_count("update my_table set my_col=1 where id=2", 1) + add_error(SpannerServicer.ExecuteSql.__name__, aborted_status()) + self.database.run_in_transaction(_execute_update) + + # Verify that the transaction was retried. + requests = self.spanner_service.requests + self.assertEqual(4, len(requests), msg=requests) + self.assertTrue(isinstance(requests[0], BatchCreateSessionsRequest)) + self.assertTrue(isinstance(requests[1], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[2], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[3], CommitRequest)) + + def test_run_in_transaction_query_aborted(self): + add_single_result( + "select value from my_table where id=1", + "value", + TypeCode.STRING, + "my-value", + ) + add_error(SpannerServicer.ExecuteStreamingSql.__name__, aborted_status()) + self.database.run_in_transaction(_execute_query) + + # Verify that the transaction was retried. + requests = self.spanner_service.requests + self.assertEqual(4, len(requests), msg=requests) + self.assertTrue(isinstance(requests[0], BatchCreateSessionsRequest)) + self.assertTrue(isinstance(requests[1], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[2], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[3], CommitRequest)) + + def test_run_in_transaction_batch_dml_aborted(self): + add_update_count("update my_table set my_col=1 where id=1", 1) + add_update_count("update my_table set my_col=1 where id=2", 1) + add_error(SpannerServicer.ExecuteBatchDml.__name__, aborted_status()) + self.database.run_in_transaction(_execute_batch_dml) + + # Verify that the transaction was retried. + requests = self.spanner_service.requests + self.assertEqual(4, len(requests), msg=requests) + self.assertTrue(isinstance(requests[0], BatchCreateSessionsRequest)) + self.assertTrue(isinstance(requests[1], ExecuteBatchDmlRequest)) + self.assertTrue(isinstance(requests[2], ExecuteBatchDmlRequest)) + self.assertTrue(isinstance(requests[3], CommitRequest)) + def _insert_mutations(transaction: Transaction): transaction.insert("my_table", ["col1", "col2"], ["value1", "value2"]) + + +def _execute_update(transaction: Transaction): + transaction.execute_update("update my_table set my_col=1 where id=2") + + +def _execute_query(transaction: Transaction): + rows = transaction.execute_sql("select value from my_table where id=1") + for _ in rows: + pass + + +def _execute_batch_dml(transaction: Transaction): + transaction.batch_update( + [ + "update my_table set my_col=1 where id=1", + "update my_table set my_col=1 where id=2", + ] + ) From ab310786baf09033a28c76e843b654e98a21613d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Knut=20Olav=20L=C3=B8ite?= Date: Wed, 1 Jan 2025 10:17:30 +0100 Subject: [PATCH 07/19] fix: retry UNAVAILABLE errors for streaming RPCs (#1278) UNAVAILABLE errors that occurred during the initial attempt of a streaming RPC (StreamingRead / ExecuteStreamingSql) would not be retried. 
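
As a rough sketch of the pattern this fix adopts (the names below are
simplified stand-ins, and the real helper in snapshot.py also buffers
items and tracks resume tokens, which this omits):

    from google.api_core.exceptions import ServiceUnavailable

    def restart_on_unavailable(method, request):
        # Create the stream lazily inside the retry loop so that an
        # UNAVAILABLE error raised on the very first attempt is caught
        # and retried, instead of propagating to the caller.
        iterator = None
        while True:
            try:
                if iterator is None:
                    iterator = method(request=request)
                for item in iterator:
                    yield item
                return
            except ServiceUnavailable:
                iterator = None  # force re-creation of the stream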
Fixes #1150 --- google/cloud/spanner_v1/snapshot.py | 13 +++++++---- .../mockserver_tests/mock_server_test_base.py | 21 ++++++++++++++++++ tests/mockserver_tests/test_basics.py | 22 +++++++++++++++++++ 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/google/cloud/spanner_v1/snapshot.py b/google/cloud/spanner_v1/snapshot.py index 6234c96435..de610e1387 100644 --- a/google/cloud/spanner_v1/snapshot.py +++ b/google/cloud/spanner_v1/snapshot.py @@ -86,13 +86,18 @@ def _restart_on_unavailable( ) request.transaction = transaction_selector + iterator = None - with trace_call( - trace_name, session, attributes, observability_options=observability_options - ): - iterator = method(request=request) while True: try: + if iterator is None: + with trace_call( + trace_name, + session, + attributes, + observability_options=observability_options, + ): + iterator = method(request=request) for item in iterator: item_buffer.append(item) # Setting the transaction id because the transaction begin was inlined for first rpc. diff --git a/tests/mockserver_tests/mock_server_test_base.py b/tests/mockserver_tests/mock_server_test_base.py index 12c98bc51b..b332c88d7c 100644 --- a/tests/mockserver_tests/mock_server_test_base.py +++ b/tests/mockserver_tests/mock_server_test_base.py @@ -57,6 +57,27 @@ def aborted_status() -> _Status: return status +# Creates an UNAVAILABLE status with the smallest possible retry delay. +def unavailable_status() -> _Status: + error = status_pb2.Status( + code=code_pb2.UNAVAILABLE, + message="Service unavailable.", + ) + retry_info = RetryInfo(retry_delay=Duration(seconds=0, nanos=1)) + status = _Status( + code=code_to_grpc_status_code(error.code), + details=error.message, + trailing_metadata=( + ("grpc-status-details-bin", error.SerializeToString()), + ( + "google.rpc.retryinfo-bin", + retry_info.SerializeToString(), + ), + ), + ) + return status + + def add_error(method: str, error: status_pb2.Status): MockServerTestBase.spanner_service.mock_spanner.add_error(method, error) diff --git a/tests/mockserver_tests/test_basics.py b/tests/mockserver_tests/test_basics.py index ed0906cb9b..d34065a6ff 100644 --- a/tests/mockserver_tests/test_basics.py +++ b/tests/mockserver_tests/test_basics.py @@ -21,11 +21,14 @@ BeginTransactionRequest, TransactionOptions, ) +from google.cloud.spanner_v1.testing.mock_spanner import SpannerServicer from tests.mockserver_tests.mock_server_test_base import ( MockServerTestBase, add_select1_result, add_update_count, + add_error, + unavailable_status, ) @@ -85,3 +88,22 @@ def test_dbapi_partitioned_dml(self): self.assertEqual( TransactionOptions(dict(partitioned_dml={})), begin_request.options ) + + def test_execute_streaming_sql_unavailable(self): + add_select1_result() + # Add an UNAVAILABLE error that is returned the first time the + # ExecuteStreamingSql RPC is called. + add_error(SpannerServicer.ExecuteStreamingSql.__name__, unavailable_status()) + with self.database.snapshot() as snapshot: + results = snapshot.execute_sql("select 1") + result_list = [] + for row in results: + result_list.append(row) + self.assertEqual(1, row[0]) + self.assertEqual(1, len(result_list)) + requests = self.spanner_service.requests + self.assertEqual(3, len(requests), msg=requests) + self.assertTrue(isinstance(requests[0], BatchCreateSessionsRequest)) + # The ExecuteStreamingSql call should be retried. 
+ self.assertTrue(isinstance(requests[1], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[2], ExecuteSqlRequest)) From 7acf6dd8cc854a4792782335ac2b384d22910520 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 3 Jan 2025 09:38:45 -0500 Subject: [PATCH 08/19] chore(python): Update the python version in docs presubmit to use 3.10 (#1281) Source-Link: https://github.com/googleapis/synthtool/commit/de3def663b75d8b9ae1e5d548364c960ff13af8f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:a1c5112b81d645f5bbc4d4bbc99d7dcb5089a52216c0e3fb1203a0eeabadd7d5 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 6 ++-- .kokoro/docker/docs/requirements.txt | 52 ++++++++++++++++++++++------ 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 6301519a9a..1d0fd7e787 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 -# created: 2024-11-12T12:09:45.821174897Z + digest: sha256:a1c5112b81d645f5bbc4d4bbc99d7dcb5089a52216c0e3fb1203a0eeabadd7d5 +# created: 2025-01-02T23:09:36.975468657Z diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 8bb0764594..f99a5c4aac 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -2,11 +2,11 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes requirements.in +# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.1 \ - --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ - --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ @@ -23,7 +23,7 @@ filelock==3.16.1 \ nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in + # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in packaging==24.2 \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f @@ -32,11 +32,41 @@ platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv -tomli==2.0.2 \ - --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ - --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed +tomli==2.2.1 \ + 
--hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ + --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ + --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ + --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \ + --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \ + --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \ + --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \ + --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \ + --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \ + --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \ + --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \ + --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \ + --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \ + --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \ + --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \ + --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \ + --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \ + --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \ + --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \ + --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \ + --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \ + --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \ + --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \ + --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \ + --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \ + --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \ + --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \ + --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \ + --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \ + --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ + --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ + --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.27.1 \ - --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \ - --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4 +virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa # via nox From 959bb9cda953eead89ffc271cb2a472e7139f81c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Knut=20Olav=20L=C3=B8ite?= Date: Tue, 7 Jan 2025 16:07:01 +0100 Subject: [PATCH 09/19] feat: support GRAPH and pipe syntax in dbapi (#1285) Recognize GRAPH and pipe syntax queries as valid queries in dbapi. 
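
As an illustration (not part of the patch itself), here is a minimal standalone sketch of the classification heuristic: it mirrors the updated `RE_NON_UPDATE` regular expression from the diff below, with the surrounding `classify_stmt` machinery elided, and the UPDATE statement is a hypothetical example:

    import re

    # GRAPH and pipe-syntax (leading FROM) statements are now recognized
    # as queries, just like SELECT.
    RE_NON_UPDATE = re.compile(r"^\W*(SELECT|GRAPH|FROM)", re.IGNORECASE)

    statements = [
        "SELECT 1",
        "GRAPH FinGraph MATCH (n) RETURN LABELS(n) AS label, n.id",
        "FROM Produce |> WHERE item != 'bananas'",
        "UPDATE Produce SET item = 'melons' WHERE TRUE",  # hypothetical update
    ]
    for sql in statements:
        kind = "QUERY" if RE_NON_UPDATE.match(sql) else "UPDATE"
        print(kind, sql)

Running this prints QUERY for the first three statements and UPDATE for the last.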
--- google/cloud/spanner_dbapi/parse_utils.py | 2 +- tests/unit/spanner_dbapi/test_parse_utils.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/google/cloud/spanner_dbapi/parse_utils.py b/google/cloud/spanner_dbapi/parse_utils.py index f039efe5b0..245840ca0d 100644 --- a/google/cloud/spanner_dbapi/parse_utils.py +++ b/google/cloud/spanner_dbapi/parse_utils.py @@ -155,7 +155,7 @@ STMT_INSERT = "INSERT" # Heuristic for identifying statements that don't need to be run as updates. -RE_NON_UPDATE = re.compile(r"^\W*(SELECT)", re.IGNORECASE) +RE_NON_UPDATE = re.compile(r"^\W*(SELECT|GRAPH|FROM)", re.IGNORECASE) RE_WITH = re.compile(r"^\s*(WITH)", re.IGNORECASE) diff --git a/tests/unit/spanner_dbapi/test_parse_utils.py b/tests/unit/spanner_dbapi/test_parse_utils.py index 4b1c7cdb06..f0721bdbe3 100644 --- a/tests/unit/spanner_dbapi/test_parse_utils.py +++ b/tests/unit/spanner_dbapi/test_parse_utils.py @@ -39,6 +39,11 @@ def test_classify_stmt(self): "WITH sq AS (SELECT SchoolID FROM Roster) SELECT * from sq", StatementType.QUERY, ), + ( + "GRAPH FinGraph MATCH (n) RETURN LABELS(n) AS label, n.id", + StatementType.QUERY, + ), + ("FROM Produce |> WHERE item != 'bananas'", StatementType.QUERY), ( "CREATE TABLE django_content_type (id STRING(64) NOT NULL, name STRING(100) " "NOT NULL, app_label STRING(100) NOT NULL, model STRING(100) NOT NULL) PRIMARY KEY(id)", From 04a11a6110e8ba646b1c0d4f6a5fb3d5c30889bb Mon Sep 17 00:00:00 2001 From: Lester Szeto Date: Tue, 7 Jan 2025 22:14:20 -0800 Subject: [PATCH 10/19] chore: Add Custom OpenTelemetry Exporter in for Service Metrics (#1273) * chore: Add Custom OpenTelemetry Exporter in for Service Metrics * Updated copyright dates to 2025 --------- Co-authored-by: rahul2393 --- google/cloud/spanner_v1/metrics/README.md | 19 + google/cloud/spanner_v1/metrics/constants.py | 63 +++ .../spanner_v1/metrics/metrics_exporter.py | 392 ++++++++++++++ setup.py | 1 + testing/constraints-3.10.txt | 1 + testing/constraints-3.11.txt | 1 + testing/constraints-3.12.txt | 1 + testing/constraints-3.13.txt | 1 + testing/constraints-3.7.txt | 1 + testing/constraints-3.8.txt | 1 + testing/constraints-3.9.txt | 1 + tests/unit/test_metric_exporter.py | 488 ++++++++++++++++++ 12 files changed, 970 insertions(+) create mode 100644 google/cloud/spanner_v1/metrics/README.md create mode 100644 google/cloud/spanner_v1/metrics/constants.py create mode 100644 google/cloud/spanner_v1/metrics/metrics_exporter.py create mode 100644 tests/unit/test_metric_exporter.py diff --git a/google/cloud/spanner_v1/metrics/README.md b/google/cloud/spanner_v1/metrics/README.md new file mode 100644 index 0000000000..9619715c85 --- /dev/null +++ b/google/cloud/spanner_v1/metrics/README.md @@ -0,0 +1,19 @@ +# Custom Metric Exporter +The custom metric exporter, as defined in [metrics_exporter.py](./metrics_exporter.py), is designed to work in conjunction with OpenTelemetry and the Spanner client. It converts data into its protobuf equivalent and sends it to Google Cloud Monitoring. + +## Filtering Criteria +The exporter filters metrics based on the following conditions, utilizing values defined in [constants.py](./constants.py): + +* Metrics with a scope set to `gax-python`. 
+* Metrics with one of the following predefined names: + * `attempt_latencies` + * `attempt_count` + * `operation_latencies` + * `operation_count` + * `gfe_latency` + * `gfe_missing_header_count` + +## Service Endpoint +The exporter sends metrics to the Google Cloud Monitoring [service endpoint](https://cloud.google.com/python/docs/reference/monitoring/latest/google.cloud.monitoring_v3.services.metric_service.MetricServiceClient#google_cloud_monitoring_v3_services_metric_service_MetricServiceClient_create_service_time_series), distinct from the regular client endpoint. This service endpoint operates under a different quota limit than the user endpoint and features an additional server-side filter that only permits a predefined set of metrics to pass through. + +When introducing new service metrics, it is essential to ensure they are allowed through by the server-side filter as well. diff --git a/google/cloud/spanner_v1/metrics/constants.py b/google/cloud/spanner_v1/metrics/constants.py new file mode 100644 index 0000000000..5eca1fa83d --- /dev/null +++ b/google/cloud/spanner_v1/metrics/constants.py @@ -0,0 +1,63 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +BUILT_IN_METRICS_METER_NAME = "gax-python" +NATIVE_METRICS_PREFIX = "spanner.googleapis.com/internal/client" +SPANNER_RESOURCE_TYPE = "spanner_instance_client" + +# Monitored resource labels +MONITORED_RES_LABEL_KEY_PROJECT = "project_id" +MONITORED_RES_LABEL_KEY_INSTANCE = "instance_id" +MONITORED_RES_LABEL_KEY_INSTANCE_CONFIG = "instance_config" +MONITORED_RES_LABEL_KEY_LOCATION = "location" +MONITORED_RES_LABEL_KEY_CLIENT_HASH = "client_hash" +MONITORED_RESOURCE_LABELS = [ + MONITORED_RES_LABEL_KEY_PROJECT, + MONITORED_RES_LABEL_KEY_INSTANCE, + MONITORED_RES_LABEL_KEY_INSTANCE_CONFIG, + MONITORED_RES_LABEL_KEY_LOCATION, + MONITORED_RES_LABEL_KEY_CLIENT_HASH, +] + +# Metric labels +METRIC_LABEL_KEY_CLIENT_UID = "client_uid" +METRIC_LABEL_KEY_CLIENT_NAME = "client_name" +METRIC_LABEL_KEY_DATABASE = "database" +METRIC_LABEL_KEY_METHOD = "method" +METRIC_LABEL_KEY_STATUS = "status" +METRIC_LABEL_KEY_DIRECT_PATH_ENABLED = "directpath_enabled" +METRIC_LABEL_KEY_DIRECT_PATH_USED = "directpath_used" +METRIC_LABELS = [ + METRIC_LABEL_KEY_CLIENT_UID, + METRIC_LABEL_KEY_CLIENT_NAME, + METRIC_LABEL_KEY_DATABASE, + METRIC_LABEL_KEY_METHOD, + METRIC_LABEL_KEY_STATUS, + METRIC_LABEL_KEY_DIRECT_PATH_ENABLED, + METRIC_LABEL_KEY_DIRECT_PATH_USED, +] + +# Metric names +METRIC_NAME_OPERATION_LATENCIES = "operation_latencies" +METRIC_NAME_ATTEMPT_LATENCIES = "attempt_latencies" +METRIC_NAME_OPERATION_COUNT = "operation_count" +METRIC_NAME_ATTEMPT_COUNT = "attempt_count" +METRIC_NAME_GFE_LATENCY = "gfe_latency" +METRIC_NAME_GFE_MISSING_HEADER_COUNT = "gfe_missing_header_count" +METRIC_NAMES = [ + METRIC_NAME_OPERATION_LATENCIES, + METRIC_NAME_ATTEMPT_LATENCIES, + METRIC_NAME_OPERATION_COUNT, + METRIC_NAME_ATTEMPT_COUNT, +] diff --git a/google/cloud/spanner_v1/metrics/metrics_exporter.py 
b/google/cloud/spanner_v1/metrics/metrics_exporter.py new file mode 100644 index 0000000000..f7d3aa18c8 --- /dev/null +++ b/google/cloud/spanner_v1/metrics/metrics_exporter.py @@ -0,0 +1,392 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .constants import ( + BUILT_IN_METRICS_METER_NAME, + NATIVE_METRICS_PREFIX, + SPANNER_RESOURCE_TYPE, + MONITORED_RESOURCE_LABELS, + METRIC_LABELS, + METRIC_NAMES, +) + +import logging +from typing import Optional, List, Union, NoReturn, Tuple + +import google.auth +from google.api.distribution_pb2 import ( # pylint: disable=no-name-in-module + Distribution, +) + +# pylint: disable=no-name-in-module +from google.api.metric_pb2 import ( # pylint: disable=no-name-in-module + Metric as GMetric, + MetricDescriptor, +) +from google.api.monitored_resource_pb2 import ( # pylint: disable=no-name-in-module + MonitoredResource, +) + +from google.cloud.monitoring_v3.services.metric_service.transports.grpc import ( + MetricServiceGrpcTransport, +) + +# pylint: disable=no-name-in-module +from google.protobuf.timestamp_pb2 import Timestamp +from google.cloud.spanner_v1.gapic_version import __version__ + +try: + from opentelemetry.sdk.metrics.export import ( + Gauge, + Histogram, + HistogramDataPoint, + Metric, + MetricExporter, + MetricExportResult, + MetricsData, + NumberDataPoint, + Sum, + ) + from opentelemetry.sdk.resources import Resource + + HAS_OPENTELEMETRY_INSTALLED = True +except ImportError: + HAS_OPENTELEMETRY_INSTALLED = False + +try: + from google.cloud.monitoring_v3 import ( + CreateTimeSeriesRequest, + MetricServiceClient, + Point, + TimeInterval, + TimeSeries, + TypedValue, + ) + + HAS_GOOGLE_CLOUD_MONITORING_INSTALLED = True +except ImportError: + HAS_GOOGLE_CLOUD_MONITORING_INSTALLED = False + +HAS_DEPENDENCIES_INSTALLED = ( + HAS_OPENTELEMETRY_INSTALLED and HAS_GOOGLE_CLOUD_MONITORING_INSTALLED +) + +logger = logging.getLogger(__name__) +MAX_BATCH_WRITE = 200 +MILLIS_PER_SECOND = 1000 + +_USER_AGENT = f"python-spanner; google-cloud-service-metric-exporter {__version__}" + +# Set user-agent metadata, see https://github.com/grpc/grpc/issues/23644 and default options +# from +# https://github.com/googleapis/python-monitoring/blob/v2.11.3/google/cloud/monitoring_v3/services/metric_service/transports/grpc.py#L175-L178 +_OPTIONS = [ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ("grpc.primary_user_agent", _USER_AGENT), +] + + +# pylint is unable to resolve members of protobuf objects +# pylint: disable=no-member +# pylint: disable=too-many-branches +# pylint: disable=too-many-locals +class CloudMonitoringMetricsExporter(MetricExporter): + """Implementation of Metrics Exporter to Google Cloud Monitoring. + + You can manually pass in project_id and client, or else the + Exporter will take that information from Application Default + Credentials. + + Args: + project_id: project id of your Google Cloud project. + client: Client to upload metrics to Google Cloud Monitoring. 
+ """ + + # Based on the cloud_monitoring exporter found here: https://github.com/GoogleCloudPlatform/opentelemetry-operations-python/blob/main/opentelemetry-exporter-gcp-monitoring/src/opentelemetry/exporter/cloud_monitoring/__init__.py + + def __init__( + self, + project_id: Optional[str] = None, + client: Optional[MetricServiceClient] = None, + ): + """Initialize a custom exporter to send metrics for the Spanner Service Metrics.""" + # Default preferred_temporality is all CUMULATIVE so need to customize + super().__init__() + + # Create a new GRPC Client for Google Cloud Monitoring if not provided + self.client = client or MetricServiceClient( + transport=MetricServiceGrpcTransport( + channel=MetricServiceGrpcTransport.create_channel( + options=_OPTIONS, + ) + ) + ) + + # Set project information + self.project_id: str + if not project_id: + _, default_project_id = google.auth.default() + self.project_id = str(default_project_id) + else: + self.project_id = project_id + self.project_name = self.client.common_project_path(self.project_id) + + def _batch_write(self, series: List[TimeSeries], timeout_millis: float) -> None: + """Cloud Monitoring allows writing up to 200 time series at once. + + :param series: ProtoBuf TimeSeries + :return: + """ + write_ind = 0 + timeout = timeout_millis / MILLIS_PER_SECOND + while write_ind < len(series): + request = CreateTimeSeriesRequest( + name=self.project_name, + time_series=series[write_ind : write_ind + MAX_BATCH_WRITE], + ) + + self.client.create_service_time_series( + request=request, + timeout=timeout, + ) + write_ind += MAX_BATCH_WRITE + + @staticmethod + def _resource_to_monitored_resource_pb( + resource: Resource, labels: any + ) -> MonitoredResource: + """ + Convert the resource to a Google Cloud Monitoring monitored resource. + + :param resource: OpenTelemetry resource + :param labels: labels to add to the monitored resource + :return: Google Cloud Monitoring monitored resource + """ + monitored_resource = MonitoredResource( + type=SPANNER_RESOURCE_TYPE, + labels=labels, + ) + return monitored_resource + + @staticmethod + def _to_metric_kind(metric: Metric) -> MetricDescriptor.MetricKind: + """ + Convert the metric to a Google Cloud Monitoring metric kind. + + :param metric: OpenTelemetry metric + :return: Google Cloud Monitoring metric kind + """ + data = metric.data + if isinstance(data, Sum): + if data.is_monotonic: + return MetricDescriptor.MetricKind.CUMULATIVE + else: + return MetricDescriptor.MetricKind.GAUGE + elif isinstance(data, Gauge): + return MetricDescriptor.MetricKind.GAUGE + elif isinstance(data, Histogram): + return MetricDescriptor.MetricKind.CUMULATIVE + else: + # Exhaustive check + _: NoReturn = data + logger.warning( + "Unsupported metric data type %s, ignoring it", + type(data).__name__, + ) + return None + + @staticmethod + def _extract_metric_labels( + data_point: Union[NumberDataPoint, HistogramDataPoint] + ) -> Tuple[dict, dict]: + """ + Extract the metric labels from the data point. 
+ + :param data_point: OpenTelemetry data point + :return: tuple of metric labels and monitored resource labels + """ + metric_labels = {} + monitored_resource_labels = {} + for key, value in (data_point.attributes or {}).items(): + normalized_key = _normalize_label_key(key) + val = str(value) + if key in METRIC_LABELS: + metric_labels[normalized_key] = val + if key in MONITORED_RESOURCE_LABELS: + monitored_resource_labels[normalized_key] = val + return metric_labels, monitored_resource_labels + + # Unchanged from https://github.com/GoogleCloudPlatform/opentelemetry-operations-python/blob/main/opentelemetry-exporter-gcp-monitoring/src/opentelemetry/exporter/cloud_monitoring/__init__.py + @staticmethod + def _to_point( + kind: "MetricDescriptor.MetricKind.V", + data_point: Union[NumberDataPoint, HistogramDataPoint], + ) -> Point: + # Create a Google Cloud Monitoring data point value based on the OpenTelemetry metric data point type + ## For histograms, we need to calculate the mean and bucket counts + if isinstance(data_point, HistogramDataPoint): + mean = data_point.sum / data_point.count if data_point.count else 0.0 + point_value = TypedValue( + distribution_value=Distribution( + count=data_point.count, + mean=mean, + bucket_counts=data_point.bucket_counts, + bucket_options=Distribution.BucketOptions( + explicit_buckets=Distribution.BucketOptions.Explicit( + bounds=data_point.explicit_bounds, + ) + ), + ) + ) + else: + # For other metric types, we can use the data point value directly + if isinstance(data_point.value, int): + point_value = TypedValue(int64_value=data_point.value) + else: + point_value = TypedValue(double_value=data_point.value) + + # DELTA case should never happen but adding it to be future proof + if ( + kind is MetricDescriptor.MetricKind.CUMULATIVE + or kind is MetricDescriptor.MetricKind.DELTA + ): + # Create a Google Cloud Monitoring time interval from the OpenTelemetry data point timestamps + interval = TimeInterval( + start_time=_timestamp_from_nanos(data_point.start_time_unix_nano), + end_time=_timestamp_from_nanos(data_point.time_unix_nano), + ) + else: + # For non time ranged metrics, we only need the end time + interval = TimeInterval( + end_time=_timestamp_from_nanos(data_point.time_unix_nano), + ) + return Point(interval=interval, value=point_value) + + @staticmethod + def _data_point_to_timeseries_pb( + data_point, + metric, + monitored_resource, + labels, + ) -> TimeSeries: + """ + Convert the data point to a Google Cloud Monitoring time series. + + :param data_point: OpenTelemetry data point + :param metric: OpenTelemetry metric + :param monitored_resource: Google Cloud Monitoring monitored resource + :param labels: metric labels + :return: Google Cloud Monitoring time series + """ + if metric.name not in METRIC_NAMES: + return None + + kind = CloudMonitoringMetricsExporter._to_metric_kind(metric) + point = CloudMonitoringMetricsExporter._to_point(kind, data_point) + type = f"{NATIVE_METRICS_PREFIX}/{metric.name}" + series = TimeSeries( + resource=monitored_resource, + metric_kind=kind, + points=[point], + metric=GMetric(type=type, labels=labels), + unit=metric.unit or "", + ) + return series + + @staticmethod + def _resource_metrics_to_timeseries_pb( + metrics_data: MetricsData, + ) -> List[TimeSeries]: + """ + Convert the metrics data to a list of Google Cloud Monitoring time series. 
+ + :param metrics_data: OpenTelemetry metrics data + :return: list of Google Cloud Monitoring time series + """ + timeseries_list = [] + for resource_metric in metrics_data.resource_metrics: + for scope_metric in resource_metric.scope_metrics: + # Filter for spanner builtin metrics + if scope_metric.scope.name != BUILT_IN_METRICS_METER_NAME: + continue + + for metric in scope_metric.metrics: + for data_point in metric.data.data_points: + ( + metric_labels, + monitored_resource_labels, + ) = CloudMonitoringMetricsExporter._extract_metric_labels( + data_point + ) + monitored_resource = CloudMonitoringMetricsExporter._resource_to_monitored_resource_pb( + resource_metric.resource, monitored_resource_labels + ) + timeseries = ( + CloudMonitoringMetricsExporter._data_point_to_timeseries_pb( + data_point, metric, monitored_resource, metric_labels + ) + ) + if timeseries is not None: + timeseries_list.append(timeseries) + + return timeseries_list + + def export( + self, + metrics_data: MetricsData, + timeout_millis: float = 10_000, + **kwargs, + ) -> MetricExportResult: + """ + Export the metrics data to Google Cloud Monitoring. + + :param metrics_data: OpenTelemetry metrics data + :param timeout_millis: timeout in milliseconds + :return: MetricExportResult + """ + if not HAS_DEPENDENCIES_INSTALLED: + logger.warning("Metric exporter called without dependencies installed.") + return False + + time_series_list = self._resource_metrics_to_timeseries_pb(metrics_data) + self._batch_write(time_series_list, timeout_millis) + return True + + def force_flush(self, timeout_millis: float = 10_000) -> bool: + """Not implemented.""" + return True + + def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None: + """Not implemented.""" + pass + + +def _timestamp_from_nanos(nanos: int) -> Timestamp: + ts = Timestamp() + ts.FromNanoseconds(nanos) + return ts + + +def _normalize_label_key(key: str) -> str: + """Make the key into a valid Google Cloud Monitoring label key. + + See reference impl + https://github.com/GoogleCloudPlatform/opentelemetry-operations-go/blob/e955c204f4f2bfdc92ff0ad52786232b975efcc2/exporter/metric/metric.go#L595-L604 + """ + sanitized = "".join(c if c.isalpha() or c.isnumeric() else "_" for c in key) + if sanitized[0].isdigit(): + sanitized = "key_" + sanitized + return sanitized diff --git a/setup.py b/setup.py index 544d117fd7..619607b794 100644 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ "opentelemetry-api >= 1.22.0", "opentelemetry-sdk >= 1.22.0", "opentelemetry-semantic-conventions >= 0.43b0", + "google-cloud-monitoring >= 2.16.0", ], "libcst": "libcst >= 0.2.5", } diff --git a/testing/constraints-3.10.txt b/testing/constraints-3.10.txt index ad3f0fa58e..5369861daf 100644 --- a/testing/constraints-3.10.txt +++ b/testing/constraints-3.10.txt @@ -5,3 +5,4 @@ google-api-core proto-plus protobuf grpc-google-iam-v1 +google-cloud-monitoring diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt index ad3f0fa58e..28bc2bd36c 100644 --- a/testing/constraints-3.11.txt +++ b/testing/constraints-3.11.txt @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # This constraints file is required for unit tests. # List all library dependencies and extras in this file. 
+google-cloud-monitoring google-api-core proto-plus protobuf diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt index ad3f0fa58e..5369861daf 100644 --- a/testing/constraints-3.12.txt +++ b/testing/constraints-3.12.txt @@ -5,3 +5,4 @@ google-api-core proto-plus protobuf grpc-google-iam-v1 +google-cloud-monitoring diff --git a/testing/constraints-3.13.txt b/testing/constraints-3.13.txt index ad3f0fa58e..5369861daf 100644 --- a/testing/constraints-3.13.txt +++ b/testing/constraints-3.13.txt @@ -5,3 +5,4 @@ google-api-core proto-plus protobuf grpc-google-iam-v1 +google-cloud-monitoring diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index e468d57168..af33b0c8e8 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -16,3 +16,4 @@ opentelemetry-semantic-conventions==0.43b0 protobuf==3.20.2 deprecated==1.2.14 grpc-interceptor==0.15.4 +google-cloud-monitoring==2.16.0 diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index ad3f0fa58e..5369861daf 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -5,3 +5,4 @@ google-api-core proto-plus protobuf grpc-google-iam-v1 +google-cloud-monitoring diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index ad3f0fa58e..5369861daf 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -5,3 +5,4 @@ google-api-core proto-plus protobuf grpc-google-iam-v1 +google-cloud-monitoring diff --git a/tests/unit/test_metric_exporter.py b/tests/unit/test_metric_exporter.py new file mode 100644 index 0000000000..08ae9ecf21 --- /dev/null +++ b/tests/unit/test_metric_exporter.py @@ -0,0 +1,488 @@ +# Copyright 2016 Google LLC All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from unittest.mock import patch, MagicMock, Mock +from google.cloud.spanner_v1.metrics.metrics_exporter import ( + CloudMonitoringMetricsExporter, + _normalize_label_key, +) +from google.api.metric_pb2 import MetricDescriptor +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import ( + InMemoryMetricReader, + Sum, + Gauge, + Histogram, + NumberDataPoint, + HistogramDataPoint, + AggregationTemporality, +) +from google.cloud.spanner_v1.metrics.constants import METRIC_NAME_OPERATION_COUNT + +from tests._helpers import ( + HAS_OPENTELEMETRY_INSTALLED, +) + + +# Test Constants +PROJECT_ID = "fake-project-id" +INSTANCE_ID = "fake-instance-id" +DATABASE_ID = "fake-database-id" +SCOPE_NAME = "gax-python" + +# Skip tests if opentelemetry is not installed +if HAS_OPENTELEMETRY_INSTALLED: + + class TestMetricsExporter(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.metric_attributes = { + "project_id": PROJECT_ID, + "instance_id": INSTANCE_ID, + "instance_config": "test_config", + "location": "test_location", + "client_hash": "test_hash", + "client_uid": "test_uid", + "client_name": "test_name", + "database": DATABASE_ID, + "method": "test_method", + "status": "test_status", + "directpath_enabled": "true", + "directpath_used": "false", + "other": "ignored", + } + + def setUp(self): + self.metric_reader = InMemoryMetricReader() + self.provider = MeterProvider(metric_readers=[self.metric_reader]) + self.meter = self.provider.get_meter(SCOPE_NAME) + self.operation_count = self.meter.create_counter( + name=METRIC_NAME_OPERATION_COUNT, + description="A test counter", + unit="counts", + ) + + def test_default_ctor(self): + exporter = CloudMonitoringMetricsExporter() + self.assertIsNotNone(exporter.project_id) + + def test_normalize_label_key(self): + """Test label key normalization""" + test_cases = [ + ("simple", "simple"), + ("with space", "with_space"), + ("with-dash", "with_dash"), + ("123_number_prefix", "key_123_number_prefix"), + ("special!characters@", "special_characters_"), + ] + + for input_key, expected_output in test_cases: + self.assertEqual(_normalize_label_key(input_key), expected_output) + + def test_to_metric_kind(self): + """Test conversion of different metric types to GCM metric kinds""" + # Test monotonic Sum returns CUMULATIVE + metric_sum = Mock( + data=Sum( + data_points=[], + aggregation_temporality=AggregationTemporality.UNSPECIFIED, + is_monotonic=True, + ) + ) + self.assertEqual( + CloudMonitoringMetricsExporter._to_metric_kind(metric_sum), + MetricDescriptor.MetricKind.CUMULATIVE, + ) + + # Test non-monotonic Sum returns GAUGE + metric_sum_non_monotonic = Mock( + data=Sum( + data_points=[], + aggregation_temporality=AggregationTemporality.UNSPECIFIED, + is_monotonic=False, + ) + ) + self.assertEqual( + CloudMonitoringMetricsExporter._to_metric_kind( + metric_sum_non_monotonic + ), + MetricDescriptor.MetricKind.GAUGE, + ) + + # Test Gauge returns GAUGE + metric_gauge = Mock(data=Gauge(data_points=[])) + self.assertEqual( + CloudMonitoringMetricsExporter._to_metric_kind(metric_gauge), + MetricDescriptor.MetricKind.GAUGE, + ) + + # Test Histogram returns CUMULATIVE + metric_histogram = Mock( + data=Histogram( + data_points=[], + aggregation_temporality=AggregationTemporality.UNSPECIFIED, + ) + ) + self.assertEqual( + CloudMonitoringMetricsExporter._to_metric_kind(metric_histogram), + MetricDescriptor.MetricKind.CUMULATIVE, + ) + + # Test Unknown data type warns + metric_unknown = Mock(data=Mock()) + with 
self.assertLogs( + "google.cloud.spanner_v1.metrics.metrics_exporter", level="WARNING" + ) as log: + self.assertIsNone( + CloudMonitoringMetricsExporter._to_metric_kind(metric_unknown) + ) + self.assertIn( + "WARNING:google.cloud.spanner_v1.metrics.metrics_exporter:Unsupported metric data type Mock, ignoring it", + log.output, + ) + + def test_extract_metric_labels(self): + """Test extraction of metric and resource labels""" + import time + + data_point = NumberDataPoint( + attributes={ + # Metric labels + "client_uid": "test-client-uid", + "client_name": "test-client-name", + "database": "test-db", + "method": "test-method", + "status": "test-status", + "directpath_enabled": "test-directpath-enabled", + "directpath_used": "test-directpath-used", + # Monitored Resource label + "project_id": "test-project-id", + "instance_id": "test-instance-id", + "instance_config": "test-instance-config", + "location": "test-location", + "client_hash": "test-client-hash", + # All other labels ignored + "unknown": "ignored", + "Client_UID": "ignored", + }, + start_time_unix_nano=time.time_ns(), + time_unix_nano=time.time_ns(), + value=0, + ) + + ( + metric_labels, + resource_labels, + ) = CloudMonitoringMetricsExporter._extract_metric_labels(data_point) + + # Verify that the attributes are properly distributed and reassigned + + ## Metric Labels + self.assertIn("client_uid", metric_labels) + self.assertEqual(metric_labels["client_uid"], "test-client-uid") + self.assertIn("client_name", metric_labels) + self.assertEqual(metric_labels["client_name"], "test-client-name") + self.assertIn("database", metric_labels) + self.assertEqual(metric_labels["database"], "test-db") + self.assertIn("method", metric_labels) + self.assertEqual(metric_labels["method"], "test-method") + self.assertIn("status", metric_labels) + self.assertEqual(metric_labels["status"], "test-status") + self.assertIn("directpath_enabled", metric_labels) + self.assertEqual( + metric_labels["directpath_enabled"], "test-directpath-enabled" + ) + self.assertIn("directpath_used", metric_labels) + self.assertEqual(metric_labels["directpath_used"], "test-directpath-used") + + ## Metric Resource Labels + self.assertIn("project_id", resource_labels) + self.assertEqual(resource_labels["project_id"], "test-project-id") + self.assertIn("instance_id", resource_labels) + self.assertEqual(resource_labels["instance_id"], "test-instance-id") + self.assertIn("instance_config", resource_labels) + self.assertEqual(resource_labels["instance_config"], "test-instance-config") + self.assertIn("location", resource_labels) + self.assertEqual(resource_labels["location"], "test-location") + self.assertIn("client_hash", resource_labels) + self.assertEqual(resource_labels["client_hash"], "test-client-hash") + + # Other attributes are ignored + self.assertNotIn("unknown", metric_labels) + self.assertNotIn("unknown", resource_labels) + ## including case sensitive keys + self.assertNotIn("Client_UID", metric_labels) + self.assertNotIn("Client_UID", resource_labels) + + def test_metric_timeseries_conversion(self): + """Test to verify conversion from OTEL Metrics to GCM Time Series.""" + # Add metrics + self.operation_count.add(1, attributes=self.metric_attributes) + self.operation_count.add(2, attributes=self.metric_attributes) + + # Export metrics + metrics = self.metric_reader.get_metrics_data() + self.assertTrue(metrics is not None) + + exporter = CloudMonitoringMetricsExporter(PROJECT_ID) + timeseries = exporter._resource_metrics_to_timeseries_pb(metrics) + + # Both counter 
values should be summed together
+            self.assertEqual(len(timeseries), 1)
+            self.assertEqual(timeseries[0].points.pop(0).value.int64_value, 3)
+
+        def test_metric_timeseries_scope_filtering(self):
+            """Test to verify that metrics without the `gax-python` scope are filtered out."""
+            # Create metric instruments
+            meter = self.provider.get_meter("WRONG_SCOPE")
+            counter = meter.create_counter(
+                name="operation_latencies", description="A test counter", unit="ms"
+            )
+
+            # Add metrics
+            counter.add(1, attributes=self.metric_attributes)
+            counter.add(2, attributes=self.metric_attributes)
+
+            # Export metrics
+            metrics = self.metric_reader.get_metrics_data()
+            exporter = CloudMonitoringMetricsExporter(PROJECT_ID)
+            timeseries = exporter._resource_metrics_to_timeseries_pb(metrics)
+
+            # Metrics with an incorrect scope should be filtered out
+            self.assertEqual(len(timeseries), 0)
+
+        def test_batch_write(self):
+            """Verify that writes happen in batches of 200"""
+            from google.protobuf.timestamp_pb2 import Timestamp
+            from google.cloud.monitoring_v3 import MetricServiceClient
+            from google.api.monitored_resource_pb2 import MonitoredResource
+            from google.api.metric_pb2 import Metric as GMetric
+            import random
+            from google.cloud.monitoring_v3 import (
+                TimeSeries,
+                Point,
+                TimeInterval,
+                TypedValue,
+            )
+
+            mockClient = MagicMock(spec=MetricServiceClient)
+            mockClient.create_service_time_series = Mock(return_value=None)
+            exporter = CloudMonitoringMetricsExporter(PROJECT_ID, mockClient)
+
+            # Create timestamps for the time series
+            start_time = Timestamp()
+            start_time.FromSeconds(1234567890)
+            end_time = Timestamp()
+            end_time.FromSeconds(1234567900)
+
+            # Create test time series
+            timeseries = []
+            for i in range(400):
+                timeseries.append(
+                    TimeSeries(
+                        metric=GMetric(
+                            type=f"custom.googleapis.com/spanner/test_metric_{i}",
+                            labels={"client_uid": "test-client", "database": "test-db"},
+                        ),
+                        resource=MonitoredResource(
+                            type="spanner_instance",
+                            labels={
+                                "project_id": PROJECT_ID,
+                                "instance_id": INSTANCE_ID,
+                                "location": "test-location",
+                            },
+                        ),
+                        metric_kind=MetricDescriptor.MetricKind.CUMULATIVE,
+                        points=[
+                            Point(
+                                interval=TimeInterval(
+                                    start_time=start_time, end_time=end_time
+                                ),
+                                value=TypedValue(int64_value=random.randint(1, 100)),
+                            )
+                        ],
+                    ),
+                )
+
+            # Define a side effect to extract the time series data passed to the mocked CreateTimeSeriesRequest
+            tsr_timeseries = []
+
+            def create_tsr_side_effect(name, time_series):
+                nonlocal tsr_timeseries
+                tsr_timeseries = time_series
+
+            patch_path = "google.cloud.spanner_v1.metrics.metrics_exporter.CreateTimeSeriesRequest"
+            with patch(patch_path, side_effect=create_tsr_side_effect):
+                exporter._batch_write(timeseries, 10000)
+                # Verify that the Create Time Series calls happen in batches of max 200 elements
+                self.assertTrue(len(tsr_timeseries) > 0 and len(tsr_timeseries) <= 200)
+
+            # Verify the mock was called with the correct arguments
+            self.assertEqual(len(mockClient.create_service_time_series.mock_calls), 2)
+
+        @patch(
+            "google.cloud.spanner_v1.metrics.metrics_exporter.HAS_DEPENDENCIES_INSTALLED",
+            False,
+        )
+        def test_export_early_exit_if_extras_not_installed(self):
+            """Verify that Export will early exit and return None if OpenTelemetry and/or Google Cloud Monitoring extra modules are not installed."""
+            # Suppress expected warning log
+            with self.assertLogs(
+                "google.cloud.spanner_v1.metrics.metrics_exporter", level="WARNING"
+            ) as log:
+                exporter = CloudMonitoringMetricsExporter(PROJECT_ID)
+
self.assertFalse(exporter.export([])) + self.assertIn( + "WARNING:google.cloud.spanner_v1.metrics.metrics_exporter:Metric exporter called without dependencies installed.", + log.output, + ) + + def test_export(self): + """Verify that the export call will convert and send the requests out.""" + # Create metric instruments + meter = self.provider.get_meter("gax-python") + counter = meter.create_counter( + name="attempt_count", description="A test counter", unit="count" + ) + latency = meter.create_counter( + name="attempt_latencies", description="test latencies", unit="ms" + ) + + # Add metrics + counter.add(10, attributes=self.metric_attributes) + counter.add(25, attributes=self.metric_attributes) + latency.add(30, attributes=self.metric_attributes) + latency.add(45, attributes=self.metric_attributes) + + # Export metrics + metrics = self.metric_reader.get_metrics_data() + mock_client = Mock() + exporter = CloudMonitoringMetricsExporter(PROJECT_ID, mock_client) + patch_path = "google.cloud.spanner_v1.metrics.metrics_exporter.CloudMonitoringMetricsExporter._batch_write" + with patch(patch_path) as mock_batch_write: + exporter.export(metrics) + + # Verify metrics passed to be sent to Google Cloud Monitoring + mock_batch_write.assert_called_once() + batch_args, _ = mock_batch_write.call_args + timeseries = batch_args[0] + self.assertEqual(len(timeseries), 2) + + def test_force_flush(self): + """Verify that the unimplemented force flush can be called.""" + exporter = CloudMonitoringMetricsExporter(PROJECT_ID) + self.assertTrue(exporter.force_flush()) + + def test_shutdown(self): + """Verify that the unimplemented shutdown can be called.""" + exporter = CloudMonitoringMetricsExporter() + try: + exporter.shutdown() + except Exception as e: + self.fail(f"Shutdown() raised an exception: {e}") + + def test_data_point_to_timeseries_early_exit(self): + """Early exit function if an unknown metric name is supplied.""" + metric = Mock(name="TestMetricName") + self.assertIsNone( + CloudMonitoringMetricsExporter._data_point_to_timeseries_pb( + None, metric, None, None + ) + ) + + @patch( + "google.cloud.spanner_v1.metrics.metrics_exporter.CloudMonitoringMetricsExporter._data_point_to_timeseries_pb" + ) + def test_metrics_to_time_series_empty_input( + self, mocked_data_point_to_timeseries_pb + ): + """Verify that metric entries with no timeseries data do not return a time series entry.""" + exporter = CloudMonitoringMetricsExporter() + data_point = Mock() + metric = Mock(data_points=[data_point]) + scope_metric = Mock( + metrics=[metric], scope=Mock(name="operation_latencies") + ) + resource_metric = Mock(scope_metrics=[scope_metric]) + metrics_data = Mock(resource_metrics=[resource_metric]) + + exporter._resource_metrics_to_timeseries_pb(metrics_data) + + def test_to_point(self): + """Verify conversion of datapoints.""" + exporter = CloudMonitoringMetricsExporter() + + number_point = NumberDataPoint( + attributes=[], start_time_unix_nano=0, time_unix_nano=0, value=9 + ) + + # Test that provided int number point values are set to the converted int data point + converted_num_point = exporter._to_point( + MetricDescriptor.MetricKind.CUMULATIVE, number_point + ) + + self.assertEqual(converted_num_point.value.int64_value, 9) + + # Test that provided float number point values are set to converted double data point + float_number_point = NumberDataPoint( + attributes=[], start_time_unix_nano=0, time_unix_nano=0, value=12.20 + ) + converted_float_num_point = exporter._to_point( + 
MetricDescriptor.MetricKind.CUMULATIVE, float_number_point
+            )
+            self.assertEqual(converted_float_num_point.value.double_value, 12.20)
+
+            hist_point = HistogramDataPoint(
+                attributes=[],
+                start_time_unix_nano=123,
+                time_unix_nano=456,
+                count=1,
+                sum=2,
+                bucket_counts=[3],
+                explicit_bounds=[4],
+                min=5.0,
+                max=6.0,
+            )
+
+            # Test that provided histogram point values are set to the converted data point
+            converted_hist_point = exporter._to_point(
+                MetricDescriptor.MetricKind.CUMULATIVE, hist_point
+            )
+            self.assertEqual(converted_hist_point.value.distribution_value.count, 1)
+            self.assertEqual(converted_hist_point.value.distribution_value.mean, 2)
+
+            hist_point_missing_count = HistogramDataPoint(
+                attributes=[],
+                start_time_unix_nano=123,
+                time_unix_nano=456,
+                count=None,
+                sum=2,
+                bucket_counts=[3],
+                explicit_bounds=[4],
+                min=5.0,
+                max=6.0,
+            )
+
+            # Test that histogram points missing a count value have their mean defaulted to 0
+            # and that non-cumulative / delta kinds default to a single-timestamp interval
+            converted_hist_point_no_count = exporter._to_point(
+                MetricDescriptor.MetricKind.METRIC_KIND_UNSPECIFIED,
+                hist_point_missing_count,
+            )
+            self.assertEqual(
+                converted_hist_point_no_count.value.distribution_value.mean, 0
+            )
+            self.assertIsNone(converted_hist_point_no_count.interval.start_time)
+            self.assertIsNotNone(converted_hist_point_no_count.interval.end_time)

From 0887eb43b6ea8bd9076ca81977d1446011335853 Mon Sep 17 00:00:00 2001
From: aakashanandg
Date: Thu, 9 Jan 2025 18:04:07 +0530
Subject: [PATCH 11/19] fix: update retry strategy for mutation calls to
 handle aborted transactions (#1279)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: update retry strategy for mutation calls to handle aborted transactions

* test: add mock server test for aborted batch

* chore(python): Update the python version in docs presubmit to use 3.10 (#1281)

Source-Link: https://github.com/googleapis/synthtool/commit/de3def663b75d8b9ae1e5d548364c960ff13af8f
Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:a1c5112b81d645f5bbc4d4bbc99d7dcb5089a52216c0e3fb1203a0eeabadd7d5

Co-authored-by: Owl Bot

* fix: Refactoring existing retry logic for aborted transactions and cleaning up redundant code

* fix: fixed linting errors

* feat: support GRAPH and pipe syntax in dbapi (#1285)

Recognize GRAPH and pipe syntax queries as valid queries in dbapi.
* chore: Add Custom OpenTelemetry Exporter in for Service Metrics (#1273) * chore: Add Custom OpenTelemetry Exporter in for Service Metrics * Updated copyright dates to 2025 --------- Co-authored-by: rahul2393 * fix: removing retry logic for RST_STREAM errors from _retry_on_aborted_exception handler --------- Co-authored-by: Knut Olav Løite Co-authored-by: gcf-owl-bot[bot] <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Lester Szeto Co-authored-by: rahul2393 --- .gitignore | 4 + .../cloud/spanner_dbapi/transaction_helper.py | 2 +- google/cloud/spanner_v1/_helpers.py | 75 +++++++++++++++++++ google/cloud/spanner_v1/batch.py | 16 +++- google/cloud/spanner_v1/database.py | 10 ++- google/cloud/spanner_v1/session.py | 58 +------------- .../cloud/spanner_v1/testing/mock_spanner.py | 17 ++++- .../test_aborted_transaction.py | 24 ++++++ tests/unit/test__helpers.py | 60 +++++++++++++++ tests/unit/test_batch.py | 36 +++++++++ tests/unit/test_database.py | 13 ++-- tests/unit/test_session.py | 4 +- 12 files changed, 247 insertions(+), 72 deletions(-) diff --git a/.gitignore b/.gitignore index d083ea1ddc..4797754726 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,7 @@ system_tests/local_test_setup # Make sure a generated file isn't accidentally committed. pylintrc pylintrc.test + + +# Ignore coverage files +.coverage* diff --git a/google/cloud/spanner_dbapi/transaction_helper.py b/google/cloud/spanner_dbapi/transaction_helper.py index bc896009c7..f8f5bfa584 100644 --- a/google/cloud/spanner_dbapi/transaction_helper.py +++ b/google/cloud/spanner_dbapi/transaction_helper.py @@ -20,7 +20,7 @@ from google.cloud.spanner_dbapi.batch_dml_executor import BatchMode from google.cloud.spanner_dbapi.exceptions import RetryAborted -from google.cloud.spanner_v1.session import _get_retry_delay +from google.cloud.spanner_v1._helpers import _get_retry_delay if TYPE_CHECKING: from google.cloud.spanner_dbapi import Connection, Cursor diff --git a/google/cloud/spanner_v1/_helpers.py b/google/cloud/spanner_v1/_helpers.py index 1f4bf5b174..27e53200ed 100644 --- a/google/cloud/spanner_v1/_helpers.py +++ b/google/cloud/spanner_v1/_helpers.py @@ -27,11 +27,15 @@ from google.protobuf.internal.enum_type_wrapper import EnumTypeWrapper from google.api_core import datetime_helpers +from google.api_core.exceptions import Aborted from google.cloud._helpers import _date_from_iso8601_date from google.cloud.spanner_v1 import TypeCode from google.cloud.spanner_v1 import ExecuteSqlRequest from google.cloud.spanner_v1 import JsonObject from google.cloud.spanner_v1.request_id_header import with_request_id +from google.rpc.error_details_pb2 import RetryInfo + +import random # Validation error messages NUMERIC_MAX_SCALE_ERR_MSG = ( @@ -460,6 +464,23 @@ def _metadata_with_prefix(prefix, **kw): return [("google-cloud-resource-prefix", prefix)] +def _retry_on_aborted_exception( + func, + deadline, +): + """ + Handles retry logic for Aborted exceptions, considering the deadline. + """ + attempts = 0 + while True: + try: + attempts += 1 + return func() + except Aborted as exc: + _delay_until_retry(exc, deadline=deadline, attempts=attempts) + continue + + def _retry( func, retry_count=5, @@ -529,6 +550,60 @@ def _metadata_with_leader_aware_routing(value, **kw): return ("x-goog-spanner-route-to-leader", str(value).lower()) +def _delay_until_retry(exc, deadline, attempts): + """Helper for :meth:`Session.run_in_transaction`. + + Detect retryable abort, and impose server-supplied delay. 
+
+    :type exc: :class:`google.api_core.exceptions.Aborted`
+    :param exc: exception for aborted transaction
+
+    :type deadline: float
+    :param deadline: maximum timestamp to continue retrying the transaction.
+
+    :type attempts: int
+    :param attempts: number of call retries
+    """
+
+    cause = exc.errors[0]
+    now = time.time()
+    if now >= deadline:
+        raise
+
+    delay = _get_retry_delay(cause, attempts)
+    if delay is not None:
+        if now + delay > deadline:
+            raise
+
+        time.sleep(delay)
+
+
+def _get_retry_delay(cause, attempts):
+    """Helper for :func:`_delay_until_retry`.
+
+    :type cause: :class:`grpc.Call`
+    :param cause: exception for aborted transaction
+
+    :rtype: float
+    :returns: seconds to wait before retrying the transaction.
+
+    :type attempts: int
+    :param attempts: number of call retries
+    """
+    if hasattr(cause, "trailing_metadata"):
+        metadata = dict(cause.trailing_metadata())
+    else:
+        metadata = {}
+    retry_info_pb = metadata.get("google.rpc.retryinfo-bin")
+    if retry_info_pb is not None:
+        retry_info = RetryInfo()
+        retry_info.ParseFromString(retry_info_pb)
+        nanos = retry_info.retry_delay.nanos
+        return retry_info.retry_delay.seconds + nanos / 1.0e9
+
+    return 2**attempts + random.random()
+
+
 class AtomicCounter:
     def __init__(self, start_value=0):
         self.__lock = threading.Lock()
diff --git a/google/cloud/spanner_v1/batch.py b/google/cloud/spanner_v1/batch.py
index 8d62ac0883..3e61872368 100644
--- a/google/cloud/spanner_v1/batch.py
+++ b/google/cloud/spanner_v1/batch.py
@@ -29,8 +29,12 @@
 from google.cloud.spanner_v1._opentelemetry_tracing import trace_call
 from google.cloud.spanner_v1 import RequestOptions
 from google.cloud.spanner_v1._helpers import _retry
+from google.cloud.spanner_v1._helpers import _retry_on_aborted_exception
 from google.cloud.spanner_v1._helpers import _check_rst_stream_error
 from google.api_core.exceptions import InternalServerError
+import time
+
+DEFAULT_RETRY_TIMEOUT_SECS = 30


 class _BatchBase(_SessionWrapper):
@@ -162,6 +166,7 @@ def commit(
         request_options=None,
         max_commit_delay=None,
         exclude_txn_from_change_streams=False,
+        **kwargs,
     ):
         """Commit mutations to the database.
@@ -227,9 +232,12 @@
             request=request,
             metadata=metadata,
         )
-        response = _retry(
+        deadline = time.time() + kwargs.get(
+            "timeout_secs", DEFAULT_RETRY_TIMEOUT_SECS
+        )
+        response = _retry_on_aborted_exception(
             method,
-            allowed_exceptions={InternalServerError: _check_rst_stream_error},
+            deadline=deadline,
         )
         self.committed = response.commit_timestamp
         self.commit_stats = response.commit_stats
@@ -348,7 +356,9 @@ def batch_write(self, request_options=None, exclude_txn_from_change_streams=Fals
         )
         response = _retry(
             method,
-            allowed_exceptions={InternalServerError: _check_rst_stream_error},
+            allowed_exceptions={
+                InternalServerError: _check_rst_stream_error,
+            },
         )
         self.committed = True
         return response
diff --git a/google/cloud/spanner_v1/database.py b/google/cloud/spanner_v1/database.py
index 88d2bb60f7..8c28cda7ce 100644
--- a/google/cloud/spanner_v1/database.py
+++ b/google/cloud/spanner_v1/database.py
@@ -775,6 +775,7 @@ def batch(
         request_options=None,
         max_commit_delay=None,
         exclude_txn_from_change_streams=False,
+        **kw,
     ):
         """Return an object which wraps a batch.
@@ -805,7 +806,11 @@ def batch( :returns: new wrapper """ return BatchCheckout( - self, request_options, max_commit_delay, exclude_txn_from_change_streams + self, + request_options, + max_commit_delay, + exclude_txn_from_change_streams, + **kw, ) def mutation_groups(self): @@ -1166,6 +1171,7 @@ def __init__( request_options=None, max_commit_delay=None, exclude_txn_from_change_streams=False, + **kw, ): self._database = database self._session = self._batch = None @@ -1177,6 +1183,7 @@ def __init__( self._request_options = request_options self._max_commit_delay = max_commit_delay self._exclude_txn_from_change_streams = exclude_txn_from_change_streams + self._kw = kw def __enter__(self): """Begin ``with`` block.""" @@ -1197,6 +1204,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): request_options=self._request_options, max_commit_delay=self._max_commit_delay, exclude_txn_from_change_streams=self._exclude_txn_from_change_streams, + **self._kw, ) finally: if self._database.log_commit_stats and self._batch.commit_stats: diff --git a/google/cloud/spanner_v1/session.py b/google/cloud/spanner_v1/session.py index d73a8cc2b5..ccc0c4ebdc 100644 --- a/google/cloud/spanner_v1/session.py +++ b/google/cloud/spanner_v1/session.py @@ -15,7 +15,6 @@ """Wrapper for Cloud Spanner Session objects.""" from functools import total_ordering -import random import time from datetime import datetime @@ -23,7 +22,8 @@ from google.api_core.exceptions import GoogleAPICallError from google.api_core.exceptions import NotFound from google.api_core.gapic_v1 import method -from google.rpc.error_details_pb2 import RetryInfo +from google.cloud.spanner_v1._helpers import _delay_until_retry +from google.cloud.spanner_v1._helpers import _get_retry_delay from google.cloud.spanner_v1 import ExecuteSqlRequest from google.cloud.spanner_v1 import CreateSessionRequest @@ -554,57 +554,3 @@ def run_in_transaction(self, func, *args, **kw): extra={"commit_stats": txn.commit_stats}, ) return return_value - - -# Rational: this function factors out complex shared deadline / retry -# handling from two `except:` clauses. -def _delay_until_retry(exc, deadline, attempts): - """Helper for :meth:`Session.run_in_transaction`. - - Detect retryable abort, and impose server-supplied delay. - - :type exc: :class:`google.api_core.exceptions.Aborted` - :param exc: exception for aborted transaction - - :type deadline: float - :param deadline: maximum timestamp to continue retrying the transaction. - - :type attempts: int - :param attempts: number of call retries - """ - cause = exc.errors[0] - - now = time.time() - - if now >= deadline: - raise - - delay = _get_retry_delay(cause, attempts) - if delay is not None: - if now + delay > deadline: - raise - - time.sleep(delay) - - -def _get_retry_delay(cause, attempts): - """Helper for :func:`_delay_until_retry`. - - :type exc: :class:`grpc.Call` - :param exc: exception for aborted transaction - - :rtype: float - :returns: seconds to wait before retrying the transaction. 
- - :type attempts: int - :param attempts: number of call retries - """ - metadata = dict(cause.trailing_metadata()) - retry_info_pb = metadata.get("google.rpc.retryinfo-bin") - if retry_info_pb is not None: - retry_info = RetryInfo() - retry_info.ParseFromString(retry_info_pb) - nanos = retry_info.retry_delay.nanos - return retry_info.retry_delay.seconds + nanos / 1.0e9 - - return 2**attempts + random.random() diff --git a/google/cloud/spanner_v1/testing/mock_spanner.py b/google/cloud/spanner_v1/testing/mock_spanner.py index 6b50d9a6d1..f60dbbe72a 100644 --- a/google/cloud/spanner_v1/testing/mock_spanner.py +++ b/google/cloud/spanner_v1/testing/mock_spanner.py @@ -213,10 +213,19 @@ def __create_transaction( def Commit(self, request, context): self._requests.append(request) self.mock_spanner.pop_error(context) - tx = self.transactions[request.transaction_id] - if tx is None: - raise ValueError(f"Transaction not found: {request.transaction_id}") - del self.transactions[request.transaction_id] + if not request.transaction_id == b"": + tx = self.transactions[request.transaction_id] + if tx is None: + raise ValueError(f"Transaction not found: {request.transaction_id}") + tx_id = request.transaction_id + elif not request.single_use_transaction == TransactionOptions(): + tx = self.__create_transaction( + request.session, request.single_use_transaction + ) + tx_id = tx.id + else: + raise ValueError("Unsupported transaction type") + del self.transactions[tx_id] return commit.CommitResponse() def Rollback(self, request, context): diff --git a/tests/mockserver_tests/test_aborted_transaction.py b/tests/mockserver_tests/test_aborted_transaction.py index 89b30a0875..93eb42fe39 100644 --- a/tests/mockserver_tests/test_aborted_transaction.py +++ b/tests/mockserver_tests/test_aborted_transaction.py @@ -95,6 +95,30 @@ def test_run_in_transaction_batch_dml_aborted(self): self.assertTrue(isinstance(requests[2], ExecuteBatchDmlRequest)) self.assertTrue(isinstance(requests[3], CommitRequest)) + def test_batch_commit_aborted(self): + # Add an Aborted error for the Commit method on the mock server. + add_error(SpannerServicer.Commit.__name__, aborted_status()) + with self.database.batch() as batch: + batch.insert( + table="Singers", + columns=("SingerId", "FirstName", "LastName"), + values=[ + (1, "Marc", "Richards"), + (2, "Catalina", "Smith"), + (3, "Alice", "Trentor"), + (4, "Lea", "Martin"), + (5, "David", "Lomond"), + ], + ) + + # Verify that the transaction was retried. + requests = self.spanner_service.requests + self.assertEqual(3, len(requests), msg=requests) + self.assertTrue(isinstance(requests[0], BatchCreateSessionsRequest)) + self.assertTrue(isinstance(requests[1], CommitRequest)) + # The transaction is aborted and retried. 
+        self.assertTrue(isinstance(requests[2], CommitRequest))
+

 def _insert_mutations(transaction: Transaction):
     transaction.insert("my_table", ["col1", "col2"], ["value1", "value2"])
diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py
index e62bff2a2e..ecc8018648 100644
--- a/tests/unit/test__helpers.py
+++ b/tests/unit/test__helpers.py
@@ -882,6 +882,66 @@

         self.assertEqual(test_api.test_fxn.call_count, 3)

+    def test_retry_on_aborted_exception_with_success_after_first_aborted_retry(self):
+        from google.api_core.exceptions import Aborted
+        import time
+        from google.cloud.spanner_v1._helpers import _retry_on_aborted_exception
+        import functools
+
+        test_api = mock.create_autospec(self.test_class)
+        test_api.test_fxn.side_effect = [
+            Aborted("aborted exception", errors=("Aborted error")),
+            "true",
+        ]
+        deadline = time.time() + 30
+        result_after_retry = _retry_on_aborted_exception(
+            functools.partial(test_api.test_fxn), deadline
+        )
+
+        self.assertEqual(test_api.test_fxn.call_count, 2)
+        self.assertTrue(result_after_retry)
+
+    def test_retry_on_aborted_exception_with_success_after_three_retries(self):
+        from google.api_core.exceptions import Aborted
+        import time
+        from google.cloud.spanner_v1._helpers import _retry_on_aborted_exception
+        import functools
+
+        test_api = mock.create_autospec(self.test_class)
+        # Case where the aborted exception is thrown several times before the call succeeds
+        test_api.test_fxn.side_effect = [
+            Aborted("aborted exception", errors=("Aborted error")),
+            Aborted("aborted exception", errors=("Aborted error")),
+            Aborted("aborted exception", errors=("Aborted error")),
+            "true",
+        ]
+        deadline = time.time() + 30
+        _retry_on_aborted_exception(
+            functools.partial(test_api.test_fxn),
+            deadline=deadline,
+        )
+
+        self.assertEqual(test_api.test_fxn.call_count, 4)
+
+    def test_retry_on_aborted_exception_raises_aborted_if_deadline_expires(self):
+        from google.api_core.exceptions import Aborted
+        import time
+        from google.cloud.spanner_v1._helpers import _retry_on_aborted_exception
+        import functools
+
+        test_api = mock.create_autospec(self.test_class)
+        test_api.test_fxn.side_effect = [
+            Aborted("aborted exception", errors=("Aborted error")),
+            "true",
+        ]
+        deadline = time.time() + 0.1
+        with self.assertRaises(Aborted):
+            _retry_on_aborted_exception(
+                functools.partial(test_api.test_fxn), deadline=deadline
+            )
+
+        self.assertEqual(test_api.test_fxn.call_count, 1)
+

 class Test_metadata_with_leader_aware_routing(unittest.TestCase):
     def _call_fut(self, *args, **kw):
diff --git a/tests/unit/test_batch.py b/tests/unit/test_batch.py
index a43678f3b9..738bce9529 100644
--- a/tests/unit/test_batch.py
+++ b/tests/unit/test_batch.py
@@ -14,6 +14,7 @@

 import unittest
+from unittest.mock import MagicMock
 from tests._helpers import (
     OpenTelemetryBase,
     StatusCode,
@@ -265,6 +266,37 @@
             attributes=dict(BASE_ATTRIBUTES, num_mutations=1),
         )

+    def test_aborted_exception_on_commit_with_retries(self):
+        # Test case to verify that an Aborted exception is raised when
+        # batch.commit() is called and the transaction is aborted internally.
+        from google.api_core.exceptions import Aborted
+
+        database = _Database()
+        # Set up the Spanner API so that it throws an Aborted exception when the commit API is called.
+ api = database.spanner_api = _FauxSpannerAPI(_aborted_error=True) + api.commit = MagicMock( + side_effect=Aborted("Transaction was aborted", errors=("Aborted error")) + ) + + # Create mock session and batch objects + session = _Session(database) + batch = self._make_one(session) + batch.insert(TABLE_NAME, COLUMNS, VALUES) + + # Assertion: Ensure that calling batch.commit() raises the Aborted exception + with self.assertRaises(Aborted) as context: + batch.commit() + + # Verify additional details about the exception + self.assertEqual(str(context.exception), "409 Transaction was aborted") + self.assertGreater( + api.commit.call_count, 1, "commit should be called more than once" + ) + # Since we are using exponential backoff here and default timeout is set to 30 sec 2^x <= 30. So value for x will be 4 + self.assertEqual( + api.commit.call_count, 4, "commit should be called exactly 4 times" + ) + def _test_commit_with_options( self, request_options=None, @@ -630,6 +662,7 @@ class _FauxSpannerAPI: _committed = None _batch_request = None _rpc_error = False + _aborted_error = False def __init__(self, **kwargs): self.__dict__.update(**kwargs) @@ -640,6 +673,7 @@ def commit( metadata=None, ): from google.api_core.exceptions import Unknown + from google.api_core.exceptions import Aborted max_commit_delay = None if type(request).pb(request).HasField("max_commit_delay"): @@ -656,6 +690,8 @@ def commit( ) if self._rpc_error: raise Unknown("error") + if self._aborted_error: + raise Aborted("Transaction was aborted", errors=("Aborted error")) return self._commit_response def batch_write( diff --git a/tests/unit/test_database.py b/tests/unit/test_database.py index 6e29255fb7..13a37f66fe 100644 --- a/tests/unit/test_database.py +++ b/tests/unit/test_database.py @@ -1899,8 +1899,8 @@ def test_context_mgr_w_commit_stats_success(self): "CommitStats: mutation_count: 4\n", extra={"commit_stats": commit_stats} ) - def test_context_mgr_w_commit_stats_error(self): - from google.api_core.exceptions import Unknown + def test_context_mgr_w_aborted_commit_status(self): + from google.api_core.exceptions import Aborted from google.cloud.spanner_v1 import CommitRequest from google.cloud.spanner_v1 import TransactionOptions from google.cloud.spanner_v1.batch import Batch @@ -1908,13 +1908,13 @@ def test_context_mgr_w_commit_stats_error(self): database = _Database(self.DATABASE_NAME) database.log_commit_stats = True api = database.spanner_api = self._make_spanner_client() - api.commit.side_effect = Unknown("testing") + api.commit.side_effect = Aborted("aborted exception", errors=("Aborted error")) pool = database._pool = _Pool() session = _Session(database) pool.put(session) checkout = self._make_one(database) - with self.assertRaises(Unknown): + with self.assertRaises(Aborted): with checkout as batch: self.assertIsNone(pool._session) self.assertIsInstance(batch, Batch) @@ -1931,7 +1931,10 @@ def test_context_mgr_w_commit_stats_error(self): return_commit_stats=True, request_options=RequestOptions(), ) - api.commit.assert_called_once_with( + # Asserts that the exponential backoff retry for aborted transactions with a 30-second deadline + # allows for a maximum of 4 retries (2^x <= 30) to stay within the time limit. 
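+        # (That is, the initial attempt plus three retries: backoff sleeps
+        # of roughly 2, 4 and 8 seconds fit within the deadline, while a
+        # further ~16 second wait would overrun it.)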
+ self.assertEqual(api.commit.call_count, 4) + api.commit.assert_any_call( request=request, metadata=[ ("google-cloud-resource-prefix", database.name), diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 0d60e98cd0..55c91435f8 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -1911,7 +1911,7 @@ def unit_of_work(txn, *args, **kw): ) def test_delay_helper_w_no_delay(self): - from google.cloud.spanner_v1.session import _delay_until_retry + from google.cloud.spanner_v1._helpers import _delay_until_retry metadata_mock = mock.Mock() metadata_mock.trailing_metadata.return_value = {} @@ -1928,7 +1928,7 @@ def _time_func(): with mock.patch("time.time", _time_func): with mock.patch( - "google.cloud.spanner_v1.session._get_retry_delay" + "google.cloud.spanner_v1._helpers._get_retry_delay" ) as get_retry_delay_mock: with mock.patch("time.sleep") as sleep_mock: get_retry_delay_mock.return_value = None From 592047ffe858164b93d1a2f6c2fca6f7d3b9dbef Mon Sep 17 00:00:00 2001 From: Emmanuel T Odeke Date: Fri, 10 Jan 2025 03:39:10 -0800 Subject: [PATCH 12/19] observability: PDML + some batch write spans (#1274) * observability: PDML + some batch write spans This change adds spans for Partitioned DML and making updates for Batch. Carved out from PR #1241. * Add more system tests * Account for lack of OpenTelemetry on Python-3.7 * Update tests * Fix more test assertions * Updates from code review * Update tests with code review suggestions * Remove return per code review nit --- google/cloud/spanner_v1/batch.py | 2 +- google/cloud/spanner_v1/database.py | 222 +++++++++++-------- google/cloud/spanner_v1/merged_result_set.py | 12 + google/cloud/spanner_v1/pool.py | 29 +-- google/cloud/spanner_v1/snapshot.py | 8 +- tests/_helpers.py | 19 +- tests/system/test_observability_options.py | 167 ++++++++++---- tests/system/test_session_api.py | 66 ++++-- tests/unit/test_batch.py | 6 +- tests/unit/test_pool.py | 6 +- tests/unit/test_snapshot.py | 27 ++- 11 files changed, 370 insertions(+), 194 deletions(-) diff --git a/google/cloud/spanner_v1/batch.py b/google/cloud/spanner_v1/batch.py index 3e61872368..6a9f1f48f5 100644 --- a/google/cloud/spanner_v1/batch.py +++ b/google/cloud/spanner_v1/batch.py @@ -344,7 +344,7 @@ def batch_write(self, request_options=None, exclude_txn_from_change_streams=Fals ) observability_options = getattr(database, "observability_options", None) with trace_call( - "CloudSpanner.BatchWrite", + "CloudSpanner.batch_write", self._session, trace_attributes, observability_options=observability_options, diff --git a/google/cloud/spanner_v1/database.py b/google/cloud/spanner_v1/database.py index 8c28cda7ce..963debdab8 100644 --- a/google/cloud/spanner_v1/database.py +++ b/google/cloud/spanner_v1/database.py @@ -699,38 +699,43 @@ def execute_partitioned_dml( ) def execute_pdml(): - with SessionCheckout(self._pool) as session: - txn = api.begin_transaction( - session=session.name, options=txn_options, metadata=metadata - ) + with trace_call( + "CloudSpanner.Database.execute_partitioned_pdml", + observability_options=self.observability_options, + ) as span: + with SessionCheckout(self._pool) as session: + add_span_event(span, "Starting BeginTransaction") + txn = api.begin_transaction( + session=session.name, options=txn_options, metadata=metadata + ) - txn_selector = TransactionSelector(id=txn.id) + txn_selector = TransactionSelector(id=txn.id) - request = ExecuteSqlRequest( - session=session.name, - sql=dml, - params=params_pb, - param_types=param_types, - 
query_options=query_options, - request_options=request_options, - ) - method = functools.partial( - api.execute_streaming_sql, - metadata=metadata, - ) + request = ExecuteSqlRequest( + session=session.name, + sql=dml, + params=params_pb, + param_types=param_types, + query_options=query_options, + request_options=request_options, + ) + method = functools.partial( + api.execute_streaming_sql, + metadata=metadata, + ) - iterator = _restart_on_unavailable( - method=method, - trace_name="CloudSpanner.ExecuteStreamingSql", - request=request, - transaction_selector=txn_selector, - observability_options=self.observability_options, - ) + iterator = _restart_on_unavailable( + method=method, + trace_name="CloudSpanner.ExecuteStreamingSql", + request=request, + transaction_selector=txn_selector, + observability_options=self.observability_options, + ) - result_set = StreamedResultSet(iterator) - list(result_set) # consume all partials + result_set = StreamedResultSet(iterator) + list(result_set) # consume all partials - return result_set.stats.row_count_lower_bound + return result_set.stats.row_count_lower_bound return _retry_on_aborted(execute_pdml, DEFAULT_RETRY_BACKOFF)() @@ -1357,6 +1362,10 @@ def to_dict(self): "transaction_id": snapshot._transaction_id, } + @property + def observability_options(self): + return getattr(self._database, "observability_options", {}) + def _get_session(self): """Create session as needed. @@ -1476,27 +1485,32 @@ def generate_read_batches( mappings of information used perform actual partitioned reads via :meth:`process_read_batch`. """ - partitions = self._get_snapshot().partition_read( - table=table, - columns=columns, - keyset=keyset, - index=index, - partition_size_bytes=partition_size_bytes, - max_partitions=max_partitions, - retry=retry, - timeout=timeout, - ) + with trace_call( + f"CloudSpanner.{type(self).__name__}.generate_read_batches", + extra_attributes=dict(table=table, columns=columns), + observability_options=self.observability_options, + ): + partitions = self._get_snapshot().partition_read( + table=table, + columns=columns, + keyset=keyset, + index=index, + partition_size_bytes=partition_size_bytes, + max_partitions=max_partitions, + retry=retry, + timeout=timeout, + ) - read_info = { - "table": table, - "columns": columns, - "keyset": keyset._to_dict(), - "index": index, - "data_boost_enabled": data_boost_enabled, - "directed_read_options": directed_read_options, - } - for partition in partitions: - yield {"partition": partition, "read": read_info.copy()} + read_info = { + "table": table, + "columns": columns, + "keyset": keyset._to_dict(), + "index": index, + "data_boost_enabled": data_boost_enabled, + "directed_read_options": directed_read_options, + } + for partition in partitions: + yield {"partition": partition, "read": read_info.copy()} def process_read_batch( self, @@ -1522,12 +1536,17 @@ def process_read_batch( :rtype: :class:`~google.cloud.spanner_v1.streamed.StreamedResultSet` :returns: a result set instance which can be used to consume rows. 
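
        Example (an illustrative sketch; assumes ``database`` is a
        :class:`~google.cloud.spanner_v1.database.Database` with a
        ``Singers`` table):

            from google.cloud.spanner_v1 import KeySet

            batch_snapshot = database.batch_snapshot()
            for batch in batch_snapshot.generate_read_batches(
                table="Singers",
                columns=["SingerId", "FirstName"],
                keyset=KeySet(all_=True),
            ):
                for row in batch_snapshot.process_read_batch(batch):
                    print(row)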
""" - kwargs = copy.deepcopy(batch["read"]) - keyset_dict = kwargs.pop("keyset") - kwargs["keyset"] = KeySet._from_dict(keyset_dict) - return self._get_snapshot().read( - partition=batch["partition"], **kwargs, retry=retry, timeout=timeout - ) + observability_options = self.observability_options + with trace_call( + f"CloudSpanner.{type(self).__name__}.process_read_batch", + observability_options=observability_options, + ): + kwargs = copy.deepcopy(batch["read"]) + keyset_dict = kwargs.pop("keyset") + kwargs["keyset"] = KeySet._from_dict(keyset_dict) + return self._get_snapshot().read( + partition=batch["partition"], **kwargs, retry=retry, timeout=timeout + ) def generate_query_batches( self, @@ -1602,34 +1621,39 @@ def generate_query_batches( mappings of information used perform actual partitioned reads via :meth:`process_read_batch`. """ - partitions = self._get_snapshot().partition_query( - sql=sql, - params=params, - param_types=param_types, - partition_size_bytes=partition_size_bytes, - max_partitions=max_partitions, - retry=retry, - timeout=timeout, - ) + with trace_call( + f"CloudSpanner.{type(self).__name__}.generate_query_batches", + extra_attributes=dict(sql=sql), + observability_options=self.observability_options, + ): + partitions = self._get_snapshot().partition_query( + sql=sql, + params=params, + param_types=param_types, + partition_size_bytes=partition_size_bytes, + max_partitions=max_partitions, + retry=retry, + timeout=timeout, + ) - query_info = { - "sql": sql, - "data_boost_enabled": data_boost_enabled, - "directed_read_options": directed_read_options, - } - if params: - query_info["params"] = params - query_info["param_types"] = param_types - - # Query-level options have higher precedence than client-level and - # environment-level options - default_query_options = self._database._instance._client._query_options - query_info["query_options"] = _merge_query_options( - default_query_options, query_options - ) + query_info = { + "sql": sql, + "data_boost_enabled": data_boost_enabled, + "directed_read_options": directed_read_options, + } + if params: + query_info["params"] = params + query_info["param_types"] = param_types + + # Query-level options have higher precedence than client-level and + # environment-level options + default_query_options = self._database._instance._client._query_options + query_info["query_options"] = _merge_query_options( + default_query_options, query_options + ) - for partition in partitions: - yield {"partition": partition, "query": query_info} + for partition in partitions: + yield {"partition": partition, "query": query_info} def process_query_batch( self, @@ -1654,9 +1678,16 @@ def process_query_batch( :rtype: :class:`~google.cloud.spanner_v1.streamed.StreamedResultSet` :returns: a result set instance which can be used to consume rows. """ - return self._get_snapshot().execute_sql( - partition=batch["partition"], **batch["query"], retry=retry, timeout=timeout - ) + with trace_call( + f"CloudSpanner.{type(self).__name__}.process_query_batch", + observability_options=self.observability_options, + ): + return self._get_snapshot().execute_sql( + partition=batch["partition"], + **batch["query"], + retry=retry, + timeout=timeout, + ) def run_partitioned_query( self, @@ -1711,18 +1742,23 @@ def run_partitioned_query( :rtype: :class:`~google.cloud.spanner_v1.merged_result_set.MergedResultSet` :returns: a result set instance which can be used to consume rows. 
""" - partitions = list( - self.generate_query_batches( - sql, - params, - param_types, - partition_size_bytes, - max_partitions, - query_options, - data_boost_enabled, + with trace_call( + f"CloudSpanner.${type(self).__name__}.run_partitioned_query", + extra_attributes=dict(sql=sql), + observability_options=self.observability_options, + ): + partitions = list( + self.generate_query_batches( + sql, + params, + param_types, + partition_size_bytes, + max_partitions, + query_options, + data_boost_enabled, + ) ) - ) - return MergedResultSet(self, partitions, 0) + return MergedResultSet(self, partitions, 0) def process(self, batch): """Process a single, partitioned query or read. diff --git a/google/cloud/spanner_v1/merged_result_set.py b/google/cloud/spanner_v1/merged_result_set.py index 9165af9ee3..bfecad1e46 100644 --- a/google/cloud/spanner_v1/merged_result_set.py +++ b/google/cloud/spanner_v1/merged_result_set.py @@ -17,6 +17,8 @@ from typing import Any, TYPE_CHECKING from threading import Lock, Event +from google.cloud.spanner_v1._opentelemetry_tracing import trace_call + if TYPE_CHECKING: from google.cloud.spanner_v1.database import BatchSnapshot @@ -37,6 +39,16 @@ def __init__(self, batch_snapshot, partition_id, merged_result_set): self._queue: Queue[PartitionExecutorResult] = merged_result_set._queue def run(self): + observability_options = getattr( + self._batch_snapshot, "observability_options", {} + ) + with trace_call( + "CloudSpanner.PartitionExecutor.run", + observability_options=observability_options, + ): + self.__run() + + def __run(self): results = None try: results = self._batch_snapshot.process_query_batch(self._partition_id) diff --git a/google/cloud/spanner_v1/pool.py b/google/cloud/spanner_v1/pool.py index 03bff81b52..596f76a1f1 100644 --- a/google/cloud/spanner_v1/pool.py +++ b/google/cloud/spanner_v1/pool.py @@ -523,12 +523,11 @@ def bind(self, database): metadata.append( _metadata_with_leader_aware_routing(database._route_to_leader_enabled) ) - created_session_count = 0 self._database_role = self._database_role or self._database.database_role request = BatchCreateSessionsRequest( database=database.name, - session_count=self.size - created_session_count, + session_count=self.size, session_template=Session(creator_role=self.database_role), ) @@ -549,38 +548,28 @@ def bind(self, database): span_event_attributes, ) - if created_session_count >= self.size: - add_span_event( - current_span, - "Created no new sessions as sessionPool is full", - span_event_attributes, - ) - return - - add_span_event( - current_span, - f"Creating {request.session_count} sessions", - span_event_attributes, - ) - observability_options = getattr(self._database, "observability_options", None) with trace_call( "CloudSpanner.PingingPool.BatchCreateSessions", observability_options=observability_options, ) as span: returned_session_count = 0 - while created_session_count < self.size: + while returned_session_count < self.size: resp = api.batch_create_sessions( request=request, metadata=metadata, ) + + add_span_event( + span, + f"Created {len(resp.session)} sessions", + ) + for session_pb in resp.session: session = self._new_session() + returned_session_count += 1 session._session_id = session_pb.name.split("/")[-1] self.put(session) - returned_session_count += 1 - - created_session_count += len(resp.session) add_span_event( span, diff --git a/google/cloud/spanner_v1/snapshot.py b/google/cloud/spanner_v1/snapshot.py index de610e1387..dc28644d6c 100644 --- a/google/cloud/spanner_v1/snapshot.py +++ 
b/google/cloud/spanner_v1/snapshot.py @@ -680,10 +680,14 @@ def partition_read( ) trace_attributes = {"table_id": table, "columns": columns} + can_include_index = (index != "") and (index is not None) + if can_include_index: + trace_attributes["index"] = index + with trace_call( f"CloudSpanner.{type(self).__name__}.partition_read", self._session, - trace_attributes, + extra_attributes=trace_attributes, observability_options=getattr(database, "observability_options", None), ): method = functools.partial( @@ -784,7 +788,7 @@ def partition_query( trace_attributes = {"db.statement": sql} with trace_call( - "CloudSpanner.PartitionReadWriteTransaction", + f"CloudSpanner.{type(self).__name__}.partition_query", self._session, trace_attributes, observability_options=getattr(database, "observability_options", None), diff --git a/tests/_helpers.py b/tests/_helpers.py index c7b1665e89..667f9f8be1 100644 --- a/tests/_helpers.py +++ b/tests/_helpers.py @@ -86,7 +86,7 @@ def assertSpanAttributes( ): if HAS_OPENTELEMETRY_INSTALLED: if not span: - span_list = self.ot_exporter.get_finished_spans() + span_list = self.get_finished_spans() self.assertEqual(len(span_list) > 0, True) span = span_list[0] @@ -132,3 +132,20 @@ def get_finished_spans(self): def reset(self): self.tearDown() + + def finished_spans_events_statuses(self): + span_list = self.get_finished_spans() + # Some event attributes are noisy/highly ephemeral + # and can't be directly compared against. + got_all_events = [] + imprecise_event_attributes = ["exception.stacktrace", "delay_seconds", "cause"] + for span in span_list: + for event in span.events: + evt_attributes = event.attributes.copy() + for attr_name in imprecise_event_attributes: + if attr_name in evt_attributes: + evt_attributes[attr_name] = "EPHEMERAL" + + got_all_events.append((event.name, evt_attributes)) + + return got_all_events diff --git a/tests/system/test_observability_options.py b/tests/system/test_observability_options.py index 42ce0de7fe..a91955496f 100644 --- a/tests/system/test_observability_options.py +++ b/tests/system/test_observability_options.py @@ -16,6 +16,9 @@ from . import _helpers from google.cloud.spanner_v1 import Client +from google.api_core.exceptions import Aborted +from google.auth.credentials import AnonymousCredentials +from google.rpc import code_pb2 HAS_OTEL_INSTALLED = False @@ -37,7 +40,7 @@ not HAS_OTEL_INSTALLED, reason="OpenTelemetry is necessary to test traces." ) @pytest.mark.skipif( - not _helpers.USE_EMULATOR, reason="mulator is necessary to test traces." + not _helpers.USE_EMULATOR, reason="Emulator is necessary to test traces." 
) def test_observability_options_propagation(): PROJECT = _helpers.EMULATOR_PROJECT @@ -97,7 +100,8 @@ def test_propagation(enable_extended_tracing): _ = val from_global_spans = global_trace_exporter.get_finished_spans() - from_inject_spans = inject_trace_exporter.get_finished_spans() + target_spans = inject_trace_exporter.get_finished_spans() + from_inject_spans = sorted(target_spans, key=lambda v1: v1.start_time) assert ( len(from_global_spans) == 0 ) # "Expecting no spans from the global trace exporter" @@ -131,23 +135,11 @@ def test_propagation(enable_extended_tracing): test_propagation(False) -@pytest.mark.skipif( - not _helpers.USE_EMULATOR, - reason="Emulator needed to run this tests", -) -@pytest.mark.skipif( - not HAS_OTEL_INSTALLED, - reason="Tracing requires OpenTelemetry", -) -def test_transaction_abort_then_retry_spans(): - from google.auth.credentials import AnonymousCredentials - from google.api_core.exceptions import Aborted - from google.rpc import code_pb2 +def create_db_trace_exporter(): from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( InMemorySpanExporter, ) - from opentelemetry.trace.status import StatusCode from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.sampling import ALWAYS_ON @@ -159,20 +151,6 @@ def test_transaction_abort_then_retry_spans(): NODE_COUNT = 5 LABELS = {"test": "true"} - counters = dict(aborted=0) - - def select_in_txn(txn): - results = txn.execute_sql("SELECT 1") - for row in results: - _ = row - - if counters["aborted"] == 0: - counters["aborted"] = 1 - raise Aborted( - "Thrown from ClientInterceptor for testing", - errors=[_helpers.FauxCall(code_pb2.ABORTED)], - ) - tracer_provider = TracerProvider(sampler=ALWAYS_ON) trace_exporter = InMemorySpanExporter() tracer_provider.add_span_processor(SimpleSpanProcessor(trace_exporter)) @@ -206,22 +184,72 @@ def select_in_txn(txn): except Exception: pass + return db, trace_exporter + + +@pytest.mark.skipif( + not _helpers.USE_EMULATOR, + reason="Emulator needed to run this test", +) +@pytest.mark.skipif( + not HAS_OTEL_INSTALLED, + reason="Tracing requires OpenTelemetry", +) +def test_transaction_abort_then_retry_spans(): + from opentelemetry.trace.status import StatusCode + + db, trace_exporter = create_db_trace_exporter() + + counters = dict(aborted=0) + + def select_in_txn(txn): + results = txn.execute_sql("SELECT 1") + for row in results: + _ = row + + if counters["aborted"] == 0: + counters["aborted"] = 1 + raise Aborted( + "Thrown from ClientInterceptor for testing", + errors=[_helpers.FauxCall(code_pb2.ABORTED)], + ) + db.run_in_transaction(select_in_txn) + got_statuses, got_events = finished_spans_statuses(trace_exporter) + + # Check for the series of events + want_events = [ + ("Acquiring session", {"kind": "BurstyPool"}), + ("Waiting for a session to become available", {"kind": "BurstyPool"}), + ("No sessions available in pool. Creating session", {"kind": "BurstyPool"}), + ("Creating Session", {}), + ( + "Transaction was aborted in user operation, retrying", + {"delay_seconds": "EPHEMERAL", "cause": "EPHEMERAL", "attempt": 1}, + ), + ("Starting Commit", {}), + ("Commit Done", {}), + ] + assert got_events == want_events + + # Check for the statues. 
+ codes = StatusCode + want_statuses = [ + ("CloudSpanner.Database.run_in_transaction", codes.OK, None), + ("CloudSpanner.CreateSession", codes.OK, None), + ("CloudSpanner.Session.run_in_transaction", codes.OK, None), + ("CloudSpanner.Transaction.execute_streaming_sql", codes.OK, None), + ("CloudSpanner.Transaction.execute_streaming_sql", codes.OK, None), + ("CloudSpanner.Transaction.commit", codes.OK, None), + ] + assert got_statuses == want_statuses + + +def finished_spans_statuses(trace_exporter): span_list = trace_exporter.get_finished_spans() # Sort the spans by their start time in the hierarchy. span_list = sorted(span_list, key=lambda span: span.start_time) - got_span_names = [span.name for span in span_list] - want_span_names = [ - "CloudSpanner.Database.run_in_transaction", - "CloudSpanner.CreateSession", - "CloudSpanner.Session.run_in_transaction", - "CloudSpanner.Transaction.execute_streaming_sql", - "CloudSpanner.Transaction.execute_streaming_sql", - "CloudSpanner.Transaction.commit", - ] - - assert got_span_names == want_span_names got_events = [] got_statuses = [] @@ -233,6 +261,7 @@ def select_in_txn(txn): got_statuses.append( (span.name, span.status.status_code, span.status.description) ) + for event in span.events: evt_attributes = event.attributes.copy() for attr_name in imprecise_event_attributes: @@ -241,30 +270,70 @@ def select_in_txn(txn): got_events.append((event.name, evt_attributes)) + return got_statuses, got_events + + +@pytest.mark.skipif( + not _helpers.USE_EMULATOR, + reason="Emulator needed to run this test", +) +@pytest.mark.skipif( + not HAS_OTEL_INSTALLED, + reason="Tracing requires OpenTelemetry", +) +def test_database_partitioned_error(): + from opentelemetry.trace.status import StatusCode + + db, trace_exporter = create_db_trace_exporter() + + try: + db.execute_partitioned_dml("UPDATE NonExistent SET name = 'foo' WHERE id > 1") + except Exception: + pass + + got_statuses, got_events = finished_spans_statuses(trace_exporter) # Check for the series of events want_events = [ ("Acquiring session", {"kind": "BurstyPool"}), ("Waiting for a session to become available", {"kind": "BurstyPool"}), ("No sessions available in pool. Creating session", {"kind": "BurstyPool"}), ("Creating Session", {}), + ("Starting BeginTransaction", {}), ( - "Transaction was aborted in user operation, retrying", - {"delay_seconds": "EPHEMERAL", "cause": "EPHEMERAL", "attempt": 1}, + "exception", + { + "exception.type": "google.api_core.exceptions.InvalidArgument", + "exception.message": "400 Table not found: NonExistent [at 1:8]\nUPDATE NonExistent SET name = 'foo' WHERE id > 1\n ^", + "exception.stacktrace": "EPHEMERAL", + "exception.escaped": "False", + }, + ), + ( + "exception", + { + "exception.type": "google.api_core.exceptions.InvalidArgument", + "exception.message": "400 Table not found: NonExistent [at 1:8]\nUPDATE NonExistent SET name = 'foo' WHERE id > 1\n ^", + "exception.stacktrace": "EPHEMERAL", + "exception.escaped": "False", + }, ), - ("Starting Commit", {}), - ("Commit Done", {}), ] assert got_events == want_events # Check for the statues. 
codes = StatusCode want_statuses = [ - ("CloudSpanner.Database.run_in_transaction", codes.OK, None), + ( + "CloudSpanner.Database.execute_partitioned_pdml", + codes.ERROR, + "InvalidArgument: 400 Table not found: NonExistent [at 1:8]\nUPDATE NonExistent SET name = 'foo' WHERE id > 1\n ^", + ), ("CloudSpanner.CreateSession", codes.OK, None), - ("CloudSpanner.Session.run_in_transaction", codes.OK, None), - ("CloudSpanner.Transaction.execute_streaming_sql", codes.OK, None), - ("CloudSpanner.Transaction.execute_streaming_sql", codes.OK, None), - ("CloudSpanner.Transaction.commit", codes.OK, None), + ( + "CloudSpanner.ExecuteStreamingSql", + codes.ERROR, + "InvalidArgument: 400 Table not found: NonExistent [at 1:8]\nUPDATE NonExistent SET name = 'foo' WHERE id > 1\n ^", + ), ] assert got_statuses == want_statuses diff --git a/tests/system/test_session_api.py b/tests/system/test_session_api.py index 4e80657584..d2a86c8ddf 100644 --- a/tests/system/test_session_api.py +++ b/tests/system/test_session_api.py @@ -437,7 +437,6 @@ def test_batch_insert_then_read(sessions_database, ot_exporter): if ot_exporter is not None: span_list = ot_exporter.get_finished_spans() - assert len(span_list) == 4 assert_span_attributes( ot_exporter, @@ -464,6 +463,8 @@ def test_batch_insert_then_read(sessions_database, ot_exporter): span=span_list[3], ) + assert len(span_list) == 4 + def test_batch_insert_then_read_string_array_of_string(sessions_database, not_postgres): table = "string_plus_array_of_string" @@ -1193,30 +1194,57 @@ def unit_of_work(transaction): with tracer.start_as_current_span("Test Span"): session.run_in_transaction(unit_of_work) - span_list = ot_exporter.get_finished_spans() + span_list = [] + for span in ot_exporter.get_finished_spans(): + if span and span.name: + span_list.append(span) + + span_list = sorted(span_list, key=lambda v1: v1.start_time) got_span_names = [span.name for span in span_list] - want_span_names = [ + expected_span_names = [ "CloudSpanner.CreateSession", "CloudSpanner.Batch.commit", + "Test Span", + "CloudSpanner.Session.run_in_transaction", "CloudSpanner.DMLTransaction", "CloudSpanner.Transaction.commit", - "CloudSpanner.Session.run_in_transaction", - "Test Span", ] - assert got_span_names == want_span_names - - def assert_parent_hierarchy(parent, children): - for child in children: - assert child.context.trace_id == parent.context.trace_id - assert child.parent.span_id == parent.context.span_id - - test_span = span_list[-1] - test_span_children = [span_list[-2]] - assert_parent_hierarchy(test_span, test_span_children) - - session_run_in_txn = span_list[-2] - session_run_in_txn_children = span_list[2:-2] - assert_parent_hierarchy(session_run_in_txn, session_run_in_txn_children) + assert got_span_names == expected_span_names + + # We expect: + # |------CloudSpanner.CreateSession-------- + # + # |---Test Span----------------------------| + # |>--Session.run_in_transaction----------| + # |---------DMLTransaction-------| + # + # |>----Transaction.commit---| + + # CreateSession should have a trace of its own, with no children + # nor being a child of any other span. 
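+    # (The loop below verifies that no span in the test's trace claims the
+    # CreateSession span as its parent.)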
+ session_span = span_list[0] + test_span = span_list[2] + # assert session_span.context.trace_id != test_span.context.trace_id + for span in span_list[1:]: + if span.parent: + assert span.parent.span_id != session_span.context.span_id + + def assert_parent_and_children(parent_span, children): + for span in children: + assert span.context.trace_id == parent_span.context.trace_id + assert span.parent.span_id == parent_span.context.span_id + + # [CreateSession --> Batch] should have their own trace. + session_run_in_txn_span = span_list[3] + children_of_test_span = [session_run_in_txn_span] + assert_parent_and_children(test_span, children_of_test_span) + + dml_txn_span = span_list[4] + batch_commit_txn_span = span_list[5] + children_of_session_run_in_txn_span = [dml_txn_span, batch_commit_txn_span] + assert_parent_and_children( + session_run_in_txn_span, children_of_session_run_in_txn_span + ) def test_execute_partitioned_dml( diff --git a/tests/unit/test_batch.py b/tests/unit/test_batch.py index 738bce9529..eb5069b497 100644 --- a/tests/unit/test_batch.py +++ b/tests/unit/test_batch.py @@ -527,7 +527,7 @@ def test_batch_write_already_committed(self): group.delete(TABLE_NAME, keyset=keyset) groups.batch_write() self.assertSpanAttributes( - "CloudSpanner.BatchWrite", + "CloudSpanner.batch_write", status=StatusCode.OK, attributes=dict(BASE_ATTRIBUTES, num_mutation_groups=1), ) @@ -553,7 +553,7 @@ def test_batch_write_grpc_error(self): groups.batch_write() self.assertSpanAttributes( - "CloudSpanner.BatchWrite", + "CloudSpanner.batch_write", status=StatusCode.ERROR, attributes=dict(BASE_ATTRIBUTES, num_mutation_groups=1), ) @@ -615,7 +615,7 @@ def _test_batch_write_with_request_options( ) self.assertSpanAttributes( - "CloudSpanner.BatchWrite", + "CloudSpanner.batch_write", status=StatusCode.OK, attributes=dict(BASE_ATTRIBUTES, num_mutation_groups=1), ) diff --git a/tests/unit/test_pool.py b/tests/unit/test_pool.py index 89715c741d..9b5d2c9885 100644 --- a/tests/unit/test_pool.py +++ b/tests/unit/test_pool.py @@ -918,7 +918,11 @@ def test_spans_put_full(self): attributes=attrs, span=span_list[-1], ) - wantEventNames = ["Requested for 4 sessions, returned 4"] + wantEventNames = [ + "Created 2 sessions", + "Created 2 sessions", + "Requested for 4 sessions, returned 4", + ] self.assertSpanEvents( "CloudSpanner.PingingPool.BatchCreateSessions", wantEventNames ) diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py index a4446a0d1e..099bd31bea 100644 --- a/tests/unit/test_snapshot.py +++ b/tests/unit/test_snapshot.py @@ -1194,12 +1194,17 @@ def _partition_read_helper( timeout=timeout, ) + want_span_attributes = dict( + BASE_ATTRIBUTES, + table_id=TABLE_NAME, + columns=tuple(COLUMNS), + ) + if index: + want_span_attributes["index"] = index self.assertSpanAttributes( "CloudSpanner._Derived.partition_read", status=StatusCode.OK, - attributes=dict( - BASE_ATTRIBUTES, table_id=TABLE_NAME, columns=tuple(COLUMNS) - ), + attributes=want_span_attributes, ) def test_partition_read_single_use_raises(self): @@ -1369,7 +1374,7 @@ def _partition_query_helper( ) self.assertSpanAttributes( - "CloudSpanner.PartitionReadWriteTransaction", + "CloudSpanner._Derived.partition_query", status=StatusCode.OK, attributes=dict(BASE_ATTRIBUTES, **{"db.statement": SQL_QUERY_WITH_PARAM}), ) @@ -1387,7 +1392,7 @@ def test_partition_query_other_error(self): list(derived.partition_query(SQL_QUERY)) self.assertSpanAttributes( - "CloudSpanner.PartitionReadWriteTransaction", + "CloudSpanner._Derived.partition_query", 
status=StatusCode.ERROR, attributes=dict(BASE_ATTRIBUTES, **{"db.statement": SQL_QUERY}), ) @@ -1696,6 +1701,14 @@ def test_begin_w_other_error(self): with self.assertRaises(RuntimeError): snapshot.begin() + if not HAS_OPENTELEMETRY_INSTALLED: + return + + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + want_span_names = ["CloudSpanner.Snapshot.begin"] + assert got_span_names == want_span_names + self.assertSpanAttributes( "CloudSpanner.Snapshot.begin", status=StatusCode.ERROR, @@ -1816,6 +1829,10 @@ def __init__(self, directed_read_options=None): self._route_to_leader_enabled = True self._directed_read_options = directed_read_options + @property + def observability_options(self): + return dict(db_name=self.name) + class _Session(object): def __init__(self, database=None, name=TestSnapshot.SESSION_NAME): From d9ee75ac9ecfbf37a95c95a56295bdd79da3006d Mon Sep 17 00:00:00 2001 From: Emmanuel T Odeke Date: Mon, 13 Jan 2025 01:47:03 -0800 Subject: [PATCH 13/19] fix(tracing): ensure nesting of Transaction.begin under commit + fix suggestions from feature review (#1287) * fix(tracing): ensure nesting of Transaction.begin under commit + fix suggestions from feature review This change ensures that: * If a transaction was not yet begin, that if .commit() is invoked the resulting span hierarchy has .begin nested under .commit * We use "CloudSpanner.Transaction.execute_sql" instead of "CloudSpanner.Transaction.execute_streaming_sql" * If we have a tracer_provider that produces non-recordings spans, that it won't crash due to lacking `span._status` Fixes #1286 * Address code review requests * Fix by lint --- .../spanner_v1/_opentelemetry_tracing.py | 5 +- google/cloud/spanner_v1/snapshot.py | 2 +- google/cloud/spanner_v1/transaction.py | 66 +++++----- tests/system/test_observability_options.py | 116 +++++++++++++++++- tests/unit/test__opentelemetry_tracing.py | 31 ++++- tests/unit/test_snapshot.py | 4 +- tests/unit/test_transaction.py | 88 ++++++++++++- 7 files changed, 268 insertions(+), 44 deletions(-) diff --git a/google/cloud/spanner_v1/_opentelemetry_tracing.py b/google/cloud/spanner_v1/_opentelemetry_tracing.py index 6f3997069e..e80ddc97ee 100644 --- a/google/cloud/spanner_v1/_opentelemetry_tracing.py +++ b/google/cloud/spanner_v1/_opentelemetry_tracing.py @@ -117,7 +117,10 @@ def trace_call(name, session=None, extra_attributes=None, observability_options= # invoke .record_exception on our own else we shall have 2 exceptions. raise else: - if (not span._status) or span._status.status_code == StatusCode.UNSET: + # All spans still have set_status available even if for example + # NonRecordingSpan doesn't have "_status". + absent_span_status = getattr(span, "_status", None) is None + if absent_span_status or span._status.status_code == StatusCode.UNSET: # OpenTelemetry-Python only allows a status change # if the current code is UNSET or ERROR. 
At the end # of the generator's consumption, only set it to OK diff --git a/google/cloud/spanner_v1/snapshot.py b/google/cloud/spanner_v1/snapshot.py index dc28644d6c..f9edbe96fa 100644 --- a/google/cloud/spanner_v1/snapshot.py +++ b/google/cloud/spanner_v1/snapshot.py @@ -583,7 +583,7 @@ def _get_streamed_result_set( iterator = _restart_on_unavailable( restart, request, - f"CloudSpanner.{type(self).__name__}.execute_streaming_sql", + f"CloudSpanner.{type(self).__name__}.execute_sql", self._session, trace_attributes, transaction=self, diff --git a/google/cloud/spanner_v1/transaction.py b/google/cloud/spanner_v1/transaction.py index a8aef7f470..cc59789248 100644 --- a/google/cloud/spanner_v1/transaction.py +++ b/google/cloud/spanner_v1/transaction.py @@ -242,39 +242,7 @@ def commit( :returns: timestamp of the committed changes. :raises ValueError: if there are no mutations to commit. """ - self._check_state() - if self._transaction_id is None and len(self._mutations) > 0: - self.begin() - elif self._transaction_id is None and len(self._mutations) == 0: - raise ValueError("Transaction is not begun") - database = self._session._database - api = database.spanner_api - metadata = _metadata_with_prefix(database.name) - if database._route_to_leader_enabled: - metadata.append( - _metadata_with_leader_aware_routing(database._route_to_leader_enabled) - ) - - if request_options is None: - request_options = RequestOptions() - elif type(request_options) is dict: - request_options = RequestOptions(request_options) - if self.transaction_tag is not None: - request_options.transaction_tag = self.transaction_tag - - # Request tags are not supported for commit requests. - request_options.request_tag = None - - request = CommitRequest( - session=self._session.name, - mutations=self._mutations, - transaction_id=self._transaction_id, - return_commit_stats=return_commit_stats, - max_commit_delay=max_commit_delay, - request_options=request_options, - ) - trace_attributes = {"num_mutations": len(self._mutations)} observability_options = getattr(database, "observability_options", None) with trace_call( @@ -283,6 +251,40 @@ def commit( trace_attributes, observability_options, ) as span: + self._check_state() + if self._transaction_id is None and len(self._mutations) > 0: + self.begin() + elif self._transaction_id is None and len(self._mutations) == 0: + raise ValueError("Transaction is not begun") + + api = database.spanner_api + metadata = _metadata_with_prefix(database.name) + if database._route_to_leader_enabled: + metadata.append( + _metadata_with_leader_aware_routing( + database._route_to_leader_enabled + ) + ) + + if request_options is None: + request_options = RequestOptions() + elif type(request_options) is dict: + request_options = RequestOptions(request_options) + if self.transaction_tag is not None: + request_options.transaction_tag = self.transaction_tag + + # Request tags are not supported for commit requests. 
+            request_options.request_tag = None
+
+            request = CommitRequest(
+                session=self._session.name,
+                mutations=self._mutations,
+                transaction_id=self._transaction_id,
+                return_commit_stats=return_commit_stats,
+                max_commit_delay=max_commit_delay,
+                request_options=request_options,
+            )
+
             add_span_event(span, "Starting Commit")

             method = functools.partial(
diff --git a/tests/system/test_observability_options.py b/tests/system/test_observability_options.py
index a91955496f..d40b34f800 100644
--- a/tests/system/test_observability_options.py
+++ b/tests/system/test_observability_options.py
@@ -111,7 +111,7 @@ def test_propagation(enable_extended_tracing):
     gotNames = [span.name for span in from_inject_spans]
     wantNames = [
         "CloudSpanner.CreateSession",
-        "CloudSpanner.Snapshot.execute_streaming_sql",
+        "CloudSpanner.Snapshot.execute_sql",
     ]
     assert gotNames == wantNames

@@ -239,8 +239,8 @@ def select_in_txn(txn):
         ("CloudSpanner.Database.run_in_transaction", codes.OK, None),
         ("CloudSpanner.CreateSession", codes.OK, None),
         ("CloudSpanner.Session.run_in_transaction", codes.OK, None),
-        ("CloudSpanner.Transaction.execute_streaming_sql", codes.OK, None),
-        ("CloudSpanner.Transaction.execute_streaming_sql", codes.OK, None),
+        ("CloudSpanner.Transaction.execute_sql", codes.OK, None),
+        ("CloudSpanner.Transaction.execute_sql", codes.OK, None),
         ("CloudSpanner.Transaction.commit", codes.OK, None),
     ]
     assert got_statuses == want_statuses
@@ -273,6 +273,116 @@ def finished_spans_statuses(trace_exporter):
     return got_statuses, got_events


+@pytest.mark.skipif(
+    not _helpers.USE_EMULATOR,
+    reason="Emulator needed to run this test",
+)
+@pytest.mark.skipif(
+    not HAS_OTEL_INSTALLED,
+    reason="Tracing requires OpenTelemetry",
+)
+def test_transaction_update_implicit_begin_nested_inside_commit():
+    # Tests to ensure that transaction.commit() without a begun transaction
+    # has transaction.begin() inlined and nested under the commit span.
+ from google.auth.credentials import AnonymousCredentials + from opentelemetry.sdk.trace.export import SimpleSpanProcessor + from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, + ) + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.sampling import ALWAYS_ON + + PROJECT = _helpers.EMULATOR_PROJECT + CONFIGURATION_NAME = "config-name" + INSTANCE_ID = _helpers.INSTANCE_ID + DISPLAY_NAME = "display-name" + DATABASE_ID = _helpers.unique_id("temp_db") + NODE_COUNT = 5 + LABELS = {"test": "true"} + + def tx_update(txn): + txn.insert( + "Singers", + columns=["SingerId", "FirstName"], + values=[["1", "Bryan"], ["2", "Slash"]], + ) + + tracer_provider = TracerProvider(sampler=ALWAYS_ON) + trace_exporter = InMemorySpanExporter() + tracer_provider.add_span_processor(SimpleSpanProcessor(trace_exporter)) + observability_options = dict( + tracer_provider=tracer_provider, + enable_extended_tracing=True, + ) + + client = Client( + project=PROJECT, + observability_options=observability_options, + credentials=AnonymousCredentials(), + ) + + instance = client.instance( + INSTANCE_ID, + CONFIGURATION_NAME, + display_name=DISPLAY_NAME, + node_count=NODE_COUNT, + labels=LABELS, + ) + + try: + instance.create() + except Exception: + pass + + db = instance.database(DATABASE_ID) + try: + db._ddl_statements = [ + """CREATE TABLE Singers ( + SingerId INT64 NOT NULL, + FirstName STRING(1024), + LastName STRING(1024), + SingerInfo BYTES(MAX), + FullName STRING(2048) AS ( + ARRAY_TO_STRING([FirstName, LastName], " ") + ) STORED + ) PRIMARY KEY (SingerId)""", + """CREATE TABLE Albums ( + SingerId INT64 NOT NULL, + AlbumId INT64 NOT NULL, + AlbumTitle STRING(MAX), + MarketingBudget INT64, + ) PRIMARY KEY (SingerId, AlbumId), + INTERLEAVE IN PARENT Singers ON DELETE CASCADE""", + ] + db.create() + except Exception: + pass + + try: + db.run_in_transaction(tx_update) + except Exception: + pass + + span_list = trace_exporter.get_finished_spans() + # Sort the spans by their start time in the hierarchy. 
+ span_list = sorted(span_list, key=lambda span: span.start_time) + got_span_names = [span.name for span in span_list] + want_span_names = [ + "CloudSpanner.Database.run_in_transaction", + "CloudSpanner.CreateSession", + "CloudSpanner.Session.run_in_transaction", + "CloudSpanner.Transaction.commit", + "CloudSpanner.Transaction.begin", + ] + + assert got_span_names == want_span_names + + # Our object is to ensure that .begin() is a child of .commit() + span_tx_begin = span_list[-1] + span_tx_commit = span_list[-2] + assert span_tx_begin.parent.span_id == span_tx_commit.context.span_id + + @pytest.mark.skipif( not _helpers.USE_EMULATOR, reason="Emulator needed to run this test", diff --git a/tests/unit/test__opentelemetry_tracing.py b/tests/unit/test__opentelemetry_tracing.py index 1150ce7778..884928a279 100644 --- a/tests/unit/test__opentelemetry_tracing.py +++ b/tests/unit/test__opentelemetry_tracing.py @@ -159,7 +159,7 @@ def test_trace_codeless_error(self): span = span_list[0] self.assertEqual(span.status.status_code, StatusCode.ERROR) - def test_trace_call_terminal_span_status(self): + def test_trace_call_terminal_span_status_ALWAYS_ON_sampler(self): # Verify that we don't unconditionally set the terminal span status to # SpanStatus.OK per https://github.com/googleapis/python-spanner/issues/1246 from opentelemetry.sdk.trace.export import SimpleSpanProcessor @@ -195,3 +195,32 @@ def test_trace_call_terminal_span_status(self): ("VerifyTerminalSpanStatus", StatusCode.ERROR, "Our error exhibit"), ] assert got_statuses == want_statuses + + def test_trace_call_terminal_span_status_ALWAYS_OFF_sampler(self): + # Verify that we get the correct status even when using the ALWAYS_OFF + # sampler which produces the NonRecordingSpan per + # https://github.com/googleapis/python-spanner/issues/1286 + from opentelemetry.sdk.trace.export import SimpleSpanProcessor + from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, + ) + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.sampling import ALWAYS_OFF + + tracer_provider = TracerProvider(sampler=ALWAYS_OFF) + trace_exporter = InMemorySpanExporter() + tracer_provider.add_span_processor(SimpleSpanProcessor(trace_exporter)) + observability_options = dict(tracer_provider=tracer_provider) + + session = _make_session() + used_span = None + with _opentelemetry_tracing.trace_call( + "VerifyWithNonRecordingSpan", + session, + observability_options=observability_options, + ) as span: + used_span = span + + assert type(used_span).__name__ == "NonRecordingSpan" + span_list = list(trace_exporter.get_finished_spans()) + assert span_list == [] diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py index 099bd31bea..02cc35e017 100644 --- a/tests/unit/test_snapshot.py +++ b/tests/unit/test_snapshot.py @@ -868,7 +868,7 @@ def test_execute_sql_other_error(self): self.assertEqual(derived._execute_sql_count, 1) self.assertSpanAttributes( - "CloudSpanner._Derived.execute_streaming_sql", + "CloudSpanner._Derived.execute_sql", status=StatusCode.ERROR, attributes=dict(BASE_ATTRIBUTES, **{"db.statement": SQL_QUERY}), ) @@ -1024,7 +1024,7 @@ def _execute_sql_helper( self.assertEqual(derived._execute_sql_count, sql_count + 1) self.assertSpanAttributes( - "CloudSpanner._Derived.execute_streaming_sql", + "CloudSpanner._Derived.execute_sql", status=StatusCode.OK, attributes=dict(BASE_ATTRIBUTES, **{"db.statement": SQL_QUERY_WITH_PARAM}), ) diff --git a/tests/unit/test_transaction.py 
b/tests/unit/test_transaction.py index d3d7035854..9707632421 100644 --- a/tests/unit/test_transaction.py +++ b/tests/unit/test_transaction.py @@ -22,6 +22,7 @@ from google.api_core import gapic_v1 from tests._helpers import ( + HAS_OPENTELEMETRY_INSTALLED, OpenTelemetryBase, StatusCode, enrich_with_otel_scope, @@ -226,7 +227,7 @@ def test_rollback_not_begun(self): transaction.rollback() self.assertTrue(transaction.rolled_back) - # Since there was no transaction to be rolled back, rollbacl rpc is not called. + # Since there was no transaction to be rolled back, rollback rpc is not called. api.rollback.assert_not_called() self.assertNoSpans() @@ -309,7 +310,27 @@ def test_commit_not_begun(self): with self.assertRaises(ValueError): transaction.commit() - self.assertNoSpans() + if not HAS_OPENTELEMETRY_INSTALLED: + return + + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + want_span_names = ["CloudSpanner.Transaction.commit"] + assert got_span_names == want_span_names + + got_span_events_statuses = self.finished_spans_events_statuses() + want_span_events_statuses = [ + ( + "exception", + { + "exception.type": "ValueError", + "exception.message": "Transaction is not begun", + "exception.stacktrace": "EPHEMERAL", + "exception.escaped": "False", + }, + ) + ] + assert got_span_events_statuses == want_span_events_statuses def test_commit_already_committed(self): session = _Session() @@ -319,7 +340,27 @@ def test_commit_already_committed(self): with self.assertRaises(ValueError): transaction.commit() - self.assertNoSpans() + if not HAS_OPENTELEMETRY_INSTALLED: + return + + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + want_span_names = ["CloudSpanner.Transaction.commit"] + assert got_span_names == want_span_names + + got_span_events_statuses = self.finished_spans_events_statuses() + want_span_events_statuses = [ + ( + "exception", + { + "exception.type": "ValueError", + "exception.message": "Transaction is already committed", + "exception.stacktrace": "EPHEMERAL", + "exception.escaped": "False", + }, + ) + ] + assert got_span_events_statuses == want_span_events_statuses def test_commit_already_rolled_back(self): session = _Session() @@ -329,7 +370,27 @@ def test_commit_already_rolled_back(self): with self.assertRaises(ValueError): transaction.commit() - self.assertNoSpans() + if not HAS_OPENTELEMETRY_INSTALLED: + return + + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + want_span_names = ["CloudSpanner.Transaction.commit"] + assert got_span_names == want_span_names + + got_span_events_statuses = self.finished_spans_events_statuses() + want_span_events_statuses = [ + ( + "exception", + { + "exception.type": "ValueError", + "exception.message": "Transaction is already rolled back", + "exception.stacktrace": "EPHEMERAL", + "exception.escaped": "False", + }, + ) + ] + assert got_span_events_statuses == want_span_events_statuses def test_commit_w_other_error(self): database = _Database() @@ -435,6 +496,18 @@ def _commit_helper( ), ) + if not HAS_OPENTELEMETRY_INSTALLED: + return + + span_list = self.get_finished_spans() + got_span_names = [span.name for span in span_list] + want_span_names = ["CloudSpanner.Transaction.commit"] + assert got_span_names == want_span_names + + got_span_events_statuses = self.finished_spans_events_statuses() + want_span_events_statuses = [("Starting Commit", {}), ("Commit Done", {})] + assert got_span_events_statuses == 
want_span_events_statuses + def test_commit_no_mutations(self): self._commit_helper(mutate=False) @@ -586,6 +659,13 @@ def _execute_update_helper( ) self.assertEqual(transaction._execute_sql_count, count + 1) + want_span_attributes = dict(TestTransaction.BASE_ATTRIBUTES) + want_span_attributes["db.statement"] = DML_QUERY_WITH_PARAM + self.assertSpanAttributes( + "CloudSpanner.Transaction.execute_update", + status=StatusCode.OK, + attributes=want_span_attributes, + ) def test_execute_update_new_transaction(self): self._execute_update_helper() From ee9662f57dbb730afb08b9b9829e4e19bda5e69a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Knut=20Olav=20L=C3=B8ite?= Date: Mon, 13 Jan 2025 14:09:23 +0100 Subject: [PATCH 14/19] feat: support transaction and request tags in dbapi (#1262) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: support transaction and request tags in dbapi Adds support for setting transaction tags and request tags in dbapi. This makes these options available to frameworks that depend on dbapi, like SQLAlchemy and Django. Towards https://github.com/googleapis/python-spanner-sqlalchemy/issues/525 * test: add test for transaction_tag with read-only tx * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .gitignore | 4 - google/cloud/spanner_dbapi/connection.py | 35 +++- google/cloud/spanner_dbapi/cursor.py | 42 ++++- tests/mockserver_tests/test_tags.py | 206 +++++++++++++++++++++++ 4 files changed, 277 insertions(+), 10 deletions(-) create mode 100644 tests/mockserver_tests/test_tags.py diff --git a/.gitignore b/.gitignore index 4797754726..d083ea1ddc 100644 --- a/.gitignore +++ b/.gitignore @@ -62,7 +62,3 @@ system_tests/local_test_setup # Make sure a generated file isn't accidentally committed. pylintrc pylintrc.test - - -# Ignore coverage files -.coverage* diff --git a/google/cloud/spanner_dbapi/connection.py b/google/cloud/spanner_dbapi/connection.py index cec6c64dac..c2aa385d2a 100644 --- a/google/cloud/spanner_dbapi/connection.py +++ b/google/cloud/spanner_dbapi/connection.py @@ -113,7 +113,7 @@ def __init__(self, instance, database=None, read_only=False, **kwargs): self.request_priority = None self._transaction_begin_marked = False # whether transaction started at Spanner. This means that we had - # made atleast one call to Spanner. + # made at least one call to Spanner. self._spanner_transaction_started = False self._batch_mode = BatchMode.NONE self._batch_dml_executor: BatchDmlExecutor = None @@ -261,6 +261,28 @@ def request_options(self): self.request_priority = None return req_opts + @property + def transaction_tag(self): + """The transaction tag that will be applied to the next read/write + transaction on this `Connection`. This property is automatically cleared + when a new transaction is started. + + Returns: + str: The transaction tag that will be applied to the next read/write transaction. + """ + return self._connection_variables.get("transaction_tag", None) + + @transaction_tag.setter + def transaction_tag(self, value): + """Sets the transaction tag for the next read/write transaction on this + `Connection`. This property is automatically cleared when a new transaction + is started. + + Args: + value (str): The transaction tag for the next read/write transaction. 
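+
+        Example (illustrative):
+            connection.transaction_tag = "my_transaction_tag"
+            cursor = connection.cursor()
+            cursor.execute("UPDATE Singers SET FirstName = 'X' WHERE SingerId = 1")
+            connection.commit()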
+ """ + self._connection_variables["transaction_tag"] = value + @property def staleness(self): """Current read staleness option value of this `Connection`. @@ -340,6 +362,8 @@ def transaction_checkout(self): if not self.read_only and self._client_transaction_started: if not self._spanner_transaction_started: self._transaction = self._session_checkout().transaction() + self._transaction.transaction_tag = self.transaction_tag + self.transaction_tag = None self._snapshot = None self._spanner_transaction_started = True self._transaction.begin() @@ -458,7 +482,9 @@ def run_prior_DDL_statements(self): return self.database.update_ddl(ddl_statements).result() - def run_statement(self, statement: Statement): + def run_statement( + self, statement: Statement, request_options: RequestOptions = None + ): """Run single SQL statement in begun transaction. This method is never used in autocommit mode. In @@ -472,6 +498,9 @@ def run_statement(self, statement: Statement): :param retried: (Optional) Retry the SQL statement if statement execution failed. Defaults to false. + :type request_options: :class:`RequestOptions` + :param request_options: Request options to use for this statement. + :rtype: :class:`google.cloud.spanner_v1.streamed.StreamedResultSet`, :class:`google.cloud.spanner_dbapi.checksum.ResultsChecksum` :returns: Streamed result set of the statement and a @@ -482,7 +511,7 @@ def run_statement(self, statement: Statement): statement.sql, statement.params, param_types=statement.param_types, - request_options=self.request_options, + request_options=request_options or self.request_options, ) @check_not_closed diff --git a/google/cloud/spanner_dbapi/cursor.py b/google/cloud/spanner_dbapi/cursor.py index 8b4170e3f2..a72a8e9de1 100644 --- a/google/cloud/spanner_dbapi/cursor.py +++ b/google/cloud/spanner_dbapi/cursor.py @@ -50,6 +50,7 @@ from google.cloud.spanner_dbapi.transaction_helper import CursorStatementType from google.cloud.spanner_dbapi.utils import PeekIterator from google.cloud.spanner_dbapi.utils import StreamedManyResultSets +from google.cloud.spanner_v1 import RequestOptions from google.cloud.spanner_v1.merged_result_set import MergedResultSet ColumnDetails = namedtuple("column_details", ["null_ok", "spanner_type"]) @@ -97,6 +98,39 @@ def __init__(self, connection): self._parsed_statement: ParsedStatement = None self._in_retry_mode = False self._batch_dml_rows_count = None + self._request_tag = None + + @property + def request_tag(self): + """The request tag that will be applied to the next statement on this + cursor. This property is automatically cleared when a statement is + executed. + + Returns: + str: The request tag that will be applied to the next statement on + this cursor. + """ + return self._request_tag + + @request_tag.setter + def request_tag(self, value): + """Sets the request tag for the next statement on this cursor. This + property is automatically cleared when a statement is executed. + + Args: + value (str): The request tag for the statement. 
+ """ + self._request_tag = value + + @property + def request_options(self): + options = self.connection.request_options + if self._request_tag: + if not options: + options = RequestOptions() + options.request_tag = self._request_tag + self._request_tag = None + return options @property def is_closed(self): @@ -284,7 +318,7 @@ def _execute(self, sql, args=None, call_from_execute_many=False): sql, params=args, param_types=self._parsed_statement.statement.param_types, - request_options=self.connection.request_options, + request_options=self.request_options, ) self._result_set = None else: @@ -318,7 +352,9 @@ def _execute_in_rw_transaction(self): if self.connection._client_transaction_started: while True: try: - self._result_set = self.connection.run_statement(statement) + self._result_set = self.connection.run_statement( + statement, self.request_options + ) self._itr = PeekIterator(self._result_set) return except Aborted: @@ -478,7 +514,7 @@ def _handle_DQL_with_snapshot(self, snapshot, sql, params): sql, params, get_param_types(params), - request_options=self.connection.request_options, + request_options=self.request_options, ) # Read the first element so that the StreamedResultSet can # return the metadata after a DQL statement. diff --git a/tests/mockserver_tests/test_tags.py b/tests/mockserver_tests/test_tags.py new file mode 100644 index 0000000000..c84d69b7bd --- /dev/null +++ b/tests/mockserver_tests/test_tags.py @@ -0,0 +1,206 @@ +# Copyright 2024 Google LLC All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from google.cloud.spanner_dbapi import Connection +from google.cloud.spanner_v1 import ( + BatchCreateSessionsRequest, + ExecuteSqlRequest, + BeginTransactionRequest, + TypeCode, + CommitRequest, +) +from tests.mockserver_tests.mock_server_test_base import ( + MockServerTestBase, + add_single_result, +) + + +class TestTags(MockServerTestBase): + @classmethod + def setup_class(cls): + super().setup_class() + add_single_result( + "select name from singers", "name", TypeCode.STRING, [("Some Singer",)] + ) + + def test_select_autocommit_no_tags(self): + connection = Connection(self.instance, self.database) + connection.autocommit = True + request = self._execute_and_verify_select_singers(connection) + self.assertEqual("", request.request_options.request_tag) + self.assertEqual("", request.request_options.transaction_tag) + + def test_select_autocommit_with_request_tag(self): + connection = Connection(self.instance, self.database) + connection.autocommit = True + request = self._execute_and_verify_select_singers( + connection, request_tag="my_tag" + ) + self.assertEqual("my_tag", request.request_options.request_tag) + self.assertEqual("", request.request_options.transaction_tag) + + def test_select_read_only_transaction_no_tags(self): + connection = Connection(self.instance, self.database) + connection.autocommit = False + connection.read_only = True + request = self._execute_and_verify_select_singers(connection) + self.assertEqual("", request.request_options.request_tag) + self.assertEqual("", request.request_options.transaction_tag) + + def test_select_read_only_transaction_with_request_tag(self): + connection = Connection(self.instance, self.database) + connection.autocommit = False + connection.read_only = True + request = self._execute_and_verify_select_singers( + connection, request_tag="my_tag" + ) + self.assertEqual("my_tag", request.request_options.request_tag) + self.assertEqual("", request.request_options.transaction_tag) + + def test_select_read_only_transaction_with_transaction_tag(self): + connection = Connection(self.instance, self.database) + connection.autocommit = False + connection.read_only = True + connection.transaction_tag = "my_transaction_tag" + self._execute_and_verify_select_singers(connection) + self._execute_and_verify_select_singers(connection) + + # Read-only transactions do not support tags, so the transaction_tag is + # also not cleared from the connection when a read-only transaction is + # executed. + self.assertEqual("my_transaction_tag", connection.transaction_tag) + + # Read-only transactions do not need to be committed or rolled back on + # Spanner, but dbapi requires this to end the transaction. + connection.commit() + requests = self.spanner_service.requests + self.assertEqual(4, len(requests)) + self.assertTrue(isinstance(requests[0], BatchCreateSessionsRequest)) + self.assertTrue(isinstance(requests[1], BeginTransactionRequest)) + self.assertTrue(isinstance(requests[2], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[3], ExecuteSqlRequest)) + # Transaction tags are not supported for read-only transactions. 
+ self.assertEqual("", requests[2].request_options.transaction_tag) + self.assertEqual("", requests[3].request_options.transaction_tag) + + def test_select_read_write_transaction_no_tags(self): + connection = Connection(self.instance, self.database) + connection.autocommit = False + request = self._execute_and_verify_select_singers(connection) + self.assertEqual("", request.request_options.request_tag) + self.assertEqual("", request.request_options.transaction_tag) + + def test_select_read_write_transaction_with_request_tag(self): + connection = Connection(self.instance, self.database) + connection.autocommit = False + request = self._execute_and_verify_select_singers( + connection, request_tag="my_tag" + ) + self.assertEqual("my_tag", request.request_options.request_tag) + self.assertEqual("", request.request_options.transaction_tag) + + def test_select_read_write_transaction_with_transaction_tag(self): + connection = Connection(self.instance, self.database) + connection.autocommit = False + connection.transaction_tag = "my_transaction_tag" + # The transaction tag should be included for all statements in the transaction. + self._execute_and_verify_select_singers(connection) + self._execute_and_verify_select_singers(connection) + + # The transaction tag was cleared from the connection when the transaction + # was started. + self.assertIsNone(connection.transaction_tag) + # The commit call should also include a transaction tag. + connection.commit() + requests = self.spanner_service.requests + self.assertEqual(5, len(requests)) + self.assertTrue(isinstance(requests[0], BatchCreateSessionsRequest)) + self.assertTrue(isinstance(requests[1], BeginTransactionRequest)) + self.assertTrue(isinstance(requests[2], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[3], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[4], CommitRequest)) + self.assertEqual( + "my_transaction_tag", requests[2].request_options.transaction_tag + ) + self.assertEqual( + "my_transaction_tag", requests[3].request_options.transaction_tag + ) + self.assertEqual( + "my_transaction_tag", requests[4].request_options.transaction_tag + ) + + def test_select_read_write_transaction_with_transaction_and_request_tag(self): + connection = Connection(self.instance, self.database) + connection.autocommit = False + connection.transaction_tag = "my_transaction_tag" + # The transaction tag should be included for all statements in the transaction. + self._execute_and_verify_select_singers(connection, request_tag="my_tag1") + self._execute_and_verify_select_singers(connection, request_tag="my_tag2") + + # The transaction tag was cleared from the connection when the transaction + # was started. + self.assertIsNone(connection.transaction_tag) + # The commit call should also include a transaction tag. 
+ connection.commit() + requests = self.spanner_service.requests + self.assertEqual(5, len(requests)) + self.assertTrue(isinstance(requests[0], BatchCreateSessionsRequest)) + self.assertTrue(isinstance(requests[1], BeginTransactionRequest)) + self.assertTrue(isinstance(requests[2], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[3], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[4], CommitRequest)) + self.assertEqual( + "my_transaction_tag", requests[2].request_options.transaction_tag + ) + self.assertEqual("my_tag1", requests[2].request_options.request_tag) + self.assertEqual( + "my_transaction_tag", requests[3].request_options.transaction_tag + ) + self.assertEqual("my_tag2", requests[3].request_options.request_tag) + self.assertEqual( + "my_transaction_tag", requests[4].request_options.transaction_tag + ) + + def test_request_tag_is_cleared(self): + connection = Connection(self.instance, self.database) + connection.autocommit = True + with connection.cursor() as cursor: + cursor.request_tag = "my_tag" + cursor.execute("select name from singers") + # This query will not have a request tag. + cursor.execute("select name from singers") + requests = self.spanner_service.requests + self.assertTrue(isinstance(requests[1], ExecuteSqlRequest)) + self.assertTrue(isinstance(requests[2], ExecuteSqlRequest)) + self.assertEqual("my_tag", requests[1].request_options.request_tag) + self.assertEqual("", requests[2].request_options.request_tag) + + def _execute_and_verify_select_singers( + self, connection: Connection, request_tag: str = "", transaction_tag: str = "" + ) -> ExecuteSqlRequest: + with connection.cursor() as cursor: + if request_tag: + cursor.request_tag = request_tag + cursor.execute("select name from singers") + result_list = cursor.fetchall() + for row in result_list: + self.assertEqual("Some Singer", row[0]) + self.assertEqual(1, len(result_list)) + requests = self.spanner_service.requests + return next( + request + for request in requests + if isinstance(request, ExecuteSqlRequest) + and request.sql == "select name from singers" + ) From 32e761b0d4052938bf67cfec63a0e83702a35ada Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:06:20 -0500 Subject: [PATCH 15/19] chore(python): exclude .github/workflows/unittest.yml in renovate config (#1288) Source-Link: https://github.com/googleapis/synthtool/commit/106d292bd234e5d9977231dcfbc4831e34eba13a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- renovate.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 1d0fd7e787..10cf433a8b 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. 
 docker:
   image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
-  digest: sha256:a1c5112b81d645f5bbc4d4bbc99d7dcb5089a52216c0e3fb1203a0eeabadd7d5
-# created: 2025-01-02T23:09:36.975468657Z
+  digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a
+# created: 2025-01-09T12:01:16.422459506Z
diff --git a/renovate.json b/renovate.json
index 39b2a0ec92..c7875c469b 100644
--- a/renovate.json
+++ b/renovate.json
@@ -5,7 +5,7 @@
     ":preserveSemverRanges",
     ":disableDependencyDashboard"
   ],
-  "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py"],
+  "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"],
   "pip_requirements": {
     "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"]
   }

From 8fbde6b84d11db12ee4d536f0d5b8064619bdaa9 Mon Sep 17 00:00:00 2001
From: Lester Szeto
Date: Thu, 23 Jan 2025 20:12:24 -0800
Subject: [PATCH 16/19] Feat: MetricsTracer implementation (#1291)

---
 .../spanner_v1/metrics/metrics_exporter.py |   2 +-
 .../spanner_v1/metrics/metrics_tracer.py   | 558 ++++++++++++++++++
 .../metrics/metrics_tracer_factory.py      | 309 ++++++++++
 tests/unit/test_metrics_tracer.py          | 224 +++++++
 tests/unit/test_metrics_tracer_factory.py  |  59 ++
 5 files changed, 1151 insertions(+), 1 deletion(-)
 create mode 100644 google/cloud/spanner_v1/metrics/metrics_tracer.py
 create mode 100644 google/cloud/spanner_v1/metrics/metrics_tracer_factory.py
 create mode 100644 tests/unit/test_metrics_tracer.py
 create mode 100644 tests/unit/test_metrics_tracer_factory.py

diff --git a/google/cloud/spanner_v1/metrics/metrics_exporter.py b/google/cloud/spanner_v1/metrics/metrics_exporter.py
index f7d3aa18c8..fb32985365 100644
--- a/google/cloud/spanner_v1/metrics/metrics_exporter.py
+++ b/google/cloud/spanner_v1/metrics/metrics_exporter.py
@@ -62,7 +62,7 @@
     from opentelemetry.sdk.resources import Resource

     HAS_OPENTELEMETRY_INSTALLED = True
-except ImportError:
+except ImportError:  # pragma: NO COVER
     HAS_OPENTELEMETRY_INSTALLED = False

 try:
diff --git a/google/cloud/spanner_v1/metrics/metrics_tracer.py b/google/cloud/spanner_v1/metrics/metrics_tracer.py
new file mode 100644
index 0000000000..60525d6e4e
--- /dev/null
+++ b/google/cloud/spanner_v1/metrics/metrics_tracer.py
@@ -0,0 +1,558 @@
+# -*- coding: utf-8 -*-
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This module contains the MetricsTracer class and its related helper classes.
+
+The MetricsTracer class is responsible for collecting and tracing metrics,
+while the helper classes provide additional functionality and context for the metrics being traced.
+"""
+
+from datetime import datetime
+from typing import Dict
+from grpc import StatusCode
+from .constants import (
+    METRIC_LABEL_KEY_CLIENT_NAME,
+    METRIC_LABEL_KEY_CLIENT_UID,
+    METRIC_LABEL_KEY_DATABASE,
+    METRIC_LABEL_KEY_DIRECT_PATH_ENABLED,
+    METRIC_LABEL_KEY_METHOD,
+    METRIC_LABEL_KEY_STATUS,
+    MONITORED_RES_LABEL_KEY_CLIENT_HASH,
+    MONITORED_RES_LABEL_KEY_INSTANCE,
+    MONITORED_RES_LABEL_KEY_INSTANCE_CONFIG,
+    MONITORED_RES_LABEL_KEY_LOCATION,
+    MONITORED_RES_LABEL_KEY_PROJECT,
+)
+
+try:
+    from opentelemetry.metrics import Counter, Histogram
+
+    HAS_OPENTELEMETRY_INSTALLED = True
+except ImportError:  # pragma: NO COVER
+    HAS_OPENTELEMETRY_INSTALLED = False
+
+
+class MetricAttemptTracer:
+    """
+    This class is designed to hold information related to a metric attempt.
+
+    It captures the start time of the attempt, whether the direct path was used, and the status of the attempt.
+    """
+
+    _start_time: datetime
+    direct_path_used: bool
+    status: str
+
+    def __init__(self):
+        """
+        Initialize a MetricAttemptTracer instance with default values.
+
+        This constructor sets the start time of the metric attempt to the current datetime, initializes the status as an empty string, and sets the direct path used flag to False by default.
+        """
+        self._start_time = datetime.now()
+        self.status = ""
+        self.direct_path_used = False
+
+    @property
+    def start_time(self):
+        """Getter method for the start_time property.
+
+        This method returns the start time of the metric attempt.
+
+        Returns:
+            datetime: The start time of the metric attempt.
+        """
+        return self._start_time
+
+
+class MetricOpTracer:
+    """
+    This class is designed to store and manage information related to metric operations.
+
+    It captures the start time, attempt count, current attempt, and status of a metric operation.
+    """
+
+    _attempt_count: int
+    _start_time: datetime
+    _current_attempt: MetricAttemptTracer
+    status: str
+
+    def __init__(self, is_direct_path_enabled: bool = False):
+        """
+        Initialize a MetricOpTracer instance with default values.
+
+        This constructor sets the attempt count to zero, the start time to the
+        current datetime, the current attempt to None, and the status to an
+        empty string.
+
+        Args:
+            is_direct_path_enabled (bool): Whether the direct path is enabled
+                for this operation. Defaults to False.
+        """
+        self._attempt_count = 0
+        self._start_time = datetime.now()
+        self._current_attempt = None
+        self.status = ""
+
+    @property
+    def attempt_count(self):
+        """
+        Getter method for the attempt_count property.
+
+        This method returns the current count of attempts made for the metric operation.
+
+        Returns:
+            int: The current count of attempts.
+        """
+        return self._attempt_count
+
+    @property
+    def current_attempt(self):
+        """
+        Getter method for the current_attempt property.
+
+        This method returns the current MetricAttemptTracer instance associated with the metric operation.
+
+        Returns:
+            MetricAttemptTracer: The current MetricAttemptTracer instance.
+        """
+        return self._current_attempt
+
+    @property
+    def start_time(self):
+        """
+        Getter method for the start_time property.
+
+        This method returns the start time of the metric operation.
+
+        Returns:
+            datetime: The start time of the metric operation.
+        """
+        return self._start_time
+
+    def increment_attempt_count(self):
+        """
+        Increment the attempt count by 1.
+
+        This method updates the attempt count by incrementing it by 1, indicating a new attempt has been made.
+        """
+        self._attempt_count += 1
+
+    def start(self):
+        """
+        Set the start time of the metric operation to the current time.
+
+        This method updates the start time of the metric operation to the current time, indicating the operation has started.
+        """
+        self._start_time = datetime.now()
+
+    def new_attempt(self):
+        """
+        Initialize a new MetricAttemptTracer instance for the current metric operation.
+
+        This method sets up a new MetricAttemptTracer instance, indicating a new attempt is being made within the metric operation.
+        """
+        self._current_attempt = MetricAttemptTracer()
+
+
+class MetricsTracer:
+    """
+    This class computes generic metrics that can be observed in the lifecycle of an RPC operation.
+
+    The responsibility of recording metrics is delegated to a MetricsRecorder, so this
+    class should not have any knowledge about the observability framework used for metrics recording.
+    """
+
+    _client_attributes: Dict[str, str]
+    _instrument_attempt_counter: Counter
+    _instrument_attempt_latency: Histogram
+    _instrument_operation_counter: Counter
+    _instrument_operation_latency: Histogram
+    current_op: MetricOpTracer
+    enabled: bool
+    method: str
+
+    def __init__(
+        self,
+        enabled: bool,
+        instrument_attempt_latency: Histogram,
+        instrument_attempt_counter: Counter,
+        instrument_operation_latency: Histogram,
+        instrument_operation_counter: Counter,
+        client_attributes: Dict[str, str],
+    ):
+        """
+        Initialize a MetricsTracer instance with the given parameters.
+
+        This constructor initializes a MetricsTracer instance with the provided enabled status, the
+        instruments for attempt latency, attempt counting, operation latency, and operation counting, and the client attributes.
+        It sets up the necessary metrics tracing infrastructure for recording metrics related to RPC operations.
+
+        Args:
+            enabled (bool): A flag indicating if metrics tracing is enabled.
+            instrument_attempt_latency (Histogram): The instrument for measuring attempt latency.
+            instrument_attempt_counter (Counter): The instrument for counting attempts.
+            instrument_operation_latency (Histogram): The instrument for measuring operation latency.
+            instrument_operation_counter (Counter): The instrument for counting operations.
+            client_attributes (dict[str, str]): A dictionary of client attributes used for metrics tracing.
+        """
+        self.current_op = MetricOpTracer()
+        self._client_attributes = client_attributes
+        self._instrument_attempt_latency = instrument_attempt_latency
+        self._instrument_attempt_counter = instrument_attempt_counter
+        self._instrument_operation_latency = instrument_operation_latency
+        self._instrument_operation_counter = instrument_operation_counter
+        self.enabled = enabled
+
+    @staticmethod
+    def _get_ms_time_diff(start: datetime, end: datetime) -> float:
+        """
+        Calculate the time difference in milliseconds between two datetime objects.
+
+        This method calculates the time difference between two datetime objects and returns the result in milliseconds.
+        This is useful for measuring the duration of operations or attempts for metrics tracing.
+        Note: total_seconds() returns a float value of seconds.
+
+        Args:
+            start (datetime): The start datetime.
+            end (datetime): The end datetime.
+
+        Returns:
+            float: The time difference in milliseconds.
+ """ + time_delta = end - start + return time_delta.total_seconds() * 1000 + + @property + def client_attributes(self) -> Dict[str, str]: + """ + Return a dictionary of client attributes used for metrics tracing. + + This property returns a dictionary containing client attributes such as project, instance, + instance configuration, location, client hash, client UID, client name, and database. + These attributes are used to provide context to the metrics being traced. + + Returns: + dict[str, str]: A dictionary of client attributes. + """ + return self._client_attributes + + @property + def instrument_attempt_counter(self) -> Counter: + """ + Return the instrument for counting attempts. + + This property returns the Counter instrument used to count the number of attempts made during RPC operations. + This metric is useful for tracking the frequency of attempts and can help identify patterns or issues in the operation flow. + + Returns: + Counter: The instrument for counting attempts. + """ + return self._instrument_attempt_counter + + @property + def instrument_attempt_latency(self) -> Histogram: + """ + Return the instrument for measuring attempt latency. + + This property returns the Histogram instrument used to measure the latency of individual attempts. + This metric is useful for tracking the performance of attempts and can help identify bottlenecks or issues in the operation flow. + + Returns: + Histogram: The instrument for measuring attempt latency. + """ + return self._instrument_attempt_latency + + @property + def instrument_operation_counter(self) -> Counter: + """ + Return the instrument for counting operations. + + This property returns the Counter instrument used to count the number of operations made during RPC operations. + This metric is useful for tracking the frequency of operations and can help identify patterns or issues in the operation flow. + + Returns: + Counter: The instrument for counting operations. + """ + return self._instrument_operation_counter + + @property + def instrument_operation_latency(self) -> Histogram: + """ + Return the instrument for measuring operation latency. + + This property returns the Histogram instrument used to measure the latency of operations. + This metric is useful for tracking the performance of operations and can help identify bottlenecks or issues in the operation flow. + + Returns: + Histogram: The instrument for measuring operation latency. + """ + return self._instrument_operation_latency + + def record_attempt_start(self) -> None: + """ + Record the start of a new attempt within the current operation. + + This method increments the attempt count for the current operation and marks the start of a new attempt. + It is used to track the number of attempts made during an operation and to identify the start of each attempt for metrics and tracing purposes. + """ + self.current_op.increment_attempt_count() + self.current_op.new_attempt() + + def record_attempt_completion(self, status: str = StatusCode.OK.name) -> None: + """ + Record the completion of an attempt within the current operation. + + This method updates the status of the current attempt to indicate its completion and records the latency of the attempt. + It calculates the elapsed time since the attempt started and uses this value to record the attempt latency metric. + This metric is useful for tracking the performance of individual attempts and can help identify bottlenecks or issues in the operation flow. 
+ + If metrics tracing is not enabled, this method does not perform any operations. + """ + if not self.enabled: + return + self.current_op.current_attempt.status = status + + # Build Attributes + attempt_attributes = self._create_attempt_otel_attributes() + + # Calculate elapsed time + attempt_latency_ms = self._get_ms_time_diff( + start=self.current_op.current_attempt.start_time, end=datetime.now() + ) + + # Record attempt latency + self.instrument_attempt_latency.record( + amount=attempt_latency_ms, attributes=attempt_attributes + ) + + def record_operation_start(self) -> None: + """ + Record the start of a new operation. + + This method marks the beginning of a new operation and initializes the operation's metrics tracking. + It is used to track the start time of an operation, which is essential for calculating operation latency and other metrics. + If metrics tracing is not enabled, this method does not perform any operations. + """ + if not self.enabled: + return + self.current_op.start() + + def record_operation_completion(self) -> None: + """ + Record the completion of an operation. + + This method marks the end of an operation and updates the metrics accordingly. + It calculates the operation latency by measuring the time elapsed since the operation started and records this metric. + Additionally, it increments the operation count and records the attempt count for the operation. + If metrics tracing is not enabled, this method does not perform any operations. + """ + if not self.enabled: + return + end_time = datetime.now() + # Build Attributes + operation_attributes = self._create_operation_otel_attributes() + attempt_attributes = self._create_attempt_otel_attributes() + + # Calculate elapsed time + operation_latency_ms = self._get_ms_time_diff( + start=self.current_op.start_time, end=end_time + ) + + # Increase operation count + self.instrument_operation_counter.add(amount=1, attributes=operation_attributes) + + # Record operation latency + self.instrument_operation_latency.record( + amount=operation_latency_ms, attributes=operation_attributes + ) + + # Record Attempt Count + self.instrument_attempt_counter.add( + self.current_op.attempt_count, attributes=attempt_attributes + ) + + def _create_operation_otel_attributes(self) -> dict: + """ + Create additional attributes for operation metrics tracing. + + This method populates the client attributes dictionary with the operation status if metrics tracing is enabled. + It returns the updated client attributes dictionary. + """ + if not self.enabled: + return {} + + self._client_attributes[METRIC_LABEL_KEY_STATUS] = self.current_op.status + return self._client_attributes + + def _create_attempt_otel_attributes(self) -> dict: + """ + Create additional attributes for attempt metrics tracing. + + This method populates the attributes dictionary with the attempt status if metrics tracing is enabled and an attempt exists. + It returns the updated attributes dictionary. + """ + if not self.enabled: + return {} + + attributes = {} + # Short circuit out if we don't have an attempt + if self.current_op.current_attempt is not None: + attributes[METRIC_LABEL_KEY_STATUS] = self.current_op.current_attempt.status + + return attributes + + def set_project(self, project: str) -> "MetricsTracer": + """ + Set the project attribute for metrics tracing. + + This method updates the project attribute in the client attributes dictionary for metrics tracing purposes. + If the project attribute already has a value, this method does nothing and returns. 
+ + :param project: The project name to set. + :return: This instance of MetricsTracer for method chaining. + """ + if MONITORED_RES_LABEL_KEY_PROJECT not in self._client_attributes: + self._client_attributes[MONITORED_RES_LABEL_KEY_PROJECT] = project + return self + + def set_instance(self, instance: str) -> "MetricsTracer": + """ + Set the instance attribute for metrics tracing. + + This method updates the instance attribute in the client attributes dictionary for metrics tracing purposes. + If the instance attribute already has a value, this method does nothing and returns. + + :param instance: The instance name to set. + :return: This instance of MetricsTracer for method chaining. + """ + if MONITORED_RES_LABEL_KEY_INSTANCE not in self._client_attributes: + self._client_attributes[MONITORED_RES_LABEL_KEY_INSTANCE] = instance + return self + + def set_instance_config(self, instance_config: str) -> "MetricsTracer": + """ + Set the instance configuration attribute for metrics tracing. + + This method updates the instance configuration attribute in the client attributes dictionary for metrics tracing purposes. + If the instance configuration attribute already has a value, this method does nothing and returns. + + :param instance_config: The instance configuration name to set. + :return: This instance of MetricsTracer for method chaining. + """ + if MONITORED_RES_LABEL_KEY_INSTANCE_CONFIG not in self._client_attributes: + self._client_attributes[ + MONITORED_RES_LABEL_KEY_INSTANCE_CONFIG + ] = instance_config + return self + + def set_location(self, location: str) -> "MetricsTracer": + """ + Set the location attribute for metrics tracing. + + This method updates the location attribute in the client attributes dictionary for metrics tracing purposes. + If the location attribute already has a value, this method does nothing and returns. + + :param location: The location name to set. + :return: This instance of MetricsTracer for method chaining. + """ + if MONITORED_RES_LABEL_KEY_LOCATION not in self._client_attributes: + self._client_attributes[MONITORED_RES_LABEL_KEY_LOCATION] = location + return self + + def set_client_hash(self, hash: str) -> "MetricsTracer": + """ + Set the client hash attribute for metrics tracing. + + This method updates the client hash attribute in the client attributes dictionary for metrics tracing purposes. + If the client hash attribute already has a value, this method does nothing and returns. + + :param hash: The client hash to set. + :return: This instance of MetricsTracer for method chaining. + """ + if MONITORED_RES_LABEL_KEY_CLIENT_HASH not in self._client_attributes: + self._client_attributes[MONITORED_RES_LABEL_KEY_CLIENT_HASH] = hash + return self + + def set_client_uid(self, client_uid: str) -> "MetricsTracer": + """ + Set the client UID attribute for metrics tracing. + + This method updates the client UID attribute in the client attributes dictionary for metrics tracing purposes. + If the client UID attribute already has a value, this method does nothing and returns. + + :param client_uid: The client UID to set. + :return: This instance of MetricsTracer for method chaining. + """ + if METRIC_LABEL_KEY_CLIENT_UID not in self._client_attributes: + self._client_attributes[METRIC_LABEL_KEY_CLIENT_UID] = client_uid + return self + + def set_client_name(self, client_name: str) -> "MetricsTracer": + """ + Set the client name attribute for metrics tracing. 
+
+        This method updates the client name attribute in the client attributes dictionary for metrics tracing purposes.
+        If the client name attribute already has a value, this method does nothing and returns.
+
+        :param client_name: The client name to set.
+        :return: This instance of MetricsTracer for method chaining.
+        """
+        if METRIC_LABEL_KEY_CLIENT_NAME not in self._client_attributes:
+            self._client_attributes[METRIC_LABEL_KEY_CLIENT_NAME] = client_name
+        return self
+
+    def set_database(self, database: str) -> "MetricsTracer":
+        """
+        Set the database attribute for metrics tracing.
+
+        This method updates the database attribute in the client attributes dictionary for metrics tracing purposes.
+        If the database attribute already has a value, this method does nothing and returns.
+
+        :param database: The database name to set.
+        :return: This instance of MetricsTracer for method chaining.
+        """
+        if METRIC_LABEL_KEY_DATABASE not in self._client_attributes:
+            self._client_attributes[METRIC_LABEL_KEY_DATABASE] = database
+        return self
+
+    def set_method(self, method: str) -> "MetricsTracer":
+        """
+        Set the method attribute for metrics tracing.
+
+        This method updates the method attribute in the client attributes dictionary for metrics tracing purposes.
+        If the method attribute already has a value, this method does nothing and returns.
+
+        :param method: The method name to set.
+        :return: This instance of MetricsTracer for method chaining.
+        """
+        if METRIC_LABEL_KEY_METHOD not in self._client_attributes:
+            self._client_attributes[METRIC_LABEL_KEY_METHOD] = method
+        return self
+
+    def enable_direct_path(self, enable: bool = False) -> "MetricsTracer":
+        """
+        Enable or disable the direct path for metrics tracing.
+
+        This method updates the direct path enabled attribute in the client attributes dictionary for metrics tracing purposes.
+        If the direct path enabled attribute already has a value, this method does nothing and returns.
+
+        :param enable: Boolean indicating whether to enable the direct path.
+        :return: This instance of MetricsTracer for method chaining.
+        """
+        if METRIC_LABEL_KEY_DIRECT_PATH_ENABLED not in self._client_attributes:
+            self._client_attributes[METRIC_LABEL_KEY_DIRECT_PATH_ENABLED] = str(enable)
+        return self
diff --git a/google/cloud/spanner_v1/metrics/metrics_tracer_factory.py b/google/cloud/spanner_v1/metrics/metrics_tracer_factory.py
new file mode 100644
index 0000000000..f7a4088019
--- /dev/null
+++ b/google/cloud/spanner_v1/metrics/metrics_tracer_factory.py
@@ -0,0 +1,309 @@
+# -*- coding: utf-8 -*-
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
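+
+# Lifecycle sketch for the MetricsTracer defined above (illustrative only;
+# assumes `tracer` was produced by the factory below with real or mock
+# OpenTelemetry instruments, and the status value is an example):
+#
+#     tracer.record_operation_start()
+#     tracer.record_attempt_start()
+#     tracer.record_attempt_completion(status="UNAVAILABLE")  # failed attempt
+#     tracer.record_attempt_start()
+#     tracer.record_attempt_completion()  # defaults to StatusCode.OK.name
+#     tracer.record_operation_completion()  # latency histograms + counters
+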
+
+"""Factory for creating MetricsTracer instances, facilitating metrics collection and tracing."""
+
+from google.cloud.spanner_v1.metrics.metrics_tracer import MetricsTracer
+
+from google.cloud.spanner_v1.metrics.constants import (
+    METRIC_NAME_OPERATION_LATENCIES,
+    MONITORED_RES_LABEL_KEY_PROJECT,
+    METRIC_NAME_ATTEMPT_LATENCIES,
+    METRIC_NAME_OPERATION_COUNT,
+    METRIC_NAME_ATTEMPT_COUNT,
+    MONITORED_RES_LABEL_KEY_INSTANCE,
+    MONITORED_RES_LABEL_KEY_INSTANCE_CONFIG,
+    MONITORED_RES_LABEL_KEY_LOCATION,
+    MONITORED_RES_LABEL_KEY_CLIENT_HASH,
+    METRIC_LABEL_KEY_CLIENT_UID,
+    METRIC_LABEL_KEY_CLIENT_NAME,
+    METRIC_LABEL_KEY_DATABASE,
+    METRIC_LABEL_KEY_DIRECT_PATH_ENABLED,
+    BUILT_IN_METRICS_METER_NAME,
+)
+
+from typing import Dict
+
+try:
+    from opentelemetry.metrics import Counter, Histogram, get_meter_provider
+
+    HAS_OPENTELEMETRY_INSTALLED = True
+except ImportError:  # pragma: NO COVER
+    HAS_OPENTELEMETRY_INSTALLED = False
+
+from google.cloud.spanner_v1 import __version__
+
+
+class MetricsTracerFactory:
+    """Factory class for creating MetricsTracer instances. This class facilitates the creation of MetricsTracer objects, which are responsible for collecting and tracing metrics."""
+
+    enabled: bool
+    _instrument_attempt_latency: Histogram
+    _instrument_attempt_counter: Counter
+    _instrument_operation_latency: Histogram
+    _instrument_operation_counter: Counter
+    _client_attributes: Dict[str, str]
+
+    @property
+    def instrument_attempt_latency(self) -> Histogram:
+        """Return the histogram used to measure attempt latency."""
+        return self._instrument_attempt_latency
+
+    @property
+    def instrument_attempt_counter(self) -> Counter:
+        """Return the counter used to count attempts."""
+        return self._instrument_attempt_counter
+
+    @property
+    def instrument_operation_latency(self) -> Histogram:
+        """Return the histogram used to measure operation latency."""
+        return self._instrument_operation_latency
+
+    @property
+    def instrument_operation_counter(self) -> Counter:
+        """Return the counter used to count operations."""
+        return self._instrument_operation_counter
+
+    def __init__(self, enabled: bool, service_name: str):
+        """Initialize a MetricsTracerFactory instance with the given parameters.
+
+        This constructor initializes a MetricsTracerFactory instance with the provided enabled flag and service name. It creates the metric instruments shared by the tracers it produces and starts with an empty dictionary of client attributes.
+
+        Args:
+            enabled (bool): A flag indicating whether metrics tracing is enabled.
+            service_name (str): The name of the service for which metrics are being traced.
+        """
+        self.enabled = enabled
+        self._create_metric_instruments(service_name)
+        self._client_attributes = {}
+
+    @property
+    def client_attributes(self) -> Dict[str, str]:
+        """Return a dictionary of client attributes used for metrics tracing.
+
+        This property returns a dictionary containing client attributes such as project, instance,
+        instance configuration, location, client hash, client UID, client name, and database.
+        These attributes are used to provide context to the metrics being traced.
+
+        Returns:
+            dict[str, str]: A dictionary of client attributes.
+        """
+        return self._client_attributes
+
+    def set_project(self, project: str) -> "MetricsTracerFactory":
+        """Set the project attribute for metrics tracing.
+
+        This method updates the client attributes dictionary with the provided project name.
+        The project name is used to identify the project for which metrics are being traced
+        and is passed to the created MetricsTracer.
+
+        Args:
+            project (str): The name of the project for metrics tracing.
+ + Returns: + MetricsTracerFactory: The current instance of MetricsTracerFactory to enable method chaining. + """ + self._client_attributes[MONITORED_RES_LABEL_KEY_PROJECT] = project + return self + + def set_instance(self, instance: str) -> "MetricsTracerFactory": + """Set the instance attribute for metrics tracing. + + This method updates the client attributes dictionary with the provided instance name. + The instance name is used to identify the instance for which metrics are being traced + and is passed to the created MetricsTracer. + + Args: + instance (str): The name of the instance for metrics tracing. + + Returns: + MetricsTracerFactory: The current instance of MetricsTracerFactory to enable method chaining. + """ + self._client_attributes[MONITORED_RES_LABEL_KEY_INSTANCE] = instance + return self + + def set_instance_config(self, instance_config: str) -> "MetricsTracerFactory": + """Sets the instance configuration attribute for metrics tracing. + + This method updates the client attributes dictionary with the provided instance configuration. + The instance configuration is used to identify the configuration of the instance for which + metrics are being traced and is passed to the created MetricsTracer. + + Args: + instance_config (str): The configuration of the instance for metrics tracing. + + Returns: + MetricsTracerFactory: The current instance of MetricsTracerFactory to enable method chaining. + """ + self._client_attributes[ + MONITORED_RES_LABEL_KEY_INSTANCE_CONFIG + ] = instance_config + return self + + def set_location(self, location: str) -> "MetricsTracerFactory": + """Set the location attribute for metrics tracing. + + This method updates the client attributes dictionary with the provided location. + The location is used to identify the location for which metrics are being traced + and is passed to the created MetricsTracer. + + Args: + location (str): The location for metrics tracing. + + Returns: + MetricsTracerFactory: The current instance of MetricsTracerFactory to enable method chaining. + """ + self._client_attributes[MONITORED_RES_LABEL_KEY_LOCATION] = location + return self + + def set_client_hash(self, hash: str) -> "MetricsTracerFactory": + """Set the client hash attribute for metrics tracing. + + This method updates the client attributes dictionary with the provided client hash. + The client hash is used to identify the client for which metrics are being traced + and is passed to the created MetricsTracer. + + Args: + hash (str): The hash of the client for metrics tracing. + + Returns: + MetricsTracerFactory: The current instance of MetricsTracerFactory to enable method chaining. + """ + self._client_attributes[MONITORED_RES_LABEL_KEY_CLIENT_HASH] = hash + return self + + def set_client_uid(self, client_uid: str) -> "MetricsTracerFactory": + """Set the client UID attribute for metrics tracing. + + This method updates the client attributes dictionary with the provided client UID. + The client UID is used to identify the client for which metrics are being traced + and is passed to the created MetricsTracer. + + Args: + client_uid (str): The UID of the client for metrics tracing. + + Returns: + MetricsTracerFactory: The current instance of MetricsTracerFactory to enable method chaining. + """ + self._client_attributes[METRIC_LABEL_KEY_CLIENT_UID] = client_uid + return self + + def set_client_name(self, client_name: str) -> "MetricsTracerFactory": + """Set the client name attribute for metrics tracing. 
+ + This method updates the client attributes dictionary with the provided client name. + The client name is used to identify the client for which metrics are being traced + and is passed to the created MetricsTracer. + + Args: + client_name (str): The name of the client for metrics tracing. + + Returns: + MetricsTracerFactory: The current instance of MetricsTracerFactory to enable method chaining. + """ + self._client_attributes[METRIC_LABEL_KEY_CLIENT_NAME] = client_name + return self + + def set_database(self, database: str) -> "MetricsTracerFactory": + """Set the database attribute for metrics tracing. + + This method updates the client attributes dictionary with the provided database name. + The database name is used to identify the database for which metrics are being traced + and is passed to the created MetricsTracer. + + Args: + database (str): The name of the database for metrics tracing. + + Returns: + MetricsTracerFactory: The current instance of MetricsTracerFactory to enable method chaining. + """ + self._client_attributes[METRIC_LABEL_KEY_DATABASE] = database + return self + + def enable_direct_path(self, enable: bool = False) -> "MetricsTracerFactory": + """Enable or disable the direct path for metrics tracing. + + This method updates the client attributes dictionary with the provided enable status. + The direct path enabled status is used to determine whether to use the direct path for metrics tracing + and is passed to the created MetricsTracer. + + Args: + enable (bool, optional): Whether to enable the direct path for metrics tracing. Defaults to False. + + Returns: + MetricsTracerFactory: The current instance of MetricsTracerFactory to enable method chaining. + """ + self._client_attributes[METRIC_LABEL_KEY_DIRECT_PATH_ENABLED] = enable + return self + + def create_metrics_tracer(self) -> MetricsTracer: + """ + Create and return a MetricsTracer instance with default settings and client attributes. + + This method initializes a MetricsTracer instance with default settings for metrics tracing, + including metrics tracing enabled if OpenTelemetry is installed and the direct path disabled by default. + It also sets the client attributes based on the factory's configuration. + + Returns: + MetricsTracer: A MetricsTracer instance with default settings and client attributes. + """ + metrics_tracer = MetricsTracer( + enabled=self.enabled and HAS_OPENTELEMETRY_INSTALLED, + instrument_attempt_latency=self._instrument_attempt_latency, + instrument_attempt_counter=self._instrument_attempt_counter, + instrument_operation_latency=self._instrument_operation_latency, + instrument_operation_counter=self._instrument_operation_counter, + client_attributes=self._client_attributes.copy(), + ) + return metrics_tracer + + def _create_metric_instruments(self, service_name: str) -> None: + """ + Creates and sets up metric instruments for the given service name. + + This method initializes and configures metric instruments for attempt latency, attempt counter, + operation latency, and operation counter. These instruments are used to measure and track + metrics related to attempts and operations within the service. + + Args: + service_name (str): The name of the service for which metric instruments are being created. 
+        """
+        if not HAS_OPENTELEMETRY_INSTALLED:  # pragma: NO COVER
+            return
+
+        meter_provider = get_meter_provider()
+        meter = meter_provider.get_meter(
+            name=BUILT_IN_METRICS_METER_NAME, version=__version__
+        )
+
+        self._instrument_attempt_latency = meter.create_histogram(
+            name=METRIC_NAME_ATTEMPT_LATENCIES,
+            unit="ms",
+            description="Time an individual attempt took.",
+        )
+
+        self._instrument_attempt_counter = meter.create_counter(
+            name=METRIC_NAME_ATTEMPT_COUNT,
+            unit="1",
+            description="Number of attempts.",
+        )
+
+        self._instrument_operation_latency = meter.create_histogram(
+            name=METRIC_NAME_OPERATION_LATENCIES,
+            unit="ms",
+            description="Total time until final operation success or failure, including retries and backoff.",
+        )
+
+        self._instrument_operation_counter = meter.create_counter(
+            name=METRIC_NAME_OPERATION_COUNT,
+            unit="1",
+            description="Number of operations.",
+        )
diff --git a/tests/unit/test_metrics_tracer.py b/tests/unit/test_metrics_tracer.py
new file mode 100644
index 0000000000..9b59c59a7c
--- /dev/null
+++ b/tests/unit/test_metrics_tracer.py
@@ -0,0 +1,224 @@
+# -*- coding: utf-8 -*-
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+# Skip before importing from opentelemetry so collection does not fail with
+# an ImportError when the optional dependency is absent.
+pytest.importorskip("opentelemetry")
+
+from google.cloud.spanner_v1.metrics.metrics_tracer import MetricsTracer, MetricOpTracer
+import mock
+from opentelemetry.metrics import Counter, Histogram
+from datetime import datetime
+
+
+@pytest.fixture
+def metrics_tracer():
+    mock_attempt_counter = mock.create_autospec(Counter, instance=True)
+    mock_attempt_latency = mock.create_autospec(Histogram, instance=True)
+    mock_operation_counter = mock.create_autospec(Counter, instance=True)
+    mock_operation_latency = mock.create_autospec(Histogram, instance=True)
+    return MetricsTracer(
+        enabled=True,
+        instrument_attempt_latency=mock_attempt_latency,
+        instrument_attempt_counter=mock_attempt_counter,
+        instrument_operation_latency=mock_operation_latency,
+        instrument_operation_counter=mock_operation_counter,
+        client_attributes={"project_id": "test_project"},
+    )
+
+
+def test_record_attempt_start(metrics_tracer):
+    metrics_tracer.record_attempt_start()
+    assert metrics_tracer.current_op.current_attempt is not None
+    assert metrics_tracer.current_op.current_attempt.start_time is not None
+    assert metrics_tracer.current_op.attempt_count == 1
+
+
+def test_record_operation_start(metrics_tracer):
+    metrics_tracer.record_operation_start()
+    assert metrics_tracer.current_op.start_time is not None
+
+
+def test_record_attempt_completion(metrics_tracer):
+    metrics_tracer.record_attempt_start()
+    metrics_tracer.record_attempt_completion()
+    assert metrics_tracer.current_op.current_attempt.status == "OK"
+
+
+def test_record_operation_completion(metrics_tracer):
+    metrics_tracer.record_operation_start()
+    metrics_tracer.record_attempt_start()
+    metrics_tracer.record_attempt_completion()
+    metrics_tracer.record_operation_completion()
+    assert metrics_tracer.instrument_attempt_counter.add.call_count == 1
+    assert metrics_tracer.instrument_attempt_latency.record.call_count == 1
+    assert metrics_tracer.instrument_operation_latency.record.call_count == 1
+    assert metrics_tracer.instrument_operation_counter.add.call_count == 1
+
+
+def test_attempt_otel_attributes(metrics_tracer):
+    from google.cloud.spanner_v1.metrics.constants import (
+        METRIC_LABEL_KEY_DIRECT_PATH_USED,
+    )
+
+    metrics_tracer.current_op._current_attempt = None
+    attributes = metrics_tracer._create_attempt_otel_attributes()
+    assert METRIC_LABEL_KEY_DIRECT_PATH_USED not in attributes
+
+
+def test_disabled(metrics_tracer):
+    mock_operation = mock.create_autospec(MetricOpTracer, instance=True)
+    metrics_tracer.enabled = False
+    metrics_tracer._current_op = mock_operation
+
+    # Attempt start should be skipped
+    metrics_tracer.record_attempt_start()
+    assert mock_operation.new_attempt.call_count == 0
+
+    # Attempt completion should also be skipped
+    metrics_tracer.record_attempt_completion()
+    assert metrics_tracer.instrument_attempt_latency.record.call_count == 0
+
+    # Operation start should be skipped
+    metrics_tracer.record_operation_start()
+    assert mock_operation.start.call_count == 0
+
+    # Operation completion should also skip all metric logic
+    metrics_tracer.record_operation_completion()
+    assert metrics_tracer.instrument_attempt_counter.add.call_count == 0
+    assert metrics_tracer.instrument_operation_latency.record.call_count == 0
+    assert metrics_tracer.instrument_operation_counter.add.call_count == 0
+    assert not metrics_tracer._create_operation_otel_attributes()
+    assert not metrics_tracer._create_attempt_otel_attributes()
+
+
+def test_get_ms_time_diff():
+    # Create two datetime objects
+    start_time = datetime(2025, 1, 1, 12, 0, 0)
+    end_time = datetime(2025, 1, 1, 12, 0, 1)  # 1 second later
+
+    # Calculate expected milliseconds difference
+    expected_diff = 1000.0  # 1 second in milliseconds
+
+    # Call the static method
+    actual_diff = MetricsTracer._get_ms_time_diff(start_time, end_time)
+
+    # Assert the expected and actual values are equal
+    assert actual_diff == expected_diff
+
+
+def test_get_ms_time_diff_negative():
+    # Create two datetime objects where end is before start
+    start_time = datetime(2025, 1, 1, 12, 0, 1)
+    end_time = datetime(2025, 1, 1, 12, 0, 0)  # 1 second earlier
+
+    # Calculate expected milliseconds difference
+    expected_diff = -1000.0  # -1 second in milliseconds
+
+    # Call the static method
+    actual_diff = MetricsTracer._get_ms_time_diff(start_time, end_time)
+
+    # Assert the expected and actual values are equal
+    assert actual_diff == expected_diff
+
+
+def test_set_project(metrics_tracer):
+    metrics_tracer.set_project("test_project")
+    assert metrics_tracer.client_attributes["project_id"] == "test_project"
+
+    # Ensure it does not overwrite
+    metrics_tracer.set_project("new_project")
+    assert metrics_tracer.client_attributes["project_id"] == "test_project"
+
+
+def test_set_instance(metrics_tracer):
+    metrics_tracer.set_instance("test_instance")
+    assert metrics_tracer.client_attributes["instance_id"] == "test_instance"
+
+    # Ensure it does not overwrite
+    metrics_tracer.set_instance("new_instance")
+    assert metrics_tracer.client_attributes["instance_id"] == "test_instance"
+
+
+def test_set_instance_config(metrics_tracer):
+    metrics_tracer.set_instance_config("test_config")
+    assert metrics_tracer.client_attributes["instance_config"] == "test_config"
+
+    # Ensure it does not overwrite
+    metrics_tracer.set_instance_config("new_config")
+    assert
metrics_tracer.client_attributes["instance_config"] == "test_config" + + +def test_set_location(metrics_tracer): + metrics_tracer.set_location("test_location") + assert metrics_tracer.client_attributes["location"] == "test_location" + + # Ensure it does not overwrite + metrics_tracer.set_location("new_location") + assert metrics_tracer.client_attributes["location"] == "test_location" + + +def test_set_client_hash(metrics_tracer): + metrics_tracer.set_client_hash("test_hash") + assert metrics_tracer.client_attributes["client_hash"] == "test_hash" + + # Ensure it does not overwrite + metrics_tracer.set_client_hash("new_hash") + assert metrics_tracer.client_attributes["client_hash"] == "test_hash" + + +def test_set_client_uid(metrics_tracer): + metrics_tracer.set_client_uid("test_uid") + assert metrics_tracer.client_attributes["client_uid"] == "test_uid" + + # Ensure it does not overwrite + metrics_tracer.set_client_uid("new_uid") + assert metrics_tracer.client_attributes["client_uid"] == "test_uid" + + +def test_set_client_name(metrics_tracer): + metrics_tracer.set_client_name("test_name") + assert metrics_tracer.client_attributes["client_name"] == "test_name" + + # Ensure it does not overwrite + metrics_tracer.set_client_name("new_name") + assert metrics_tracer.client_attributes["client_name"] == "test_name" + + +def test_set_database(metrics_tracer): + metrics_tracer.set_database("test_db") + assert metrics_tracer.client_attributes["database"] == "test_db" + + # Ensure it does not overwrite + metrics_tracer.set_database("new_db") + assert metrics_tracer.client_attributes["database"] == "test_db" + + +def test_enable_direct_path(metrics_tracer): + metrics_tracer.enable_direct_path(True) + assert metrics_tracer.client_attributes["directpath_enabled"] == "True" + + # Ensure it does not overwrite + metrics_tracer.enable_direct_path(False) + assert metrics_tracer.client_attributes["directpath_enabled"] == "True" + + +def test_set_method(metrics_tracer): + metrics_tracer.set_method("test_method") + assert metrics_tracer.client_attributes["method"] == "test_method" + + # Ensure it does not overwrite + metrics_tracer.set_method("new_method") + assert metrics_tracer.client_attributes["method"] == "test_method" diff --git a/tests/unit/test_metrics_tracer_factory.py b/tests/unit/test_metrics_tracer_factory.py new file mode 100644 index 0000000000..637bc4c06a --- /dev/null +++ b/tests/unit/test_metrics_tracer_factory.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
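+
+# Usage sketch (illustrative only, not exercised directly by these tests):
+# a factory is configured once per client and stamps out one MetricsTracer
+# per operation, each receiving a copy of the client attributes. The
+# identifiers below are example values, not part of this patch.
+def _factory_usage_sketch():
+    from google.cloud.spanner_v1.metrics.metrics_tracer_factory import (
+        MetricsTracerFactory,
+    )
+
+    factory = MetricsTracerFactory(enabled=True, service_name="spanner-python")
+    factory.set_project("my-project").set_instance("my-instance").set_database("my-db")
+    return factory.create_metrics_tracer()  # copies the client attributes
+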
+ +import pytest + +from google.cloud.spanner_v1.metrics.metrics_tracer_factory import MetricsTracerFactory +from google.cloud.spanner_v1.metrics.metrics_tracer import MetricsTracer + +pytest.importorskip("opentelemetry") + + +@pytest.fixture +def metrics_tracer_factory(): + factory = MetricsTracerFactory( + enabled=True, + service_name="test_service", + ) + factory.set_project("test_project").set_instance( + "test_instance" + ).set_instance_config("test_config").set_location("test_location").set_client_hash( + "test_hash" + ).set_client_uid( + "test_uid" + ).set_client_name( + "test_name" + ).set_database( + "test_db" + ).enable_direct_path( + False + ) + return factory + + +def test_initialization(metrics_tracer_factory): + assert metrics_tracer_factory.enabled is True + assert metrics_tracer_factory.client_attributes["project_id"] == "test_project" + + +def test_create_metrics_tracer(metrics_tracer_factory): + tracer = metrics_tracer_factory.create_metrics_tracer() + assert isinstance(tracer, MetricsTracer) + + +def test_client_attributes(metrics_tracer_factory): + attributes = metrics_tracer_factory.client_attributes + assert attributes["project_id"] == "test_project" + assert attributes["instance_id"] == "test_instance" From c9d530727649e15a1f661261da3ee07b39821d14 Mon Sep 17 00:00:00 2001 From: Sakthivel Subramanian <179120858+sakthivelmanii@users.noreply.github.com> Date: Wed, 29 Jan 2025 22:33:09 +0530 Subject: [PATCH 17/19] chore(spanner): Update CODEOWNERS (#1304) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(spanner): Update CODEOWNERS * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .github/CODEOWNERS | 8 ++++---- .github/blunderbuss.yml | 6 +++--- .repo-metadata.json | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index c18f5b0b26..07f48edc31 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,8 +5,8 @@ # https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax # Note: This file is autogenerated. To make changes to the codeowner team, please update .repo-metadata.json. -# @googleapis/yoshi-python @googleapis/api-spanner-python are the default owners for changes in this repo -* @googleapis/yoshi-python @googleapis/api-spanner-python +# @googleapis/yoshi-python @googleapis/spanner-client-libraries-python are the default owners for changes in this repo +* @googleapis/yoshi-python @googleapis/spanner-client-libraries-python -# @googleapis/python-samples-reviewers @googleapis/api-spanner-python are the default owners for samples changes -/samples/ @googleapis/python-samples-reviewers @googleapis/api-spanner-python +# @googleapis/python-samples-reviewers @googleapis/spanner-client-libraries-python are the default owners for samples changes +/samples/ @googleapis/python-samples-reviewers @googleapis/spanner-client-libraries-python diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml index b0615bb8c2..97a6f7439f 100644 --- a/.github/blunderbuss.yml +++ b/.github/blunderbuss.yml @@ -4,14 +4,14 @@ # Note: This file is autogenerated. To make changes to the assignee # team, please update `codeowner_team` in `.repo-metadata.json`. 
assign_issues: - - googleapis/api-spanner-python + - googleapis/spanner-client-libraries-python assign_issues_by: - labels: - "samples" to: - googleapis/python-samples-reviewers - - googleapis/api-spanner-python + - googleapis/spanner-client-libraries-python assign_prs: - - googleapis/api-spanner-python + - googleapis/spanner-client-libraries-python diff --git a/.repo-metadata.json b/.repo-metadata.json index 9fccb137ca..9569af6e31 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -12,7 +12,7 @@ "api_id": "spanner.googleapis.com", "requires_billing": true, "default_version": "v1", - "codeowner_team": "@googleapis/api-spanner-python", + "codeowner_team": "@googleapis/spanner-client-libraries-python", "api_shortname": "spanner", "api_description": "is a fully managed, mission-critical, \nrelational database service that offers transactional consistency at global scale, \nschemas, SQL (ANSI 2011 with extensions), and automatic, synchronous replication \nfor high availability.\n\nBe sure to activate the Cloud Spanner API on the Developer's Console to\nuse Cloud Spanner from your project." } From 0839f982a3e7f5142825d10c440005a39cdb39cb Mon Sep 17 00:00:00 2001 From: Sri Harsha CH <57220027+harshachinta@users.noreply.github.com> Date: Wed, 19 Feb 2025 11:23:08 +0530 Subject: [PATCH 18/19] feat: add GCP standard otel attributes for python client (#1308) * chore: add standard otel attributes for GCP python client lib * chore: test fixes * chore: fix tests * chore: test fix * chore: test fixes --- google/cloud/spanner_v1/_opentelemetry_tracing.py | 4 ++++ tests/system/test_session_api.py | 3 +++ tests/unit/test__opentelemetry_tracing.py | 7 +++++++ tests/unit/test_batch.py | 4 ++++ tests/unit/test_pool.py | 10 ++++++++++ tests/unit/test_session.py | 4 ++++ tests/unit/test_snapshot.py | 13 +++++-------- tests/unit/test_transaction.py | 4 ++++ 8 files changed, 41 insertions(+), 8 deletions(-) diff --git a/google/cloud/spanner_v1/_opentelemetry_tracing.py b/google/cloud/spanner_v1/_opentelemetry_tracing.py index e80ddc97ee..5ce23cab74 100644 --- a/google/cloud/spanner_v1/_opentelemetry_tracing.py +++ b/google/cloud/spanner_v1/_opentelemetry_tracing.py @@ -93,6 +93,10 @@ def trace_call(name, session=None, extra_attributes=None, observability_options= "net.host.name": SpannerClient.DEFAULT_ENDPOINT, OTEL_SCOPE_NAME: TRACER_NAME, OTEL_SCOPE_VERSION: TRACER_VERSION, + # Standard GCP attributes for OTel, attributes are used for internal purpose and are subjected to change + "gcp.client.service": "spanner", + "gcp.client.version": TRACER_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } if extra_attributes: diff --git a/tests/system/test_session_api.py b/tests/system/test_session_api.py index d2a86c8ddf..4de0e681f6 100644 --- a/tests/system/test_session_api.py +++ b/tests/system/test_session_api.py @@ -345,6 +345,9 @@ def _make_attributes(db_instance, **kwargs): "db.url": "spanner.googleapis.com", "net.host.name": "spanner.googleapis.com", "db.instance": db_instance, + "gcp.client.service": "spanner", + "gcp.client.version": ot_helpers.LIB_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } ot_helpers.enrich_with_otel_scope(attributes) diff --git a/tests/unit/test__opentelemetry_tracing.py b/tests/unit/test__opentelemetry_tracing.py index 884928a279..b3d49355c0 100644 --- a/tests/unit/test__opentelemetry_tracing.py +++ b/tests/unit/test__opentelemetry_tracing.py @@ -14,6 +14,7 @@ from tests._helpers import ( OpenTelemetryBase, + LIB_VERSION, HAS_OPENTELEMETRY_INSTALLED, 
enrich_with_otel_scope, ) @@ -64,6 +65,9 @@ def test_trace_call(self): "db.type": "spanner", "db.url": "spanner.googleapis.com", "net.host.name": "spanner.googleapis.com", + "gcp.client.service": "spanner", + "gcp.client.version": LIB_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } ) expected_attributes.update(extra_attributes) @@ -91,6 +95,9 @@ def test_trace_error(self): "db.type": "spanner", "db.url": "spanner.googleapis.com", "net.host.name": "spanner.googleapis.com", + "gcp.client.service": "spanner", + "gcp.client.version": LIB_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } ) expected_attributes.update(extra_attributes) diff --git a/tests/unit/test_batch.py b/tests/unit/test_batch.py index eb5069b497..ff05bf6307 100644 --- a/tests/unit/test_batch.py +++ b/tests/unit/test_batch.py @@ -17,6 +17,7 @@ from unittest.mock import MagicMock from tests._helpers import ( OpenTelemetryBase, + LIB_VERSION, StatusCode, enrich_with_otel_scope, ) @@ -33,6 +34,9 @@ "db.url": "spanner.googleapis.com", "db.instance": "testing", "net.host.name": "spanner.googleapis.com", + "gcp.client.service": "spanner", + "gcp.client.version": LIB_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } enrich_with_otel_scope(BASE_ATTRIBUTES) diff --git a/tests/unit/test_pool.py b/tests/unit/test_pool.py index 9b5d2c9885..a9593b3651 100644 --- a/tests/unit/test_pool.py +++ b/tests/unit/test_pool.py @@ -22,6 +22,7 @@ from google.cloud.spanner_v1._opentelemetry_tracing import trace_call from tests._helpers import ( OpenTelemetryBase, + LIB_VERSION, StatusCode, enrich_with_otel_scope, HAS_OPENTELEMETRY_INSTALLED, @@ -147,6 +148,9 @@ class TestFixedSizePool(OpenTelemetryBase): "db.url": "spanner.googleapis.com", "db.instance": "name", "net.host.name": "spanner.googleapis.com", + "gcp.client.service": "spanner", + "gcp.client.version": LIB_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } enrich_with_otel_scope(BASE_ATTRIBUTES) @@ -483,6 +487,9 @@ class TestBurstyPool(OpenTelemetryBase): "db.url": "spanner.googleapis.com", "db.instance": "name", "net.host.name": "spanner.googleapis.com", + "gcp.client.service": "spanner", + "gcp.client.version": LIB_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } enrich_with_otel_scope(BASE_ATTRIBUTES) @@ -721,6 +728,9 @@ class TestPingingPool(OpenTelemetryBase): "db.url": "spanner.googleapis.com", "db.instance": "name", "net.host.name": "spanner.googleapis.com", + "gcp.client.service": "spanner", + "gcp.client.version": LIB_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } enrich_with_otel_scope(BASE_ATTRIBUTES) diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 55c91435f8..ff8e9dad12 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -19,6 +19,7 @@ import mock from tests._helpers import ( OpenTelemetryBase, + LIB_VERSION, StatusCode, enrich_with_otel_scope, ) @@ -46,6 +47,9 @@ class TestSession(OpenTelemetryBase): "db.url": "spanner.googleapis.com", "db.instance": DATABASE_NAME, "net.host.name": "spanner.googleapis.com", + "gcp.client.service": "spanner", + "gcp.client.version": LIB_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } enrich_with_otel_scope(BASE_ATTRIBUTES) diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py index 02cc35e017..6dc14fb7cd 100644 --- a/tests/unit/test_snapshot.py +++ b/tests/unit/test_snapshot.py @@ -19,6 +19,7 @@ from google.cloud.spanner_v1 import RequestOptions, DirectedReadOptions from tests._helpers import ( 
OpenTelemetryBase, + LIB_VERSION, StatusCode, HAS_OPENTELEMETRY_INSTALLED, enrich_with_otel_scope, @@ -46,6 +47,9 @@ "db.url": "spanner.googleapis.com", "db.instance": "testing", "net.host.name": "spanner.googleapis.com", + "gcp.client.service": "spanner", + "gcp.client.version": LIB_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } enrich_with_otel_scope(BASE_ATTRIBUTES) @@ -533,14 +537,7 @@ def test_iteration_w_multiple_span_creation(self): self.assertEqual(span.name, name) self.assertEqual( dict(span.attributes), - enrich_with_otel_scope( - { - "db.type": "spanner", - "db.url": "spanner.googleapis.com", - "db.instance": "testing", - "net.host.name": "spanner.googleapis.com", - } - ), + enrich_with_otel_scope(BASE_ATTRIBUTES), ) diff --git a/tests/unit/test_transaction.py b/tests/unit/test_transaction.py index 9707632421..d355d283fe 100644 --- a/tests/unit/test_transaction.py +++ b/tests/unit/test_transaction.py @@ -23,6 +23,7 @@ from tests._helpers import ( HAS_OPENTELEMETRY_INSTALLED, + LIB_VERSION, OpenTelemetryBase, StatusCode, enrich_with_otel_scope, @@ -62,6 +63,9 @@ class TestTransaction(OpenTelemetryBase): "db.url": "spanner.googleapis.com", "db.instance": "testing", "net.host.name": "spanner.googleapis.com", + "gcp.client.service": "spanner", + "gcp.client.version": LIB_VERSION, + "gcp.client.repo": "googleapis/python-spanner", } enrich_with_otel_scope(BASE_ATTRIBUTES) From a6be0eb74a3001c6536c26c6861d3c1636e65321 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 20 Feb 2025 12:53:10 +0530 Subject: [PATCH 19/19] chore(main): release 3.52.0 (#1258) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- .release-please-manifest.json | 2 +- CHANGELOG.md | 21 +++++++++++++++++++ .../gapic_version.py | 2 +- .../gapic_version.py | 2 +- google/cloud/spanner_v1/gapic_version.py | 2 +- ...data_google.spanner.admin.database.v1.json | 2 +- ...data_google.spanner.admin.instance.v1.json | 2 +- .../snippet_metadata_google.spanner.v1.json | 2 +- 8 files changed, 28 insertions(+), 7 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index b4ec2efce5..8be9b88803 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "3.51.0" + ".": "3.52.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d2eb31d6a..aef63c02e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,27 @@ [1]: https://pypi.org/project/google-cloud-spanner/#history +## [3.52.0](https://github.com/googleapis/python-spanner/compare/v3.51.0...v3.52.0) (2025-02-19) + + +### Features + +* Add additional opentelemetry span events for session pool ([a6811af](https://github.com/googleapis/python-spanner/commit/a6811afefa6739caa20203048635d94f9b85c4c8)) +* Add GCP standard otel attributes for python client ([#1308](https://github.com/googleapis/python-spanner/issues/1308)) ([0839f98](https://github.com/googleapis/python-spanner/commit/0839f982a3e7f5142825d10c440005a39cdb39cb)) +* Add updated span events + trace more methods ([#1259](https://github.com/googleapis/python-spanner/issues/1259)) ([ad69c48](https://github.com/googleapis/python-spanner/commit/ad69c48f01b09cbc5270b9cefde23715d5ac54b6)) +* MetricsTracer implementation ([#1291](https://github.com/googleapis/python-spanner/issues/1291)) ([8fbde6b](https://github.com/googleapis/python-spanner/commit/8fbde6b84d11db12ee4d536f0d5b8064619bdaa9)) +* Support GRAPH and pipe syntax in 
dbapi ([#1285](https://github.com/googleapis/python-spanner/issues/1285)) ([959bb9c](https://github.com/googleapis/python-spanner/commit/959bb9cda953eead89ffc271cb2a472e7139f81c)) +* Support transaction and request tags in dbapi ([#1262](https://github.com/googleapis/python-spanner/issues/1262)) ([ee9662f](https://github.com/googleapis/python-spanner/commit/ee9662f57dbb730afb08b9b9829e4e19bda5e69a)) +* **x-goog-spanner-request-id:** Introduce AtomicCounter ([#1275](https://github.com/googleapis/python-spanner/issues/1275)) ([f2483e1](https://github.com/googleapis/python-spanner/commit/f2483e11ba94f8bd1e142d1a85347d90104d1a19)) + + +### Bug Fixes + +* Retry UNAVAILABLE errors for streaming RPCs ([#1278](https://github.com/googleapis/python-spanner/issues/1278)) ([ab31078](https://github.com/googleapis/python-spanner/commit/ab310786baf09033a28c76e843b654e98a21613d)), closes [#1150](https://github.com/googleapis/python-spanner/issues/1150) +* **tracing:** Ensure nesting of Transaction.begin under commit + fix suggestions from feature review ([#1287](https://github.com/googleapis/python-spanner/issues/1287)) ([d9ee75a](https://github.com/googleapis/python-spanner/commit/d9ee75ac9ecfbf37a95c95a56295bdd79da3006d)) +* **tracing:** Only set span.status=OK if UNSET ([#1248](https://github.com/googleapis/python-spanner/issues/1248)) ([1d393fe](https://github.com/googleapis/python-spanner/commit/1d393fedf3be8b36c91d0f52a5f23cfa5c05f835)), closes [#1246](https://github.com/googleapis/python-spanner/issues/1246) +* Update retry strategy for mutation calls to handle aborted transactions ([#1279](https://github.com/googleapis/python-spanner/issues/1279)) ([0887eb4](https://github.com/googleapis/python-spanner/commit/0887eb43b6ea8bd9076ca81977d1446011335853)) + ## [3.51.0](https://github.com/googleapis/python-spanner/compare/v3.50.1...v3.51.0) (2024-12-05) diff --git a/google/cloud/spanner_admin_database_v1/gapic_version.py b/google/cloud/spanner_admin_database_v1/gapic_version.py index 99e11c0cb5..5ea820ffea 100644 --- a/google/cloud/spanner_admin_database_v1/gapic_version.py +++ b/google/cloud/spanner_admin_database_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "3.51.0" # {x-release-please-version} +__version__ = "3.52.0" # {x-release-please-version} diff --git a/google/cloud/spanner_admin_instance_v1/gapic_version.py b/google/cloud/spanner_admin_instance_v1/gapic_version.py index 99e11c0cb5..5ea820ffea 100644 --- a/google/cloud/spanner_admin_instance_v1/gapic_version.py +++ b/google/cloud/spanner_admin_instance_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "3.51.0" # {x-release-please-version} +__version__ = "3.52.0" # {x-release-please-version} diff --git a/google/cloud/spanner_v1/gapic_version.py b/google/cloud/spanner_v1/gapic_version.py index 99e11c0cb5..5ea820ffea 100644 --- a/google/cloud/spanner_v1/gapic_version.py +++ b/google/cloud/spanner_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 #
-__version__ = "3.51.0" # {x-release-please-version}
+__version__ = "3.52.0" # {x-release-please-version}
diff --git a/samples/generated_samples/snippet_metadata_google.spanner.admin.database.v1.json b/samples/generated_samples/snippet_metadata_google.spanner.admin.database.v1.json
index 7c35814b17..aef1015b66 100644
--- a/samples/generated_samples/snippet_metadata_google.spanner.admin.database.v1.json
+++ b/samples/generated_samples/snippet_metadata_google.spanner.admin.database.v1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-spanner-admin-database",
-    "version": "3.51.0"
+    "version": "3.52.0"
   },
   "snippets": [
     {
diff --git a/samples/generated_samples/snippet_metadata_google.spanner.admin.instance.v1.json b/samples/generated_samples/snippet_metadata_google.spanner.admin.instance.v1.json
index 261a7d44f3..6d216a11b2 100644
--- a/samples/generated_samples/snippet_metadata_google.spanner.admin.instance.v1.json
+++ b/samples/generated_samples/snippet_metadata_google.spanner.admin.instance.v1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-spanner-admin-instance",
-    "version": "3.51.0"
+    "version": "3.52.0"
   },
   "snippets": [
     {
diff --git a/samples/generated_samples/snippet_metadata_google.spanner.v1.json b/samples/generated_samples/snippet_metadata_google.spanner.v1.json
index ddb4419273..09626918ec 100644
--- a/samples/generated_samples/snippet_metadata_google.spanner.v1.json
+++ b/samples/generated_samples/snippet_metadata_google.spanner.v1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-spanner",
-    "version": "3.51.0"
+    "version": "3.52.0"
   },
   "snippets": [
     {
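
Note on verifying the standard GCP client attributes added in #1308: the sketch below is a minimal, self-contained illustration and is not part of the patch series. It assumes opentelemetry-sdk is installed, uses only the public trace_call helper whose diff appears above, and passes session=None (the signature permits this; the assumption here is that session-derived attributes such as db.instance are then simply omitted). The span name and variable names are placeholders, not values from the patches.

from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

from google.cloud.spanner_v1._opentelemetry_tracing import trace_call

# Route spans to an in-memory exporter so they can be inspected directly.
tracer_provider = TracerProvider()
exporter = InMemorySpanExporter()
tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))

# session=None is accepted by trace_call's signature; a real Session would
# additionally contribute attributes like db.instance (assumption).
with trace_call(
    "CloudSpanner.VerifyStandardAttributes",  # placeholder span name
    observability_options=dict(tracer_provider=tracer_provider),
):
    pass  # any traced work would go here

(span,) = exporter.get_finished_spans()
assert span.attributes["gcp.client.service"] == "spanner"
assert span.attributes["gcp.client.repo"] == "googleapis/python-spanner"
# gcp.client.version tracks the library version (LIB_VERSION in the tests),
# so only presence is asserted here.
assert "gcp.client.version" in span.attributes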
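Relatedly, the MetricsTracerFactory tests earlier in this series (patch 16) exercise a fluent builder: each set_* method returns the factory itself, which is why the fixture chains them. A small usage sketch follows, again illustrative rather than part of the patches; the "demo_*" values are placeholders.

from google.cloud.spanner_v1.metrics.metrics_tracer_factory import MetricsTracerFactory

# The set_* methods return the factory, so client attributes can be chained
# exactly as in the pytest fixture above; values are placeholders.
factory = MetricsTracerFactory(enabled=True, service_name="demo_service")
factory.set_project("demo_project").set_instance("demo_instance").set_instance_config(
    "demo_config"
).set_location("demo_location").set_client_hash("demo_hash").set_client_uid(
    "demo_uid"
).set_client_name("demo_name").set_database("demo_db").enable_direct_path(False)

assert factory.enabled
assert factory.client_attributes["project_id"] == "demo_project"
assert factory.client_attributes["instance_id"] == "demo_instance"

# Each call yields a MetricsTracer pre-populated with those client attributes.
tracer = factory.create_metrics_tracer()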