feat: AsyncClient project get_job, get_query_results, get_table, list_partitions #1853

Draft · wants to merge 8 commits into main
203 changes: 203 additions & 0 deletions google/cloud/bigquery/async_client.py
@@ -0,0 +1,203 @@
import sys
from google.cloud.bigquery.client import *
from google.cloud.bigquery.client import (
_add_server_timeout_header,
_extract_job_reference,
)
from google.cloud.bigquery.opentelemetry_tracing import async_create_span
from google.cloud.bigquery import _job_helpers
from google.cloud.bigquery.table import *
from google.cloud.bigquery.table import _table_arg_to_table_ref
from google.api_core.page_iterator import HTTPIterator
from google.cloud.bigquery.query import _QueryResults
from google.cloud.bigquery.retry import (
DEFAULT_ASYNC_JOB_RETRY,
DEFAULT_ASYNC_RETRY,
DEFAULT_TIMEOUT,
)
from google.api_core import retry_async as retries

if sys.version_info >= (3, 9):
import asyncio
import aiohttp
from google.auth.transport import _aiohttp_requests

# This code is experimental

# Minimum transport timeout (in seconds) for getQueryResults requests; the
# synchronous client applies the same floor.
_MIN_GET_QUERY_RESULTS_TIMEOUT = 120


class AsyncClient:
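    """Experimental asyncio wrapper drafted in this PR; it delegates
    credentials and configuration to a synchronous ``Client`` instance."""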
def __init__(self, *args, **kwargs):
self._client = Client(*args, **kwargs)

async def get_job(
self,
job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob],
project: Optional[str] = None,
location: Optional[str] = None,
retry: retries.AsyncRetry = DEFAULT_ASYNC_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]:
extra_params = {"projection": "full"}

project, location, job_id = _extract_job_reference(
job_id, project=project, location=location
)

if project is None:
project = self._client.project

if location is None:
location = self._client.location

if location is not None:
extra_params["location"] = location

path = "/projects/{}/jobs/{}".format(project, job_id)

span_attributes = {"path": path, "job_id": job_id, "location": location}

resource = await self._call_api(
retry,
span_name="BigQuery.getJob",
span_attributes=span_attributes,
method="GET",
path=path,
query_params=extra_params,
timeout=timeout,
)

        return self._client.job_from_resource(resource)  # already awaited above

async def _get_query_results( # make async
self,
job_id: str,
retry: retries.AsyncRetry,
project: Optional[str] = None,
timeout_ms: Optional[int] = None,
location: Optional[str] = None,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> _QueryResults:
extra_params: Dict[str, Any] = {"maxResults": 0}

        # Clamp the transport timeout to the documented floor; non-numeric
        # sentinel values also fall back to the floor.
        if timeout is not None:
if not isinstance(timeout, (int, float)):
timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT
else:
timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)

if project is None:
project = self._client.project

if timeout_ms is not None:
extra_params["timeoutMs"] = timeout_ms

if location is None:
location = self._client.location

if location is not None:
extra_params["location"] = location

path = "/projects/{}/queries/{}".format(project, job_id)

# This call is typically made in a polling loop that checks whether the
# job is complete (from QueryJob.done(), called ultimately from
# QueryJob.result()). So we don't need to poll here.
span_attributes = {"path": path}
resource = await self._call_api(
retry,
span_name="BigQuery.getQueryResults",
span_attributes=span_attributes,
method="GET",
path=path,
query_params=extra_params,
timeout=timeout,
)
return _QueryResults.from_api_repr(resource)

async def get_table( # make async
self,
table: Union[Table, TableReference, TableListItem, str],
retry: retries.AsyncRetry = DEFAULT_ASYNC_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Table:
table_ref = _table_arg_to_table_ref(table, default_project=self._client.project)
path = table_ref.path
span_attributes = {"path": path}
api_response = await self._call_api(
retry,
span_name="BigQuery.getTable",
span_attributes=span_attributes,
method="GET",
path=path,
timeout=timeout,
)

return Table.from_api_repr(api_response)

async def list_partitions( # make async
self,
table: Union[Table, TableReference, TableListItem, str],
retry: retries.AsyncRetry = DEFAULT_ASYNC_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Sequence[str]:
table = _table_arg_to_table_ref(table, default_project=self._client.project)
meta_table = await self.get_table(
TableReference(
DatasetReference(table.project, table.dataset_id),
"%s$__PARTITIONS_SUMMARY__" % table.table_id,
),
retry=retry,
timeout=timeout,
)

        # NOTE: list_rows on the wrapped synchronous client still blocks; only
        # the metadata fetch above is awaited at this stage of the draft.
        subset = [col for col in meta_table.schema if col.name == "partition_id"]
return [
row[0]
for row in self._client.list_rows(
meta_table, selected_fields=subset, retry=retry, timeout=timeout
)
]

async def _call_api(
self,
retry: Optional[retries.AsyncRetry] = None,
span_name: Optional[str] = None,
span_attributes: Optional[Dict] = None,
job_ref=None,
headers: Optional[Dict[str, str]] = None,
**kwargs,
):

        kwargs = _add_server_timeout_header(headers, kwargs)

        # Thin wrapper over _aiohttp_requests (wip): translate the client's
        # "path"/"query_params" kwargs into an aiohttp-style request.
        DEFAULT_API_ENDPOINT = "https://bigquery.googleapis.com"
        kwargs["url"] = DEFAULT_API_ENDPOINT + kwargs.pop("path")

        if kwargs.get("query_params"):
            kwargs["params"] = kwargs.pop("query_params")

        async def do_request():
            # AuthorizedSession handles credential refresh over aiohttp; the
            # response is parsed as JSON so callers receive a resource dict.
            async with _aiohttp_requests.AuthorizedSession(
                self._client._credentials
            ) as authed_session:
                response = await authed_session.request(**kwargs)
                return await response.json()

        # AsyncRetry wraps the coroutine function, not a response object.
        if retry:
            do_request = retry(do_request)

        if span_name is not None:
            async with async_create_span(
                name=span_name,
                attributes=span_attributes,
                client=self._client,
                job_ref=job_ref,
            ):
                return await do_request()

        return await do_request()
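For reviewers, a minimal usage sketch of the drafted surface (the project, dataset, table, and job IDs below are placeholders, and the import path assumes this module ships as google.cloud.bigquery.async_client):

import asyncio

from google.cloud.bigquery.async_client import AsyncClient


async def main():
    client = AsyncClient()  # wraps a synchronous Client for credentials/config
    # Fetch table metadata without blocking the event loop.
    table = await client.get_table("my-project.my_dataset.my_table")
    print([field.name for field in table.schema])
    # Look up a previously started job.
    job = await client.get_job("my-job-id", location="US")
    print(job.state)


asyncio.run(main())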

33 changes: 32 additions & 1 deletion google/cloud/bigquery/opentelemetry_tracing.py
@@ -13,7 +13,7 @@
# limitations under the License.

import logging
from contextlib import contextmanager
from contextlib import contextmanager, asynccontextmanager
from google.api_core.exceptions import GoogleAPICallError # type: ignore

logger = logging.getLogger(__name__)
@@ -86,6 +86,37 @@ def create_span(name, attributes=None, client=None, job_ref=None):
raise


@asynccontextmanager
async def async_create_span(name, attributes=None, client=None, job_ref=None):
"""Asynchronous context manager for creating and exporting OpenTelemetry spans."""
global _warned_telemetry
final_attributes = _get_final_span_attributes(attributes, client, job_ref)

if not HAS_OPENTELEMETRY:
if not _warned_telemetry:
logger.debug(
"This service is instrumented using OpenTelemetry. "
"OpenTelemetry or one of its components could not be imported; "
"please add compatible versions of opentelemetry-api and "
"opentelemetry-instrumentation packages in order to get BigQuery "
"Tracing data."
)
_warned_telemetry = True
yield None
return
tracer = trace.get_tracer(__name__)

    # NOTE: start_as_current_span is a synchronous context manager, so it is
    # entered with a plain "with" even inside this async generator.
    with tracer.start_as_current_span(name=name, attributes=final_attributes) as span:
try:
yield span
except GoogleAPICallError as error:
if error.code is not None:
span.set_status(Status(http_status_to_status_code(error.code)))
raise


def _get_final_span_attributes(attributes=None, client=None, job_ref=None):
"""Compiles attributes from: client, job_ref, user-provided attributes.

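A sketch of how the new helper would be consumed (it mirrors the call sites in AsyncClient._call_api above; the span name, path, and client argument are illustrative):

from google.cloud.bigquery.opentelemetry_tracing import async_create_span


async def traced_get(client, path):
    # Yields None (and logs once) when OpenTelemetry is not installed.
    async with async_create_span(
        name="BigQuery.getTable", attributes={"path": path}, client=client
    ):
        ...  # perform the awaited API request here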
14 changes: 13 additions & 1 deletion google/cloud/bigquery/retry.py
@@ -13,7 +13,7 @@
# limitations under the License.

from google.api_core import exceptions
from google.api_core import retry
from google.api_core import retry, retry_async
from google.auth import exceptions as auth_exceptions # type: ignore
import requests.exceptions

@@ -90,3 +90,15 @@ def _job_should_retry(exc):
"""
The default job retry object.
"""

DEFAULT_ASYNC_RETRY = retry_async.AsyncRetry(
predicate=_should_retry, deadline=_DEFAULT_RETRY_DEADLINE
) # deadline is deprecated

DEFAULT_ASYNC_JOB_RETRY = retry_async.AsyncRetry(
predicate=_job_should_retry,
deadline=_DEFAULT_JOB_DEADLINE, # deadline is deprecated
)
# Open questions: are additional predicate cases needed for async mode?
# How should timeout be expressed: a deadline, or a maximum-retry bound?
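For context, an AsyncRetry object wraps an async callable and re-awaits it while the predicate matches; a minimal sketch of applying the defaults added here (fetch_resource is a hypothetical coroutine):

from google.cloud.bigquery.retry import DEFAULT_ASYNC_RETRY


async def fetch_resource(path):
    ...  # an awaited HTTP call; transient failures matching _should_retry are retried


# Wrap at the call site, as AsyncClient._call_api does:
# result = await DEFAULT_ASYNC_RETRY(fetch_resource)("/projects/p/jobs/j")
# Or apply DEFAULT_ASYNC_RETRY as a decorator at definition time.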
16 changes: 10 additions & 6 deletions noxfile.py
@@ -79,9 +79,10 @@ def default(session, install_extras=True):
"-c",
constraints_path,
)
session.install("asyncmock", "pytest-asyncio")

if install_extras and session.python in ["3.11", "3.12"]:
install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]"
if install_extras and session.python in ["3.12"]:
install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry,aiohttp]"
elif install_extras:
install_target = ".[all]"
else:
@@ -104,6 +105,9 @@
*session.posargs,
)

# Having positional arguments means the user wants to run specific tests.
# Best not to add additional tests to that list.


@nox.session(python=UNIT_TEST_PYTHON_VERSIONS)
def unit(session):
@@ -188,8 +192,8 @@ def system(session):
# Data Catalog needed for the column ACL test with a real Policy Tag.
session.install("google-cloud-datacatalog", "-c", constraints_path)

if session.python in ["3.11", "3.12"]:
extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]"
if session.python in ["3.12"]:
        extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry,aiohttp]"  # TODO: check whether geopandas supports 3.11/3.12 (currently up to 3.11)
else:
extras = "[all]"
session.install("-e", f".{extras}", "-c", constraints_path)
@@ -254,8 +258,8 @@ def snippets(session):
session.install("google-cloud-storage", "-c", constraints_path)
session.install("grpcio", "-c", constraints_path)

if session.python in ["3.11", "3.12"]:
extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]"
if session.python in ["3.12"]:
extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry,aiohttp]"
else:
extras = "[all]"
session.install("-e", f".{extras}", "-c", constraints_path)
3 changes: 3 additions & 0 deletions setup.py
@@ -84,6 +84,9 @@
"proto-plus >= 1.15.0, <2.0.0dev",
"protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types.
],
"aiohttp": [
"google-auth[aiohttp]",
],
}

all_extras = []
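With this extra defined, users would opt into the async dependencies via pip install "google-cloud-bigquery[aiohttp]" (extra name taken from this diff).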
1 change: 1 addition & 0 deletions testing/constraints-3.9.txt
@@ -4,5 +4,6 @@
#
# NOTE: Not comprehensive yet, will eventually be maintained semi-automatically by
# the renovate bot.
aiohttp==3.6.2
grpcio==1.47.0
pyarrow>=4.0.0