feat: AsyncClient project get_job, get_query_results, get_table, list_partitions #1853

Draft · wants to merge 8 commits into main
203 changes: 203 additions & 0 deletions google/cloud/bigquery/async_client.py
@@ -0,0 +1,203 @@
import sys
from google.cloud.bigquery.client import *
from google.cloud.bigquery.client import (
_add_server_timeout_header,
_extract_job_reference,
)
from google.cloud.bigquery.opentelemetry_tracing import async_create_span
from google.cloud.bigquery import _job_helpers
from google.cloud.bigquery.table import *
from google.cloud.bigquery.table import _table_arg_to_table_ref
from google.api_core.page_iterator import HTTPIterator
from google.cloud.bigquery.query import _QueryResults
from google.cloud.bigquery.retry import (
DEFAULT_ASYNC_JOB_RETRY,
DEFAULT_ASYNC_RETRY,
DEFAULT_TIMEOUT,
)
from google.api_core import retry_async as retries

if sys.version_info >= (3, 9):
import asyncio
import aiohttp
from google.auth.transport import _aiohttp_requests

# This code is experimental

# Minimum transport timeout (in seconds) for getQueryResults requests; the
# synchronous client applies the same floor.
_MIN_GET_QUERY_RESULTS_TIMEOUT = 120


class AsyncClient:
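    """Experimental asyncio wrapper drafted in this PR; it delegates
    credentials and configuration to a synchronous ``Client`` instance."""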
def __init__(self, *args, **kwargs):
self._client = Client(*args, **kwargs)

async def get_job(
self,
job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob],
project: Optional[str] = None,
location: Optional[str] = None,
retry: retries.AsyncRetry = DEFAULT_ASYNC_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]:
extra_params = {"projection": "full"}

project, location, job_id = _extract_job_reference(
job_id, project=project, location=location
)

if project is None:
project = self._client.project

if location is None:
location = self._client.location

if location is not None:
extra_params["location"] = location

path = "/projects/{}/jobs/{}".format(project, job_id)

span_attributes = {"path": path, "job_id": job_id, "location": location}

resource = await self._call_api(
retry,
span_name="BigQuery.getJob",
span_attributes=span_attributes,
method="GET",
path=path,
query_params=extra_params,
timeout=timeout,
)

        return self._client.job_from_resource(resource)  # already awaited above

async def _get_query_results( # make async
self,
job_id: str,
retry: retries.AsyncRetry,
project: Optional[str] = None,
timeout_ms: Optional[int] = None,
location: Optional[str] = None,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> _QueryResults:
extra_params: Dict[str, Any] = {"maxResults": 0}

        # Clamp the transport timeout to the documented floor; non-numeric
        # sentinel values also fall back to the floor.
        if timeout is not None:
if not isinstance(timeout, (int, float)):
timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT
else:
timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)

if project is None:
project = self._client.project

if timeout_ms is not None:
extra_params["timeoutMs"] = timeout_ms

if location is None:
location = self._client.location

if location is not None:
extra_params["location"] = location

path = "/projects/{}/queries/{}".format(project, job_id)

# This call is typically made in a polling loop that checks whether the
# job is complete (from QueryJob.done(), called ultimately from
# QueryJob.result()). So we don't need to poll here.
span_attributes = {"path": path}
resource = await self._call_api(
retry,
span_name="BigQuery.getQueryResults",
span_attributes=span_attributes,
method="GET",
path=path,
query_params=extra_params,
timeout=timeout,
)
return _QueryResults.from_api_repr(resource)

async def get_table( # make async
self,
table: Union[Table, TableReference, TableListItem, str],
retry: retries.AsyncRetry = DEFAULT_ASYNC_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Table:
table_ref = _table_arg_to_table_ref(table, default_project=self._client.project)
path = table_ref.path
span_attributes = {"path": path}
api_response = await self._call_api(
retry,
span_name="BigQuery.getTable",
span_attributes=span_attributes,
method="GET",
path=path,
timeout=timeout,
)

return Table.from_api_repr(api_response)

async def list_partitions( # make async
self,
table: Union[Table, TableReference, TableListItem, str],
retry: retries.AsyncRetry = DEFAULT_ASYNC_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Sequence[str]:
table = _table_arg_to_table_ref(table, default_project=self._client.project)
meta_table = await self.get_table(
TableReference(
DatasetReference(table.project, table.dataset_id),
"%s$__PARTITIONS_SUMMARY__" % table.table_id,
),
retry=retry,
timeout=timeout,
)

        # NOTE: list_rows on the wrapped synchronous client still blocks; only
        # the metadata fetch above is awaited at this stage of the draft.
        subset = [col for col in meta_table.schema if col.name == "partition_id"]
return [
row[0]
for row in self._client.list_rows(
meta_table, selected_fields=subset, retry=retry, timeout=timeout
)
]

async def _call_api(
self,
retry: Optional[retries.AsyncRetry] = None,
span_name: Optional[str] = None,
span_attributes: Optional[Dict] = None,
job_ref=None,
headers: Optional[Dict[str, str]] = None,
**kwargs,
):

        kwargs = _add_server_timeout_header(headers, kwargs)

        # Thin wrapper over _aiohttp_requests (wip): translate the client's
        # "path"/"query_params" kwargs into an aiohttp-style request.
        DEFAULT_API_ENDPOINT = "https://bigquery.googleapis.com"
        kwargs["url"] = DEFAULT_API_ENDPOINT + kwargs.pop("path")

        if kwargs.get("query_params"):
            kwargs["params"] = kwargs.pop("query_params")

        async def do_request():
            # AuthorizedSession handles credential refresh over aiohttp; the
            # response is parsed as JSON so callers receive a resource dict.
            async with _aiohttp_requests.AuthorizedSession(
                self._client._credentials
            ) as authed_session:
                response = await authed_session.request(**kwargs)
                return await response.json()

        # AsyncRetry wraps the coroutine function, not a response object.
        if retry:
            do_request = retry(do_request)

        if span_name is not None:
            async with async_create_span(
                name=span_name,
                attributes=span_attributes,
                client=self._client,
                job_ref=job_ref,
            ):
                return await do_request()

        return await do_request()
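For reviewers, a minimal usage sketch of the drafted surface (the project, dataset, table, and job IDs below are placeholders, and the import path assumes this module ships as google.cloud.bigquery.async_client):

import asyncio

from google.cloud.bigquery.async_client import AsyncClient


async def main():
    client = AsyncClient()  # wraps a synchronous Client for credentials/config
    # Fetch table metadata without blocking the event loop.
    table = await client.get_table("my-project.my_dataset.my_table")
    print([field.name for field in table.schema])
    # Look up a previously started job.
    job = await client.get_job("my-job-id", location="US")
    print(job.state)


asyncio.run(main())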

33 changes: 32 additions & 1 deletion google/cloud/bigquery/opentelemetry_tracing.py
@@ -13,7 +13,7 @@
# limitations under the License.

import logging
from contextlib import contextmanager
from contextlib import contextmanager, asynccontextmanager
from google.api_core.exceptions import GoogleAPICallError # type: ignore

logger = logging.getLogger(__name__)
@@ -86,6 +86,37 @@ def create_span(name, attributes=None, client=None, job_ref=None):
raise


@asynccontextmanager
async def async_create_span(name, attributes=None, client=None, job_ref=None):
"""Asynchronous context manager for creating and exporting OpenTelemetry spans."""
global _warned_telemetry
final_attributes = _get_final_span_attributes(attributes, client, job_ref)

if not HAS_OPENTELEMETRY:
if not _warned_telemetry:
logger.debug(
"This service is instrumented using OpenTelemetry. "
"OpenTelemetry or one of its components could not be imported; "
"please add compatible versions of opentelemetry-api and "
"opentelemetry-instrumentation packages in order to get BigQuery "
"Tracing data."
)
_warned_telemetry = True
yield None
return
tracer = trace.get_tracer(__name__)

    # NOTE: start_as_current_span is a synchronous context manager, so it is
    # entered with a plain "with" even inside this async generator.
    with tracer.start_as_current_span(name=name, attributes=final_attributes) as span:
try:
yield span
except GoogleAPICallError as error:
if error.code is not None:
span.set_status(Status(http_status_to_status_code(error.code)))
raise


def _get_final_span_attributes(attributes=None, client=None, job_ref=None):
"""Compiles attributes from: client, job_ref, user-provided attributes.

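A sketch of how the new helper would be consumed (it mirrors the call sites in AsyncClient._call_api above; the span name, path, and client argument are illustrative):

from google.cloud.bigquery.opentelemetry_tracing import async_create_span


async def traced_get(client, path):
    # Yields None (and logs once) when OpenTelemetry is not installed.
    async with async_create_span(
        name="BigQuery.getTable", attributes={"path": path}, client=client
    ):
        ...  # perform the awaited API request here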
14 changes: 13 additions & 1 deletion google/cloud/bigquery/retry.py
@@ -13,7 +13,7 @@
# limitations under the License.

from google.api_core import exceptions
from google.api_core import retry
from google.api_core import retry, retry_async
from google.auth import exceptions as auth_exceptions # type: ignore
import requests.exceptions

@@ -90,3 +90,15 @@ def _job_should_retry(exc):
"""
The default job retry object.
"""

DEFAULT_ASYNC_RETRY = retry_async.AsyncRetry(
predicate=_should_retry, deadline=_DEFAULT_RETRY_DEADLINE
) # deadline is deprecated

DEFAULT_ASYNC_JOB_RETRY = retry_async.AsyncRetry(
predicate=_job_should_retry,
deadline=_DEFAULT_JOB_DEADLINE, # deadline is deprecated
)
# Open questions: are additional predicate cases needed for async mode?
# How should timeout be expressed: a deadline, or a maximum-retry bound?
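For context, an AsyncRetry object wraps an async callable and re-awaits it while the predicate matches; a minimal sketch of applying the defaults added here (fetch_resource is a hypothetical coroutine):

from google.cloud.bigquery.retry import DEFAULT_ASYNC_RETRY


async def fetch_resource(path):
    ...  # an awaited HTTP call; transient failures matching _should_retry are retried


# Wrap at the call site, as AsyncClient._call_api does:
# result = await DEFAULT_ASYNC_RETRY(fetch_resource)("/projects/p/jobs/j")
# Or apply DEFAULT_ASYNC_RETRY as a decorator at definition time.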
16 changes: 10 additions & 6 deletions noxfile.py
@@ -79,9 +79,10 @@ def default(session, install_extras=True):
"-c",
constraints_path,
)
session.install("asyncmock", "pytest-asyncio")

if install_extras and session.python in ["3.11", "3.12"]:
install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]"
if install_extras and session.python in ["3.12"]:
install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry,aiohttp]"
elif install_extras:
install_target = ".[all]"
else:
@@ -104,6 +105,9 @@
*session.posargs,
)

# Having positional arguments means the user wants to run specific tests.
# Best not to add additional tests to that list.


@nox.session(python=UNIT_TEST_PYTHON_VERSIONS)
def unit(session):
@@ -188,8 +192,8 @@ def system(session):
# Data Catalog needed for the column ACL test with a real Policy Tag.
session.install("google-cloud-datacatalog", "-c", constraints_path)

if session.python in ["3.11", "3.12"]:
extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]"
if session.python in ["3.12"]:
        extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry,aiohttp]"  # TODO: check whether geopandas supports 3.11/3.12 (currently up to 3.11)
else:
extras = "[all]"
session.install("-e", f".{extras}", "-c", constraints_path)
@@ -254,8 +258,8 @@ def snippets(session):
session.install("google-cloud-storage", "-c", constraints_path)
session.install("grpcio", "-c", constraints_path)

if session.python in ["3.11", "3.12"]:
extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]"
if session.python in ["3.12"]:
extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry,aiohttp]"
else:
extras = "[all]"
session.install("-e", f".{extras}", "-c", constraints_path)
3 changes: 3 additions & 0 deletions setup.py
@@ -84,6 +84,9 @@
"proto-plus >= 1.15.0, <2.0.0dev",
"protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types.
],
"aiohttp": [
"google-auth[aiohttp]",
],
}

all_extras = []
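With this extra defined, users would opt into the async dependencies via pip install "google-cloud-bigquery[aiohttp]" (extra name taken from this diff).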
1 change: 1 addition & 0 deletions testing/constraints-3.9.txt
@@ -4,5 +4,6 @@
#
# NOTE: Not comprehensive yet, will eventually be maintained semi-automatically by
# the renovate bot.
aiohttp==3.6.2
grpcio==1.47.0
pyarrow>=4.0.0