 from google.cloud.bigquery.client import *
+from google.cloud.bigquery.client import (
+    _add_server_timeout_header,
+    _extract_job_reference,
+)
+from google.cloud.bigquery.opentelemetry_tracing import async_create_span
 from google.cloud.bigquery import _job_helpers
-from google.cloud.bigquery import table
+from google.cloud.bigquery.table import *
+from google.api_core.page_iterator import HTTPIterator
 from google.cloud.bigquery.retry import (
     DEFAULT_ASYNC_JOB_RETRY,
     DEFAULT_ASYNC_RETRY,
     DEFAULT_TIMEOUT,
 )
 from google.api_core import retry_async as retries
 import asyncio
+from google.auth.transport import _aiohttp_requests
+
+# This code is experimental


 class AsyncClient:
     def __init__(self, *args, **kwargs):
         self._client = Client(*args, **kwargs)

+    async def get_job(
+        self,
+        job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob],
+        project: Optional[str] = None,
+        location: Optional[str] = None,
+        retry: retries.AsyncRetry = DEFAULT_ASYNC_RETRY,
+        timeout: TimeoutType = DEFAULT_TIMEOUT,
+    ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]:
+        extra_params = {"projection": "full"}
+
+        project, location, job_id = _extract_job_reference(
+            job_id, project=project, location=location
+        )
+
+        if project is None:
+            project = self._client.project
+
+        if location is None:
+            location = self._client.location
+
+        if location is not None:
+            extra_params["location"] = location
+
+        path = "/projects/{}/jobs/{}".format(project, job_id)
+
+        span_attributes = {"path": path, "job_id": job_id, "location": location}
+
+        resource = await self._call_api(
+            retry,
+            span_name="BigQuery.getJob",
+            span_attributes=span_attributes,
+            method="GET",
+            path=path,
+            query_params=extra_params,
+            timeout=timeout,
+        )
+
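+        # NOTE: with a retry applied, _call_api currently hands back an
+        # un-awaited coroutine, so it is resolved again below.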
+        return await asyncio.to_thread(self._client.job_from_resource, await resource)
+
     async def query_and_wait(
         self,
         query,
@@ -46,7 +94,7 @@ async def query_and_wait(
         )

         return await async_query_and_wait(
-            self._client,
+            self,
             query,
             job_config=job_config,
             location=location,
@@ -59,9 +107,41 @@ async def query_and_wait(
             max_results=max_results,
         )

+    async def _call_api(
+        self,
+        retry: Optional[retries.AsyncRetry] = None,
+        span_name: Optional[str] = None,
+        span_attributes: Optional[Dict] = None,
+        job_ref=None,
+        headers: Optional[Dict[str, str]] = None,
+        **kwargs,
+    ):
+        kwargs = _add_server_timeout_header(headers, kwargs)
+
+        # Prepare the asynchronous request function
+        # async with _aiohttp_requests.Request(**kwargs) as response:
+        #     response.raise_for_status()
+        #     response = await response.json()  # or response.text()
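+        #
+        # NOTE: the commented-out block above sketches the eventual
+        # aiohttp-based transport; until that lands, the synchronous
+        # connection is wrapped below.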
+
+        async_call = functools.partial(self._client._connection.api_request, **kwargs)
+
+        if retry:
+            async_call = retry(async_call)
+
+        if span_name is not None:
+            async with async_create_span(
+                name=span_name,
+                attributes=span_attributes,
+                client=self._client,
+                job_ref=job_ref,
+            ):
+                return async_call()  # TODO: truly await once the underlying HTTP call is async
+
+        return async_call()  # TODO: truly await once the underlying HTTP call is async
+

 async def async_query_and_wait(
-    client: "Client",
+    client: "AsyncClient",
     query: str,
     *,
     job_config: Optional[job.QueryJobConfig],
@@ -73,14 +153,12 @@ async def async_query_and_wait(
     job_retry: Optional[retries.AsyncRetry],
     page_size: Optional[int] = None,
     max_results: Optional[int] = None,
-) -> table.RowIterator:
-    # Some API parameters aren't supported by the jobs.query API. In these
-    # cases, fallback to a jobs.insert call.
+) -> RowIterator:
     if not _job_helpers._supported_by_jobs_query(job_config):
         return await async_wait_or_cancel(
             asyncio.to_thread(
                 _job_helpers.query_jobs_insert(
-                    client=client,
+                    client=client._client,
                     query=query,
                     job_id=None,
                     job_id_prefix=None,
@@ -116,7 +194,7 @@ async def async_query_and_wait(
     span_attributes = {"path": path}

     if retry is not None:
-        response = client._call_api(  # TODO: make the HTTP calls async (aiohttp via google-auth) and add back retry()
+        response = await client._call_api(  # TODO: make the HTTP calls async (aiohttp via google-auth) and add back retry()
             retry=None,  # We apply the retry decorator ourselves; switch to async retries once the HTTP calls are async.
             span_name="BigQuery.query",
             span_attributes=span_attributes,
@@ -127,7 +205,7 @@ async def async_query_and_wait(
         )

     else:
-        response = client._call_api(
+        response = await client._call_api(
             retry=None,
             span_name="BigQuery.query",
             span_attributes=span_attributes,
@@ -149,17 +227,28 @@ async def async_query_and_wait(
     # client._list_rows_from_query_results directly. Need to update
     # RowIterator to fetch destination table via the job ID if needed.
     result = await async_wait_or_cancel(
-        _job_helpers._to_query_job(client, query, job_config, response),
-        api_timeout=api_timeout,
-        wait_timeout=wait_timeout,
-        retry=retry,
-        page_size=page_size,
-        max_results=max_results,
+        # _to_query_job is synchronous, so build the QueryJob in a worker thread.
+        await asyncio.to_thread(
+            _job_helpers._to_query_job, client._client, query, job_config, response
+        ),
+        api_timeout=api_timeout,
+        wait_timeout=wait_timeout,
+        retry=retry,
+        page_size=page_size,
+        max_results=max_results,
+    )
+
+    def api_request(*args, **kwargs):
+        return client._call_api(
+            *args,
+            span_name="BigQuery.query",
+            span_attributes=span_attributes,
+            timeout=api_timeout,
+            **kwargs,
         )

-    result = table.RowIterator(  # async of RowIterator? async version without all the pandas stuff
-        client=client,
-        api_request=functools.partial(client._call_api, retry, timeout=api_timeout),
+    result = AsyncRowIterator(  # async variant of RowIterator, without the pandas-specific helpers
+        client=client._client,
+        api_request=api_request,
         path=None,
         schema=query_results.schema,
         max_results=max_results,
@@ -186,10 +275,10 @@ async def async_wait_or_cancel(
     retry: Optional[retries.AsyncRetry],
     page_size: Optional[int],
     max_results: Optional[int],
-) -> table.RowIterator:
+) -> RowIterator:
     try:
         return asyncio.to_thread(
-            job.result(  # run in a background thread
+            job.result(
                 page_size=page_size,
                 max_results=max_results,
                 retry=retry,
@@ -204,3 +293,29 @@ async def async_wait_or_cancel(
             # Don't eat the original exception if cancel fails.
             pass
         raise
+
+
+class AsyncRowIterator(RowIterator):
+    async def _get_next_page_response(self):
+        """Asynchronous version of fetching the next response page."""
+        if self._first_page_response:
+            rows = self._first_page_response.get(self._items_key, [])[
+                : self.max_results
+            ]
+            response = {
+                self._items_key: rows,
+            }
+            if self._next_token in self._first_page_response:
+                response[self._next_token] = self._first_page_response[self._next_token]
+
+            self._first_page_response = None
+            return response
+
+        params = self._get_query_params()
+        if self._page_size is not None:
+            if self.page_number and "startIndex" in params:
+                del params["startIndex"]
+            params["maxResults"] = self._page_size
+        return await self.api_request(
+            method=self._HTTP_METHOD, path=self.path, query_params=params
+        )
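A minimal sketch of how this experimental `AsyncClient` could be driven from an event loop. The import path is an assumption (the diff does not show the file name), `"SELECT 1 AS x"` and `"my-job-id"` are placeholders, and it assumes `query_and_wait`'s remaining parameters keep the defaults of the synchronous client; row iteration semantics are still in flux on this branch.

```python
import asyncio

# Hypothetical import path -- adjust to wherever this module lands in the package.
from google.cloud.bigquery.async_client import AsyncClient


async def main() -> None:
    # AsyncClient wraps a synchronous Client, so it accepts the same arguments.
    client = AsyncClient()

    # query_and_wait returns an AsyncRowIterator once the query completes.
    rows = await client.query_and_wait("SELECT 1 AS x")
    print(rows.total_rows)

    # get_job resolves a job ID (placeholder here) to the full job resource.
    job = await client.get_job("my-job-id", location="US")
    print(job.job_id)


asyncio.run(main())
```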