From aa6b9ebcdae0f7cbb08dc743d312e110d9701e23 Mon Sep 17 00:00:00 2001 From: Brian Hulette Date: Thu, 1 May 2025 12:54:23 -0700 Subject: [PATCH 1/2] Add ability to set autodetect_schema query_param --- google/cloud/bigquery/client.py | 11 ++++++++ tests/system/test_client.py | 48 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e7cafc47e..7d6ba03a4 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1389,6 +1389,7 @@ def update_table( self, table: Table, fields: Sequence[str], + autodetect_schema: bool = False, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: @@ -1419,6 +1420,10 @@ def update_table( fields (Sequence[str]): The fields of ``table`` to change, spelled as the :class:`~google.cloud.bigquery.table.Table` properties. + autodetect_schema (bool): + Specifies if the schema of the table should be autodetected when + updating the table from the underlying source. Only applicable + for external tables. retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1438,12 +1443,18 @@ def update_table( path = table.path span_attributes = {"path": path, "fields": fields} + if autodetect_schema: + query_params = {"autodetect_schema": True} + else: + query_params = None + api_response = self._call_api( retry, span_name="BigQuery.updateTable", span_attributes=span_attributes, method="PATCH", path=path, + query_params=query_params, data=partial, headers=headers, timeout=timeout, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 9df572b14..196b80092 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -978,6 +978,54 @@ def test_update_table_constraints(self): ) self.assertIsNone(reference_table3.table_constraints, None) + def test_update_table_autodetect_schema(self): + dataset = self.temp_dataset(_make_dataset_id("bq_update_table_test")) + + # Create an external table, restrict schema to one field + TABLE_NAME = "test_table" + set_schema = [bigquery.SchemaField("username", "STRING", mode="NULLABLE")] + table_arg = Table(dataset.table(TABLE_NAME)) + external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO) + external_config.source_uris = SOURCE_URIS_AVRO + external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO + external_config.schema = set_schema + table_arg.external_data_configuration = external_config + + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + self.assertEqual(table.schema, set_schema) + + # Update table with schema autodetection + updated_table_arg = Table(dataset.table(TABLE_NAME)) + updated_external_config = bigquery.ExternalConfig( + bigquery.ExternalSourceFormat.AVRO + ) + updated_external_config.source_uris = SOURCE_URIS_AVRO + updated_external_config.reference_file_schema_uri = ( + REFERENCE_FILE_SCHEMA_URI_AVRO + ) + updated_external_config.autodetect = True + updated_external_config.schema = None + updated_table_arg.external_data_configuration = updated_external_config + + updated_table = Config.CLIENT.update_table( + updated_table_arg, ["external_data_configuration"], autodetect_schema=True + ) + + # The updated table shlould have a schema inferred from the reference + # file, which has all four fields. + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + self.assertEqual(updated_table.schema, expected_schema) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) From 85c7b9f6cb1105ddffd084a88975235e81f722dc Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 2 May 2025 18:39:49 +0000 Subject: [PATCH 2/2] suggested updates: reduce duplicate code, change query_params --- google/cloud/bigquery/client.py | 5 ++--- tests/system/test_client.py | 14 ++++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 7d6ba03a4..5d91a4263 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1443,10 +1443,9 @@ def update_table( path = table.path span_attributes = {"path": path, "fields": fields} + query_params = {} if autodetect_schema: - query_params = {"autodetect_schema": True} - else: - query_params = None + query_params["autodetect_schema"] = True api_response = self._call_api( retry, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 196b80092..ea8cc4046 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -985,6 +985,8 @@ def test_update_table_autodetect_schema(self): TABLE_NAME = "test_table" set_schema = [bigquery.SchemaField("username", "STRING", mode="NULLABLE")] table_arg = Table(dataset.table(TABLE_NAME)) + + # Create an external_config and include it in the table arguments external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO) external_config.source_uris = SOURCE_URIS_AVRO external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO @@ -1001,13 +1003,9 @@ def test_update_table_autodetect_schema(self): # Update table with schema autodetection updated_table_arg = Table(dataset.table(TABLE_NAME)) - updated_external_config = bigquery.ExternalConfig( - bigquery.ExternalSourceFormat.AVRO - ) - updated_external_config.source_uris = SOURCE_URIS_AVRO - updated_external_config.reference_file_schema_uri = ( - REFERENCE_FILE_SCHEMA_URI_AVRO - ) + + # Update the external_config and include it in the updated table arguments + updated_external_config = copy.deepcopy(external_config) updated_external_config.autodetect = True updated_external_config.schema = None updated_table_arg.external_data_configuration = updated_external_config @@ -1016,7 +1014,7 @@ def test_update_table_autodetect_schema(self): updated_table_arg, ["external_data_configuration"], autodetect_schema=True ) - # The updated table shlould have a schema inferred from the reference + # The updated table should have a schema inferred from the reference # file, which has all four fields. expected_schema = [ bigquery.SchemaField("username", "STRING", mode="NULLABLE"), pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy