Skip to content

Commit b2300d0

Browse files
authored
feat: adds time_zone to external config and load job (#2229)
This commit introduces a new configuration option for BigQuery load jobs and external table definitions, aligning with recent updates to the underlying protos. New option added: `time_zone` — the time zone used when parsing timestamp values that do not have specific time zone information; the expected format is an IANA time zone string such as `America/Los_Angeles` (applies to `LoadJobConfig`, `LoadJob`, and `ExternalConfig`). Changes include: (1) added the corresponding property to `LoadJobConfig` and `ExternalConfig` (getter and setter) and to `LoadJob` (read-only getter that delegates to the job's configuration); (2) added docstrings for the new property, with type hints on the `LoadJobConfig` and `ExternalConfig` accessors; (3) updated unit tests to cover the new option, ensuring it is correctly handled during object initialization, serialization to API representation, and deserialization from API responses.
1 parent 7ed9fd2 commit b2300d0

File tree

5 files changed

+204
-0
lines changed

5 files changed

+204
-0
lines changed

google/cloud/bigquery/external_config.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,23 @@ def schema(self, value):
848848
prop = {"fields": [field.to_api_repr() for field in value]}
849849
self._properties["schema"] = prop
850850

851+
@property
852+
def time_zone(self) -> Optional[str]:
853+
"""Optional[str]: Time zone used when parsing timestamp values that do not
854+
have specific time zone information (e.g. 2024-04-20 12:34:56). The expected
855+
format is an IANA timezone string (e.g. America/Los_Angeles).
856+
857+
See:
858+
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone
859+
"""
860+
861+
result = self._properties.get("timeZone")
862+
return typing.cast(str, result)
863+
864+
@time_zone.setter
865+
def time_zone(self, value: Optional[str]):
866+
self._properties["timeZone"] = value
867+
851868
@property
852869
def connection_id(self):
853870
"""Optional[str]: [Experimental] ID of a BigQuery Connection API

google/cloud/bigquery/job/load.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,20 @@ def source_format(self):
548548
def source_format(self, value):
549549
self._set_sub_prop("sourceFormat", value)
550550

551+
@property
552+
def time_zone(self) -> Optional[str]:
553+
"""Optional[str]: Default time zone that will apply when parsing timestamp
554+
values that have no specific time zone.
555+
556+
See:
557+
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone
558+
"""
559+
return self._get_sub_prop("timeZone")
560+
561+
@time_zone.setter
562+
def time_zone(self, value: Optional[str]):
563+
self._set_sub_prop("timeZone", value)
564+
551565
@property
552566
def time_partitioning(self):
553567
"""Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based
@@ -889,6 +903,13 @@ def clustering_fields(self):
889903
"""
890904
return self.configuration.clustering_fields
891905

906+
@property
907+
def time_zone(self):
908+
"""See
909+
:attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`.
910+
"""
911+
return self.configuration.time_zone
912+
892913
@property
893914
def schema_update_options(self):
894915
"""See

tests/unit/job/test_load.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,14 @@ def _setUpConstants(self):
3838
self.OUTPUT_ROWS = 345
3939
self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference"
4040

41+
self.TIME_ZONE = "UTC"
42+
4143
def _make_resource(self, started=False, ended=False):
4244
resource = super(TestLoadJob, self)._make_resource(started, ended)
4345
config = resource["configuration"]["load"]
4446
config["sourceUris"] = [self.SOURCE1]
47+
48+
config["timeZone"] = self.TIME_ZONE
4549
config["destinationTable"] = {
4650
"projectId": self.PROJECT,
4751
"datasetId": self.DS_ID,
@@ -152,6 +156,10 @@ def _verifyResourceProperties(self, job, resource):
152156
)
153157
else:
154158
self.assertIsNone(job.destination_encryption_configuration)
159+
if "timeZone" in config:
160+
self.assertEqual(job.time_zone, config["timeZone"])
161+
else:
162+
self.assertIsNone(job.time_zone)
155163

156164
def test_ctor(self):
157165
client = _make_client(project=self.PROJECT)
@@ -195,6 +203,8 @@ def test_ctor(self):
195203
self.assertIsNone(job.schema_update_options)
196204
self.assertIsNone(job.reference_file_schema_uri)
197205

206+
self.assertIsNone(job.time_zone)
207+
198208
def test_ctor_w_config(self):
199209
from google.cloud.bigquery.schema import SchemaField
200210
from google.cloud.bigquery.job import LoadJobConfig
@@ -431,6 +441,24 @@ def test_from_api_repr_w_properties(self):
431441
self.assertIs(job._client, client)
432442
self._verifyResourceProperties(job, RESOURCE)
433443

444+
def test_to_api_repr(self):
445+
self._setUpConstants()
446+
client = _make_client(project=self.PROJECT)
447+
RESOURCE = self._make_resource(ended=False)
448+
449+
klass = self._get_target_class()
450+
job = klass.from_api_repr(RESOURCE, client)
451+
api_repr = job.to_api_repr()
452+
453+
# as per the documentation in load.py -> LoadJob.to_api_repr(),
454+
# the return value from to_api_repr should not include statistics
455+
expected = {
456+
"jobReference": RESOURCE["jobReference"],
457+
"configuration": RESOURCE["configuration"],
458+
}
459+
460+
self.assertEqual(api_repr, expected)
461+
434462
def test_begin_w_already_running(self):
435463
conn = make_connection()
436464
client = _make_client(project=self.PROJECT, connection=conn)
@@ -571,6 +599,7 @@ def test_begin_w_alternate_client(self):
571599
]
572600
},
573601
"schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION],
602+
"timeZone": self.TIME_ZONE,
574603
}
575604
RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION
576605
conn1 = make_connection()
@@ -599,6 +628,9 @@ def test_begin_w_alternate_client(self):
599628
config.write_disposition = WriteDisposition.WRITE_TRUNCATE
600629
config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION]
601630
config.reference_file_schema_uri = "gs://path/to/reference"
631+
632+
config.time_zone = self.TIME_ZONE
633+
602634
with mock.patch(
603635
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
604636
) as final_attributes:

tests/unit/job/test_load_config.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,6 +828,22 @@ def test_write_disposition_setter(self):
828828
config._properties["load"]["writeDisposition"], write_disposition
829829
)
830830

831+
def test_time_zone_missing(self):
832+
config = self._get_target_class()()
833+
self.assertIsNone(config.time_zone)
834+
835+
def test_time_zone_hit(self):
836+
time_zone = "UTC"
837+
config = self._get_target_class()()
838+
config._properties["load"]["timeZone"] = time_zone
839+
self.assertEqual(config.time_zone, time_zone)
840+
841+
def test_time_zone_setter(self):
842+
time_zone = "America/New_York"
843+
config = self._get_target_class()()
844+
config.time_zone = time_zone
845+
self.assertEqual(config._properties["load"]["timeZone"], time_zone)
846+
831847
def test_parquet_options_missing(self):
832848
config = self._get_target_class()()
833849
self.assertIsNone(config.parquet_options)
@@ -901,3 +917,114 @@ def test_column_name_character_map_none(self):
901917
config._properties["load"]["columnNameCharacterMap"],
902918
ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
903919
)
920+
921+
RESOURCE = {
922+
"load": {
923+
"allowJaggedRows": True,
924+
"createDisposition": "CREATE_NEVER",
925+
"encoding": "UTF-8",
926+
"fieldDelimiter": ",",
927+
"ignoreUnknownValues": True,
928+
"maxBadRecords": 10,
929+
"nullMarker": "\\N",
930+
"quote": '"',
931+
"schema": {
932+
"fields": [
933+
{"name": "name", "type": "STRING", "mode": "NULLABLE"},
934+
{"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
935+
]
936+
},
937+
"skipLeadingRows": "1",
938+
"sourceFormat": "CSV",
939+
"timePartitioning": {
940+
"type": "DAY",
941+
"field": "transaction_date",
942+
},
943+
"useAvroLogicalTypes": True,
944+
"writeDisposition": "WRITE_TRUNCATE",
945+
"timeZone": "America/New_York",
946+
"parquetOptions": {"enableListInference": True},
947+
"columnNameCharacterMap": "V2",
948+
"someNewField": "some-value",
949+
}
950+
}
951+
952+
def test_from_api_repr(self):
953+
from google.cloud.bigquery.job import (
954+
CreateDisposition,
955+
LoadJobConfig,
956+
SourceFormat,
957+
WriteDisposition,
958+
)
959+
from google.cloud.bigquery.schema import SchemaField
960+
from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType
961+
962+
from google.cloud.bigquery.job.load import ColumnNameCharacterMap
963+
964+
config = LoadJobConfig.from_api_repr(self.RESOURCE)
965+
966+
self.assertTrue(config.allow_jagged_rows)
967+
self.assertEqual(config.create_disposition, CreateDisposition.CREATE_NEVER)
968+
self.assertEqual(config.encoding, "UTF-8")
969+
self.assertEqual(config.field_delimiter, ",")
970+
self.assertTrue(config.ignore_unknown_values)
971+
self.assertEqual(config.max_bad_records, 10)
972+
self.assertEqual(config.null_marker, "\\N")
973+
self.assertEqual(config.quote_character, '"')
974+
self.assertEqual(
975+
config.schema,
976+
[SchemaField("name", "STRING"), SchemaField("age", "INTEGER")],
977+
)
978+
self.assertEqual(config.skip_leading_rows, 1)
979+
self.assertEqual(config.source_format, SourceFormat.CSV)
980+
self.assertEqual(
981+
config.time_partitioning,
982+
TimePartitioning(type_=TimePartitioningType.DAY, field="transaction_date"),
983+
)
984+
self.assertTrue(config.use_avro_logical_types)
985+
self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE)
986+
self.assertEqual(config.time_zone, "America/New_York")
987+
self.assertTrue(config.parquet_options.enable_list_inference)
988+
self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2)
989+
self.assertEqual(config._properties["load"]["someNewField"], "some-value")
990+
991+
def test_to_api_repr(self):
992+
from google.cloud.bigquery.job import (
993+
CreateDisposition,
994+
LoadJobConfig,
995+
SourceFormat,
996+
WriteDisposition,
997+
)
998+
from google.cloud.bigquery.schema import SchemaField
999+
from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType
1000+
from google.cloud.bigquery.format_options import ParquetOptions
1001+
from google.cloud.bigquery.job.load import ColumnNameCharacterMap
1002+
1003+
config = LoadJobConfig()
1004+
config.allow_jagged_rows = True
1005+
config.create_disposition = CreateDisposition.CREATE_NEVER
1006+
config.encoding = "UTF-8"
1007+
config.field_delimiter = ","
1008+
config.ignore_unknown_values = True
1009+
config.max_bad_records = 10
1010+
config.null_marker = r"\N"
1011+
config.quote_character = '"'
1012+
config.schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")]
1013+
config.skip_leading_rows = 1
1014+
config.source_format = SourceFormat.CSV
1015+
config.time_partitioning = TimePartitioning(
1016+
type_=TimePartitioningType.DAY, field="transaction_date"
1017+
)
1018+
config.use_avro_logical_types = True
1019+
config.write_disposition = WriteDisposition.WRITE_TRUNCATE
1020+
config.time_zone = "America/New_York"
1021+
parquet_options = ParquetOptions()
1022+
parquet_options.enable_list_inference = True
1023+
config.parquet_options = parquet_options
1024+
config.column_name_character_map = ColumnNameCharacterMap.V2
1025+
config._properties["load"]["someNewField"] = "some-value"
1026+
1027+
api_repr = config.to_api_repr()
1028+
1029+
expected = self.RESOURCE
1030+
self.assertEqual(api_repr, expected)

tests/unit/test_external_config.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,16 @@
2626
class TestExternalConfig(unittest.TestCase):
2727
SOURCE_URIS = ["gs://foo", "gs://bar"]
2828

29+
TIME_ZONE = "America/Los_Angeles"
30+
2931
BASE_RESOURCE = {
3032
"sourceFormat": "",
3133
"sourceUris": SOURCE_URIS,
3234
"maxBadRecords": 17,
3335
"autodetect": True,
3436
"ignoreUnknownValues": False,
3537
"compression": "compression",
38+
"timeZone": TIME_ZONE,
3639
}
3740

3841
def test_from_api_repr_base(self):
@@ -79,6 +82,7 @@ def test_to_api_repr_base(self):
7982
ec.connection_id = "path/to/connection"
8083
ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")]
8184

85+
ec.time_zone = self.TIME_ZONE
8286
exp_schema = {
8387
"fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}]
8488
}
@@ -92,6 +96,7 @@ def test_to_api_repr_base(self):
9296
"compression": "compression",
9397
"connectionId": "path/to/connection",
9498
"schema": exp_schema,
99+
"timeZone": self.TIME_ZONE,
95100
}
96101
self.assertEqual(got_resource, exp_resource)
97102

@@ -128,6 +133,8 @@ def _verify_base(self, ec):
128133
self.assertEqual(ec.max_bad_records, 17)
129134
self.assertEqual(ec.source_uris, self.SOURCE_URIS)
130135

136+
self.assertEqual(ec.time_zone, self.TIME_ZONE)
137+
131138
def test_to_api_repr_source_format(self):
132139
ec = external_config.ExternalConfig("CSV")
133140
got = ec.to_api_repr()

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy