Skip to content

Commit e2cb42e

Browse files
authored
fix: serialize Pandas NaN values into LineProtocol (influxdata#648)
1 parent a645ea9 commit e2cb42e

File tree

3 files changed: 42 additions and 20 deletions

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
## 1.42.0 [unreleased]
22

3+
### Bug Fixes
4+
1. [#648](https://github.com/influxdata/influxdb-client-python/pull/648): Fix `DataFrame` serialization with `NaN` values
5+
36
## 1.41.0 [2024-03-01]
47

58
### Features

influxdb_client/client/write/dataframe_serializer.py

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,6 @@ def _itertuples(data_frame):
1919
return zip(data_frame.index, *cols)
2020

2121

22-
def _not_nan(x):
23-
return x == x
24-
25-
26-
def _any_not_nan(p, indexes):
27-
return any(map(lambda x: _not_nan(p[x]), indexes))
28-
29-
3022
class DataframeSerializer:
3123
"""Serialize DataFrame into LineProtocols."""
3224

@@ -77,7 +69,7 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION
7769
# When NaNs are present, the expression looks like this (split
7870
# across two lines to satisfy the code-style checker)
7971
#
80-
# lambda p: f"""{measurement_name} {"" if math.isnan(p[1])
72+
# lambda p: f"""{measurement_name} {"" if pd.isna(p[1])
8173
# else f"{keys[0]}={p[1]}"},{keys[1]}={p[2]}i {p[0].value}"""
8274
#
8375
# When there's a NaN value in column a, we'll end up with a comma at the start of the
@@ -175,7 +167,7 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION
175167
# This column is a tag column.
176168
if null_columns.iloc[index]:
177169
key_value = f"""{{
178-
'' if {val_format} == '' or type({val_format}) == float and math.isnan({val_format}) else
170+
'' if {val_format} == '' or pd.isna({val_format}) else
179171
f',{key_format}={{str({val_format}).translate(_ESCAPE_STRING)}}'
180172
}}"""
181173
else:
@@ -192,19 +184,16 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION
192184
# field column has no nulls, we don't run the comma-removal
193185
# regexp substitution step.
194186
sep = '' if len(field_indexes) == 0 else ','
195-
if issubclass(value.type, np.integer):
196-
field_value = f"{sep}{key_format}={{{val_format}}}i"
197-
elif issubclass(value.type, np.bool_):
198-
field_value = f'{sep}{key_format}={{{val_format}}}'
199-
elif issubclass(value.type, np.floating):
187+
if issubclass(value.type, np.integer) or issubclass(value.type, np.floating) or issubclass(value.type, np.bool_): # noqa: E501
188+
suffix = 'i' if issubclass(value.type, np.integer) else ''
200189
if null_columns.iloc[index]:
201-
field_value = f"""{{"" if math.isnan({val_format}) else f"{sep}{key_format}={{{val_format}}}"}}"""
190+
field_value = f"""{{"" if pd.isna({val_format}) else f"{sep}{key_format}={{{val_format}}}{suffix}"}}""" # noqa: E501
202191
else:
203-
field_value = f'{sep}{key_format}={{{val_format}}}'
192+
field_value = f"{sep}{key_format}={{{val_format}}}{suffix}"
204193
else:
205194
if null_columns.iloc[index]:
206195
field_value = f"""{{
207-
'' if type({val_format}) == float and math.isnan({val_format}) else
196+
'' if pd.isna({val_format}) else
208197
f'{sep}{key_format}="{{str({val_format}).translate(_ESCAPE_STRING)}}"'
209198
}}"""
210199
else:
@@ -229,17 +218,21 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION
229218
'_ESCAPE_KEY': _ESCAPE_KEY,
230219
'_ESCAPE_STRING': _ESCAPE_STRING,
231220
'keys': keys,
232-
'math': math,
221+
'pd': pd,
233222
})
234223

235224
for k, v in dict(data_frame.dtypes).items():
236225
if k in data_frame_tag_columns:
237226
data_frame = data_frame.replace({k: ''}, np.nan)
238227

228+
def _any_not_nan(p, indexes):
229+
return any(map(lambda x: not pd.isna(p[x]), indexes))
230+
239231
self.data_frame = data_frame
240232
self.f = f
241233
self.field_indexes = field_indexes
242234
self.first_field_maybe_null = null_columns.iloc[field_indexes[0] - 1]
235+
self._any_not_nan = _any_not_nan
243236

244237
#
245238
# prepare chunks
@@ -266,7 +259,7 @@ def serialize(self, chunk_idx: int = None):
266259
# When the first field is null (None/NaN), we'll have
267260
# a spurious leading comma which needs to be removed.
268261
lp = (re.sub('^(( |[^ ])* ),([a-zA-Z0-9])(.*)', '\\1\\3\\4', self.f(p))
269-
for p in filter(lambda x: _any_not_nan(x, self.field_indexes), _itertuples(chunk)))
262+
for p in filter(lambda x: self._any_not_nan(x, self.field_indexes), _itertuples(chunk)))
270263
return list(lp)
271264
else:
272265
return list(map(self.f, _itertuples(chunk)))

tests/test_WriteApiDataFrame.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,32 @@ def test_write_object_field_nan(self):
159159
self.assertEqual("measurement val=2i 1586046600000000000",
160160
points[1])
161161

162+
def test_write_missing_values(self):
163+
from influxdb_client.extras import pd
164+
165+
data_frame = pd.DataFrame({
166+
"a_bool": [True, None, False],
167+
"b_int": [None, 1, 2],
168+
"c_float": [1.0, 2.0, None],
169+
"d_str": ["a", "b", None],
170+
})
171+
172+
data_frame['a_bool'] = data_frame['a_bool'].astype(pd.BooleanDtype())
173+
data_frame['b_int'] = data_frame['b_int'].astype(pd.Int64Dtype())
174+
data_frame['c_float'] = data_frame['c_float'].astype(pd.Float64Dtype())
175+
data_frame['d_str'] = data_frame['d_str'].astype(pd.StringDtype())
176+
177+
print(data_frame)
178+
points = data_frame_to_list_of_points(
179+
data_frame=data_frame,
180+
point_settings=PointSettings(),
181+
data_frame_measurement_name='measurement')
182+
183+
self.assertEqual(3, len(points))
184+
self.assertEqual("measurement a_bool=True,c_float=1.0,d_str=\"a\" 0", points[0])
185+
self.assertEqual("measurement b_int=1i,c_float=2.0,d_str=\"b\" 1", points[1])
186+
self.assertEqual("measurement a_bool=False,b_int=2i 2", points[2])
187+
162188
def test_write_field_bool(self):
163189
from influxdb_client.extras import pd
164190

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy