Skip to content

Commit d284254

Browse files
authored
chore: optimize appending new columns to Pandas DataFrame (influxdata#348)
1 parent 242444d commit d284254

File tree

3 files changed: +36 additions, -7 deletions

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,9 @@
55
1. [#335](https://github.com/influxdata/influxdb-client-python/pull/335): Add support for custom precision for index specified as number [DataFrame]
66
1. [#341](https://github.com/influxdata/influxdb-client-python/pull/341): Add support for handling batch events
77

8+
### Bug Fixes
9+
1. [#348](https://github.com/influxdata/influxdb-client-python/pull/348): Optimize appending new columns to Pandas DataFrame [DataFrame]
10+
811
### Documentation
912
1. [#331](https://github.com/influxdata/influxdb-client-python/pull/331): Add [Migration Guide](MIGRATION_GUIDE.rst)
1013
1. [#341](https://github.com/influxdata/influxdb-client-python/pull/341): How to handle client errors

influxdb_client/client/flux_csv_parser.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -133,9 +133,8 @@ def _parse_flux_response(self):
133133
# Create DataFrame with default values
134134
if self._serialization_mode is FluxSerializationMode.dataFrame:
135135
from ..extras import pd
136-
self._data_frame = pd.DataFrame(data=[], columns=[], index=None)
137-
for column in table.columns:
138-
self._data_frame[column.label] = column.default_value
136+
labels = list(map(lambda it: it.label, table.columns))
137+
self._data_frame = pd.DataFrame(data=[], columns=labels, index=None)
139138
pass
140139
continue
141140

tests/test_FluxCSVParser.py

Lines changed: 31 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -212,11 +212,38 @@ def test_to_json(self):
212212
import json
213213
self.assertEqual(query_output, json.dumps(tables, cls=FluxStructureEncoder, indent=2))
214214

215+
def test_pandas_lot_of_columns(self):
216+
data_types = ""
217+
groups = ""
218+
defaults = ""
219+
columns = ""
220+
values = ""
221+
for i in range(0, 200):
222+
data_types += f",long"
223+
groups += f",false"
224+
defaults += f","
225+
columns += f",column_{i}"
226+
values += f",{i}"
227+
228+
data = f"#datatype,string,long,string,string,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,double,string{data_types}\n" \
229+
f"#group,false,false,true,true,true,true,false,false,true{groups}\n" \
230+
f"#default,_result,,,,,,,,{defaults}\n" \
231+
f",result,table,_field,_measurement,_start,_stop,_time,_value,tag{columns}\n" \
232+
f",,0,value,python_client_test,2010-02-27T04:48:32.752600083Z,2020-02-27T16:48:32.752600083Z,2020-02-27T16:20:00Z,2,test1{values}\n" \
233+
234+
parser = self._parse(data=data, serialization_mode=FluxSerializationMode.dataFrame)
235+
_dataFrames = list(parser.generator())
236+
self.assertEqual(1, _dataFrames.__len__())
237+
215238
@staticmethod
216-
def _parse_to_tables(data: str):
217-
fp = BytesIO(str.encode(data))
218-
_parser = FluxCsvParser(response=HTTPResponse(fp, preload_content=False),
219-
serialization_mode=FluxSerializationMode.tables)
239+
def _parse_to_tables(data: str, serialization_mode=FluxSerializationMode.tables):
240+
_parser = FluxCsvParserTest._parse(data, serialization_mode)
220241
list(_parser.generator())
221242
tables = _parser.tables
222243
return tables
244+
245+
@staticmethod
246+
def _parse(data, serialization_mode):
247+
fp = BytesIO(str.encode(data))
248+
return FluxCsvParser(response=HTTPResponse(fp, preload_content=False),
249+
serialization_mode=serialization_mode)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy