Skip to content
This repository was archived by the owner on Oct 29, 2024. It is now read-only.

Pythonic changes #133

Merged
merged 7 commits into from
Mar 25, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions influxdb/_dataframe_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# -*- coding: utf-8 -*-
"""
DataFrame client for InfluxDB
"""
import math
import warnings

from .client import InfluxDBClient

import pandas as pd


class DataFrameClient(InfluxDBClient):
    """
    The ``DataFrameClient`` object holds information necessary to connect
    to InfluxDB. Requests can be made to InfluxDB directly through the client.
    The client reads and writes from pandas DataFrames.
    """

    # Reference point used by _datetime_to_epoch to turn timezone-aware
    # timestamps into epoch offsets.
    EPOCH = pd.Timestamp('1970-01-01 00:00:00.000+00:00')

    def write_points(self, data, *args, **kwargs):
        """
        Write to multiple time series names.

        :param data: A dictionary mapping series names to pandas DataFrames
        :param time_precision: [Optional, default 's'] Either 's', 'm', 'ms'
            or 'u'.
        :param batch_size: [Optional] Value to write the points in batches
            instead of all at one time. Useful for when doing data dumps from
            one database to another or when doing a massive write operation
        :type batch_size: int
        """
        # Always remove batch_size from kwargs so it is never handed to
        # InfluxDBClient.write_points (the original only popped it when
        # truthy, leaking e.g. batch_size=0 to the parent).
        batch_size = kwargs.pop('batch_size', None)
        time_precision = kwargs.get('time_precision', 's')
        if batch_size:
            for key, data_frame in data.items():
                number_batches = int(math.ceil(
                    len(data_frame) / float(batch_size)))
                for batch in range(number_batches):
                    start_index = batch * batch_size
                    end_index = (batch + 1) * batch_size
                    # iloc is positional and end-exclusive, so consecutive
                    # batches never overlap; replaces the deprecated .ix.
                    # Use a new name instead of rebinding the `data` param.
                    points = [self._convert_dataframe_to_json(
                        name=key,
                        dataframe=data_frame.iloc[start_index:end_index].copy(),
                        time_precision=time_precision)]
                    super(DataFrameClient, self).write_points(points,
                                                              *args, **kwargs)
            return True
        else:
            points = [self._convert_dataframe_to_json(
                name=key, dataframe=dataframe, time_precision=time_precision)
                for key, dataframe in data.items()]
            return super(DataFrameClient, self).write_points(points,
                                                             *args, **kwargs)

    def write_points_with_precision(self, data, time_precision='s'):
        """
        DEPRECATED. Write to multiple time series names.
        """
        warnings.warn(
            "write_points_with_precision is deprecated, and will be removed "
            "in future versions. Please use "
            "``DataFrameClient.write_points(time_precision='..')`` instead.",
            FutureWarning)
        # BUG FIX: forward the caller's precision instead of hard-coding 's'.
        return self.write_points(data, time_precision=time_precision)

    def query(self, query, time_precision='s', chunked=False, database=None):
        """
        Query data into a DataFrame.

        :param time_precision: [Optional, default 's'] Either 's', 'm', 'ms'
            or 'u'.
        :param chunked: [Optional, default=False] True if the data shall be
            retrieved in chunks, False otherwise.
        :param database: [Optional] database to query against; when None the
            parent client falls back to its configured database.
        """
        # BUG FIX: `database` was referenced here without ever being
        # defined, so every call raised NameError.  It is now an optional
        # keyword parameter (backward compatible with existing callers).
        results = super(DataFrameClient, self).query(query, database=database)
        if len(results) > 0:
            return self._to_dataframe(results, time_precision)
        else:
            return results

    def _to_dataframe(self, json_result, time_precision):
        """Build a UTC time-indexed DataFrame from one InfluxDB JSON result."""
        dataframe = pd.DataFrame(data=json_result['points'],
                                 columns=json_result['columns'])
        if 'sequence_number' in dataframe.keys():
            dataframe.sort(['time', 'sequence_number'], inplace=True)
        else:
            dataframe.sort(['time'], inplace=True)
        # Map InfluxDB precision codes onto pandas time units
        # ('m' -> milliseconds, 'u' -> microseconds).
        pandas_time_unit = time_precision
        if time_precision == 'm':
            pandas_time_unit = 'ms'
        elif time_precision == 'u':
            pandas_time_unit = 'us'
        dataframe.index = pd.to_datetime(list(dataframe['time']),
                                         unit=pandas_time_unit,
                                         utc=True)
        del dataframe['time']
        return dataframe

    def _convert_dataframe_to_json(self, dataframe, name, time_precision='s'):
        """Convert a time-indexed DataFrame to the InfluxDB point format.

        :raises TypeError: if ``dataframe`` is not a DataFrame or is not
            indexed by a DatetimeIndex/PeriodIndex.
        """
        if not isinstance(dataframe, pd.DataFrame):
            raise TypeError('Must be DataFrame, but type was: {}.'
                            .format(type(dataframe)))
        if not (isinstance(dataframe.index, pd.tseries.period.PeriodIndex) or
                isinstance(dataframe.index, pd.tseries.index.DatetimeIndex)):
            # BUG FIX: the original message was built with a backslash
            # continuation inside the literal, embedding a run of
            # indentation whitespace in the error text.
            raise TypeError('Must be DataFrame with DatetimeIndex or '
                            'PeriodIndex.')
        dataframe.index = dataframe.index.to_datetime()
        if dataframe.index.tzinfo is None:
            # Naive timestamps are assumed to be UTC.
            dataframe.index = dataframe.index.tz_localize('UTC')
        dataframe['time'] = [self._datetime_to_epoch(dt, time_precision)
                             for dt in dataframe.index]
        data = {'name': name,
                'columns': [str(column) for column in dataframe.columns],
                'points': [list(x) for x in dataframe.values]}
        return data

    def _datetime_to_epoch(self, datetime, time_precision='s'):
        """Return the epoch offset of *datetime* in the requested precision."""
        seconds = (datetime - self.EPOCH).total_seconds()
        if time_precision == 's':
            return seconds
        elif time_precision == 'm' or time_precision == 'ms':
            return seconds * 1000
        elif time_precision == 'u':
            return seconds * 1000000
        # BUG FIX: fail loudly instead of silently returning None.
        raise ValueError(
            "Invalid time precision: {}".format(time_precision))
18 changes: 8 additions & 10 deletions influxdb/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,9 @@ def format_query_response(response):
if 'columns' in row.keys() and 'values' in row.keys():
for value in row['values']:
item = {}
current_col = 0
for field in value:
item[row['columns'][current_col]] = field
current_col += 1
for cur_col, field in enumerate(value):
item[row['columns'][cur_col]] = field
cur_col += 1
items.append(item)
return series

Expand Down Expand Up @@ -237,8 +236,7 @@ def write_points(self,
time_precision=None,
database=None,
retention_policy=None,
*args,
**kwargs):
):
"""
Write to multiple time series names.

Expand All @@ -261,12 +259,12 @@ def _write_points(self,
database,
retention_policy):
if time_precision not in ['n', 'u', 'ms', 's', 'm', 'h', None]:
raise Exception(
raise ValueError(
"Invalid time precision is given. "
"(use 'n', 'u', 'ms', 's', 'm' or 'h')")

if self.use_udp and time_precision and time_precision != 's':
raise Exception(
raise ValueError(
"InfluxDB only supports seconds precision for udp writes"
)

Expand Down Expand Up @@ -325,7 +323,7 @@ def create_retention_policy(
query_string = \
"CREATE RETENTION POLICY %s ON %s " \
"DURATION %s REPLICATION %s" % \
(name, (database or self._database), duration, replication)
(name, database or self._database, duration, replication)

if default is True:
query_string += " DEFAULT"
Expand All @@ -344,7 +342,7 @@ def get_list_series(self, database=None):
"""
Get the list of series
"""
return self.query("SHOW SERIES", database=(database or self._database))
return self.query("SHOW SERIES", database=database)

def get_list_users(self):
"""
Expand Down
146 changes: 12 additions & 134 deletions influxdb/dataframe_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,140 +2,18 @@
"""
DataFrame client for InfluxDB
"""
import math
import warnings

from .client import InfluxDBClient
__all__ = ['DataFrameClient']

try:
import pandas as pd
except ImportError:
pd = None


class DataFrameClient(InfluxDBClient):
    """
    The ``DataFrameClient`` object holds information necessary to connect
    to InfluxDB. Requests can be made to InfluxDB directly through the client.
    The client reads and writes from pandas DataFrames.
    """

    def __init__(self, *args, **kwargs):
        """Connect like ``InfluxDBClient``; requires pandas to be importable."""
        super(DataFrameClient, self).__init__(*args, **kwargs)
        if not pd:
            raise ImportError(
                'DataFrameClient requires Pandas'
            )

        # Reference point used by _datetime_to_epoch to turn timezone-aware
        # timestamps into epoch offsets.
        self.EPOCH = pd.Timestamp('1970-01-01 00:00:00.000+00:00')

    def write_points(self, data, *args, **kwargs):
        """
        Write to multiple time series names.

        :param data: A dictionary mapping series names to pandas DataFrames
        :param time_precision: [Optional, default 's'] Either 's', 'm', 'ms'
            or 'u'.
        :param batch_size: [Optional] Value to write the points in batches
            instead of all at one time. Useful for when doing data dumps from
            one database to another or when doing a massive write operation
        :type batch_size: int
        """
        # Always remove batch_size from kwargs so it is never handed to
        # InfluxDBClient.write_points.
        batch_size = kwargs.pop('batch_size', None)
        time_precision = kwargs.get('time_precision', 's')
        if batch_size:
            for key, data_frame in data.items():
                number_batches = int(math.ceil(
                    len(data_frame) / float(batch_size)))
                for batch in range(number_batches):
                    start_index = batch * batch_size
                    end_index = (batch + 1) * batch_size
                    # iloc is positional and end-exclusive, so consecutive
                    # batches never overlap; replaces the deprecated .ix.
                    # Use a new name instead of rebinding the `data` param.
                    points = [self._convert_dataframe_to_json(
                        name=key,
                        dataframe=data_frame.iloc[start_index:end_index].copy(),
                        time_precision=time_precision)]
                    InfluxDBClient.write_points(self, points, *args, **kwargs)
            return True
        else:
            points = [self._convert_dataframe_to_json(
                name=key, dataframe=dataframe, time_precision=time_precision)
                for key, dataframe in data.items()]
            return InfluxDBClient.write_points(self, points, *args, **kwargs)

    def write_points_with_precision(self, data, time_precision='s'):
        """
        DEPRECATED. Write to multiple time series names.
        """
        warnings.warn(
            "write_points_with_precision is deprecated, and will be removed "
            "in future versions. Please use "
            "``DataFrameClient.write_points(time_precision='..')`` instead.",
            FutureWarning)
        # BUG FIX: forward the caller's precision instead of hard-coding 's'.
        return self.write_points(data, time_precision=time_precision)

    def query(self, query, time_precision='s', chunked=False):
        """
        Query data into a DataFrame.

        :param time_precision: [Optional, default 's'] Either 's', 'm', 'ms'
            or 'u'.
        :param chunked: [Optional, default=False] True if the data shall be
            retrieved in chunks, False otherwise.
        """
        result = InfluxDBClient.query(self,
                                      query=query,
                                      time_precision=time_precision,
                                      chunked=chunked)
        if len(result['results'][0]) > 0:
            return self._to_dataframe(result['results'][0], time_precision)
        else:
            return result

    def _to_dataframe(self, json_result, time_precision):
        """Build a UTC time-indexed DataFrame from one InfluxDB JSON result."""
        dataframe = pd.DataFrame(data=json_result['points'],
                                 columns=json_result['columns'])
        if 'sequence_number' in dataframe.keys():
            dataframe.sort(['time', 'sequence_number'], inplace=True)
        else:
            dataframe.sort(['time'], inplace=True)
        # Map InfluxDB precision codes onto pandas time units
        # ('m' -> milliseconds, 'u' -> microseconds).
        pandas_time_unit = time_precision
        if time_precision == 'm':
            pandas_time_unit = 'ms'
        elif time_precision == 'u':
            pandas_time_unit = 'us'
        dataframe.index = pd.to_datetime(list(dataframe['time']),
                                         unit=pandas_time_unit,
                                         utc=True)
        del dataframe['time']
        return dataframe

    def _convert_dataframe_to_json(self, dataframe, name, time_precision='s'):
        """Convert a time-indexed DataFrame to the InfluxDB point format.

        :raises TypeError: if ``dataframe`` is not a DataFrame or is not
            indexed by a DatetimeIndex/PeriodIndex.
        """
        if not isinstance(dataframe, pd.DataFrame):
            raise TypeError('Must be DataFrame, but type was: {}.'
                            .format(type(dataframe)))
        if not (isinstance(dataframe.index, pd.tseries.period.PeriodIndex) or
                isinstance(dataframe.index, pd.tseries.index.DatetimeIndex)):
            # BUG FIX: the original message was built with a backslash
            # continuation inside the literal, embedding a run of
            # indentation whitespace in the error text.
            raise TypeError('Must be DataFrame with DatetimeIndex or '
                            'PeriodIndex.')
        dataframe.index = dataframe.index.to_datetime()
        if dataframe.index.tzinfo is None:
            # Naive timestamps are assumed to be UTC.
            dataframe.index = dataframe.index.tz_localize('UTC')
        dataframe['time'] = [self._datetime_to_epoch(dt, time_precision)
                             for dt in dataframe.index]
        data = {'name': name,
                'columns': [str(column) for column in dataframe.columns],
                'points': [list(x) for x in dataframe.values]}
        return data

    def _datetime_to_epoch(self, datetime, time_precision='s'):
        """Return the epoch offset of *datetime* in the requested precision."""
        seconds = (datetime - self.EPOCH).total_seconds()
        if time_precision == 's':
            return seconds
        elif time_precision == 'm' or time_precision == 'ms':
            return seconds * 1000
        elif time_precision == 'u':
            return seconds * 1000000
        # BUG FIX: fail loudly instead of silently returning None.
        raise ValueError(
            "Invalid time precision: {}".format(time_precision))
import pandas
del pandas
except ImportError as err:
from .client import InfluxDBClient

class DataFrameClient(InfluxDBClient):
    """Stand-in installed when pandas is missing; fails fast on use."""

    def __init__(self, *args, **kwargs):
        # Surface the original import failure (`err` from the enclosing
        # except block) the moment anyone tries to construct the client.
        raise ImportError("DataFrameClient requires Pandas "
                          "which couldn't be imported: %s" % err)
else:
from ._dataframe_client import DataFrameClient
25 changes: 18 additions & 7 deletions tests/influxdb/dataframe_client_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,13 +211,24 @@ def test_query_with_empty_result(self):
assert result == []

def test_list_series(self):
response = [
{
'columns': ['time', 'name'],
'name': 'list_series_result',
'points': [[0, 'seriesA'], [0, 'seriesB']]
}
]
response = {
'results': [
{
'series': [{
'columns': ['id'],
'name': 'seriesA',
'values': [[0]],
}]
},
{
'series': [{
'columns': ['id'],
'name': 'seriesB',
'values': [[1]],
}]
},
]
}
with _mocked_session('get', 200, response):
cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
series_list = cli.get_list_series()
Expand Down
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy