Skip to content

Commit 66c1ff7

Browse files
committed
add dump in record chunk sizes
Option to dump data in a folder in fragment files of a given chunk size. This works only if the chunksize option is properly implemented in influxdb-python (see influxdata/influxdb-python#753)
1 parent 8ca2533 commit 66c1ff7

File tree

8 files changed

+153
-52
lines changed

8 files changed

+153
-52
lines changed

README.rst

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,25 @@ flexibility on how to load it back in the database.
1212
Usage
1313
=====
1414

15-
Dump data::
15+
Dump all data from a database::
1616

17-
$ influxdump -u root -p -d database > data_dump.json
17+
$ influxdump -u jdoe -W -d database > data_dump.json
18+
19+
Dump data matching a pattern in chunk files of 50,000 records::
20+
21+
$ influxdump -u jdoe -W -d database -f _dump -c 50000 -m "node*"
22+
23+
Load data from a dump folder::
24+
25+
$ influxdump -u jdoe -W -d database -f _dump
26+
27+
28+
Install
29+
=======
30+
31+
.. code-block:: sh
32+
33+
$ pip install influxdump
1834
1935
Packaging
2036
=========

influxdump/__init__.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,36 @@
22
import argparse
33
import getpass
44
import json
5+
import sys
56

6-
from data import dump_data, write_data, load_data
7+
from data import dump_data, load_file, load_folder
78
from db import get_client
89

910

1011
__author__ = 'Stefan Berder <stefan@measureofquality.com>'
1112
__contact__ = 'code+influxdump@measureofquality.com'
1213
__version__ = "1.0.3"
1314

15+
CHUNKSIZE = 50000
16+
1417

1518
def get_args():
1619
parser = argparse.ArgumentParser(description='influxDB data backup tool')
20+
parser.add_argument('-c', '--chunksize',
21+
help='query chunk size, default to {}'.format(CHUNKSIZE),
22+
type=int, default=CHUNKSIZE)
1723
parser.add_argument('-d', '--database', help='database', required=True,
1824
type=str)
1925
parser.add_argument('-F', '--folder', default=None,
20-
help="destination folder for fragmented dump, if this flag is not used then dump on stdoout")
26+
help="destination folder for fragmented dump, if this flag is not used then dump on stdout")
2127
parser.add_argument('-H', '--host', help='server host',
2228
default="localhost", type=str)
2329
parser.add_argument('-i', '--input', default=None,
2430
help="data/metadata input file, will force action to 'load'")
2531
parser.add_argument('-L', '--legacy', action="store_true",
2632
help='influxdb legacy client (<=0.8)')
2733
parser.add_argument('-m', '--measurements', help='measurement pattern')
28-
parser.add_argument('-n', '--dry-run', help='do mot really do anything', action="store_true")
34+
parser.add_argument('-n', '--dry-run', help='do not really do anything', action="store_true")
2935
parser.add_argument('-p', '--port', help='server port', default=8086,
3036
type=int)
3137
parser.add_argument('-u', '--user', help='username', default='', type=str)
@@ -35,16 +41,27 @@ def get_args():
3541
parser.add_argument('-W', '--pwdprompt', help='password prompt',
3642
action="store_true")
3743
parser.add_argument('action', metavar="action", nargs="?", default='dump',
38-
help="action, can be 'dump' or 'load', default to 'dump'",
39-
choices=["load", "dump"])
44+
help="""
45+
action, can be 'dump' or 'load', default to 'dump'. If action is
46+
'load', one input file (--input) or a folder with data to load has
47+
to be provided
48+
""", choices=["load", "dump"])
4049
args = parser.parse_args()
4150

4251
if args.pwdprompt is True:
4352
pwd = getpass.getpass()
4453
else:
4554
pwd = args.password
4655

56+
if args.action == "load" \
57+
and args.input is None and args.folder is None:
58+
sys.stderr.write("Action is load, missing input file or folder\n\n")
59+
parser.print_help()
60+
sys.exit(1)
61+
62+
4763
return {
64+
"chunksize": args.chunksize,
4865
"db": args.database,
4966
"folder": args.folder,
5067
"host": args.host,
@@ -62,12 +79,15 @@ def get_args():
6279

6380
def dump(args, client):
6481
dump_data(client, args["measurements"], args["folder"],
65-
dryrun=args["dryrun"], verbose=args["verbose"])
82+
dryrun=args["dryrun"], verbose=args["verbose"],
83+
chunk_size=args["chunksize"])
6684

6785

6886
def load(args, client):
69-
data = load_data(args["input"])
70-
return write_data(client, data)
87+
if args["input"] is not None:
88+
load_file(client, args["input"], verbose=args["verbose"])
89+
else:
90+
load_folder(client, args["folder"], verbose=args["verbose"])
7191

7292

7393
def main():

influxdump/data.py

Lines changed: 67 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,41 @@
11
# -*- coding: utf-8 -*-
22
from datetime import datetime
33
import json
4+
import os
45
import os.path
56
import sys
67

78
from db import get_queries, data_to_points
89

910

10-
def query_data(c, queries):
11+
def query_data(c, queries, chunk_size):
1112
"""Generator querying the db and sending back data for each query as
1213
elements.
1314
"""
1415
data = []
1516
for q in queries:
16-
res = c.query(q.get_query())
17-
records = []
18-
for point in c.get_points(res):
19-
records.append(point)
20-
yield {
21-
"meta": q.get_meta(),
22-
"records": records
23-
}
17+
res = c.query(q.get_query(),
18+
chunked=True,
19+
chunk_size=chunk_size)
20+
counter = 0
21+
for r in res:
22+
records = []
23+
counter += 1
24+
for point in c.get_points(r):
25+
records.append(point)
2426

27+
yield (counter, {
28+
"meta": q.get_meta(),
29+
"records": records
30+
})
2531

26-
def dump_data(c, pattern=None, folder=None, dryrun=False, verbose=False):
32+
33+
def dump_data(c, pattern=None, folder=None, dryrun=False, verbose=False,
34+
chunk_size=50000):
2735
"""Get data from the database, return an `influxdb.ResultSet`
2836
2937
:param c: an influxdb client instance
3038
:type c: InfluxDBClient
31-
:param measurements: a list of measurements to query
32-
:type measurements: list
3339
"""
3440
measurements = c.get_measurements(pattern)
3541
if verbose is True or dryrun is True:
@@ -43,30 +49,68 @@ def dump_data(c, pattern=None, folder=None, dryrun=False, verbose=False):
4349
for m in measurements:
4450
sys.stdout.write(" {}\n".format(m))
4551
else:
46-
for data in query_data(c, queries):
52+
for (counter, data) in query_data(c, queries, chunk_size):
4753
if folder is None:
4854
if verbose is True:
4955
sys.stdout.write("> dumping {}\n".format(
5056
data["meta"]["measurement"]))
5157
print(json.dumps(data))
5258
else:
53-
filename = data["meta"]["measurement"] + ".json"
54-
dumpfile = os.path.join(folder, filename)
59+
bundle = os.path.join(folder,
60+
data["meta"]["measurement"])
61+
if not os.path.exists(bundle):
62+
os.makedirs(bundle)
63+
64+
fragment = "{}-{:05d}.json".format(
65+
data["meta"]["measurement"],
66+
counter)
67+
dumpfile = os.path.join(bundle, fragment)
68+
data["meta"]["chunk_count"] = counter
69+
5570
if verbose is True:
56-
sys.stdout.write("> dumping {} to {} ({} records) [{}]\n".format(
57-
data["meta"]["measurement"], filename,
71+
sys.stdout.write(
72+
"> dumping {} (chunk {:05d}) to {} ({} records) [{}]\n".format(
73+
data["meta"]["measurement"], counter, dumpfile,
5874
len(data["records"]), datetime.now().isoformat()))
75+
5976
with open(dumpfile, "w") as fd:
6077
json.dump(data, fd)
6178

6279

6380
def write_data(c, data):
64-
for chunk in data:
65-
points = data_to_points(chunk["meta"]["measurement"],
66-
chunk["records"])
67-
c.write_points(points, batch_size=10000)
81+
#for chunk in data:
82+
points = data_to_points(data["meta"]["measurement"],
83+
data["records"])
84+
c.write_points(points, batch_size=10000)
6885

6986

70-
def load_data(datafile):
87+
def load_file(c, datafile, verbose=False):
7188
with open(datafile, 'r') as fh:
72-
return json.load(fh)
89+
data = json.load(fh)
90+
91+
if verbose is True:
92+
sys.stdout.write(
93+
"> loading {} in {} ({} records) [{}]\n".format(
94+
datafile, data["meta"]["measurement"],
95+
len(data["records"]), datetime.now().isoformat()))
96+
97+
write_data(c, data)
98+
99+
100+
def load_folder(c, folder, verbose=False):
101+
for (dirpath, dirnames, filenames) in os.walk(folder):
102+
filenames.sort()
103+
for filename in filenames:
104+
if filename.endswith('.json'):
105+
datafile = os.path.join(dirpath, filename)
106+
107+
with open(datafile, 'r') as fh:
108+
data = json.load(fh)
109+
if verbose is True:
110+
sys.stdout.write(
111+
"> loading {} in {} ({} records) [{}]\n".format(
112+
datafile, data["meta"]["measurement"],
113+
len(data["records"]), datetime.now().isoformat()))
114+
115+
write_data(c, data)
116+
del data

influxdump/db.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ def get_measurements(self, pattern=None):
4545

4646
return measurements
4747

48+
def write_points(self, *args, **kwargs):
49+
return self._client.write_points(*args, **kwargs)
50+
4851

4952
class InfluxDB08Client(InfluxDBClient):
5053
def __init__(self, host, port, user, pwd, db):

requirements-dev.in

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
-r requirements.in
2-
pip-tools==1.9.0
3-
bumpr==0.3.6
4-
twine==1.8.1
2+
bumpr==0.3.7
3+
pip-tools==4.1.0
4+
twine==1.13.0

requirements-dev.txt

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,31 @@
22
# This file is autogenerated by pip-compile
33
# To update, run:
44
#
5-
# pip-compile --output-file requirements-dev.txt requirements-dev.in
5+
# pip-compile requirements-dev.in
66
#
7-
args==0.1.0 # via clint
8-
bumpr==0.3.6
7+
--index-url https://pypi.priv.measureofquality.com/gams/prod/+simple/
8+
9+
bleach==3.1.0 # via readme-renderer
10+
bumpr==0.3.7
11+
certifi==2019.9.11 # via requests
12+
chardet==3.0.4 # via requests
913
click==6.6 # via pip-tools
10-
clint==0.5.1 # via twine
11-
first==2.0.1 # via pip-tools
12-
influxdb==4.0.0
13-
pip-tools==1.9.0
14-
pkginfo==1.4.1 # via twine
14+
docutils==0.15.2 # via readme-renderer
15+
idna==2.8 # via requests
16+
influxdb==5.2.3
17+
pip-tools==4.1.0
18+
pkginfo==1.5.0.1 # via twine
19+
pygments==2.4.2 # via readme-renderer
1520
python-dateutil==2.6.0 # via influxdb
1621
pytz==2016.10 # via influxdb
17-
requests-toolbelt==0.7.1 # via twine
18-
requests==2.12.3 # via influxdb, requests-toolbelt, twine
19-
six==1.10.0 # via influxdb, pip-tools, python-dateutil
20-
twine==1.8.1
22+
readme-renderer==24.0 # via twine
23+
requests-toolbelt==0.9.1 # via twine
24+
requests==2.22.0 # via influxdb, requests-toolbelt, twine
25+
six==1.10.0 # via bleach, influxdb, pip-tools, python-dateutil, readme-renderer
26+
tqdm==4.35.0 # via twine
27+
twine==1.13.0
28+
urllib3==1.25.3 # via requests
29+
webencodings==0.5.1 # via bleach
30+
31+
# The following packages are considered to be unsafe in a requirements file:
32+
# setuptools==41.2.0 # via twine

requirements.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
influxdb==4.0.0
1+
influxdb==5.2.3

requirements.txt

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,16 @@
22
# This file is autogenerated by pip-compile
33
# To update, run:
44
#
5-
# pip-compile --output-file requirements.txt requirements.in
5+
# pip-compile requirements.in
66
#
7-
influxdb==4.0.0
7+
--index-url https://pypi.priv.measureofquality.com/gams/prod/+simple/
8+
9+
certifi==2019.9.11 # via requests
10+
chardet==3.0.4 # via requests
11+
idna==2.8 # via requests
12+
influxdb==5.2.3
813
python-dateutil==2.6.0 # via influxdb
914
pytz==2016.10 # via influxdb
10-
requests==2.12.3 # via influxdb
15+
requests==2.22.0 # via influxdb
1116
six==1.10.0 # via influxdb, python-dateutil
17+
urllib3==1.25.3 # via requests

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy