From 0efaba68046f3e681bba1d103b3c7575f743bcf1 Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Tue, 21 Jan 2020 23:06:52 +0100 Subject: [PATCH] Remove the private, unused _csv2rec. It was only left as a helper for the deprecated and now removed plotfile(). --- doc/faq/howto_faq.rst | 38 +--- lib/matplotlib/mlab.py | 281 ------------------------------ lib/matplotlib/pyplot.py | 2 +- lib/matplotlib/tests/test_mlab.py | 49 ------ 4 files changed, 8 insertions(+), 362 deletions(-) diff --git a/doc/faq/howto_faq.rst b/doc/faq/howto_faq.rst index 212bd6badb3f..6ffa28963670 100644 --- a/doc/faq/howto_faq.rst +++ b/doc/faq/howto_faq.rst @@ -336,37 +336,13 @@ setting in the right subplots. Skip dates where there is no data --------------------------------- -When plotting time series, e.g., financial time series, one often wants -to leave out days on which there is no data, e.g., weekends. By passing -in dates on the x-xaxis, you get large horizontal gaps on periods when -there is not data. The solution is to pass in some proxy x-data, e.g., -evenly sampled indices, and then use a custom formatter to format -these as dates. The example below shows how to use an 'index formatter' -to achieve the desired plot:: - - import numpy as np - import matplotlib.pyplot as plt - import matplotlib.mlab as mlab - import matplotlib.ticker as ticker - - r = mlab.csv2rec('../data/aapl.csv') - r.sort() - r = r[-30:] # get the last 30 days - - N = len(r) - ind = np.arange(N) # the evenly spaced plot indices - - def format_date(x, pos=None): - thisind = np.clip(int(x+0.5), 0, N-1) - return r.date[thisind].strftime('%Y-%m-%d') - - fig = plt.figure() - ax = fig.add_subplot(111) - ax.plot(ind, r.adj_close, 'o-') - ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_date)) - fig.autofmt_xdate() - - plt.show() +When plotting time series, e.g., financial time series, one often wants to +leave out days on which there is no data, e.g., weekends. 
By passing in +dates on the x-xaxis, you get large horizontal gaps on periods when there +is not data. The solution is to pass in some proxy x-data, e.g., evenly +sampled indices, and then use a custom formatter to format these as dates. +:doc:`/gallery/text_labels_and_annotations/date_index_formatter` demonstrates +how to use an 'index formatter' to achieve the desired plot. .. _howto-set-zorder: diff --git a/lib/matplotlib/mlab.py b/lib/matplotlib/mlab.py index 7c1fb194f7de..fe9be26c0002 100644 --- a/lib/matplotlib/mlab.py +++ b/lib/matplotlib/mlab.py @@ -53,7 +53,6 @@ Apply a window along a given axis """ -import csv import functools from numbers import Number @@ -985,286 +984,6 @@ def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning, return Cxy, f -def _csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',', - converterd=None, names=None, missing='', missingd=None, - use_mrecords=False, dayfirst=False, yearfirst=False): - """ - Load data from comma/space/tab delimited file in *fname* into a - numpy record array and return the record array. - - If *names* is *None*, a header row is required to automatically - assign the recarray names. The headers will be lower cased, - spaces will be converted to underscores, and illegal attribute - name characters removed. If *names* is not *None*, it is a - sequence of names to use for the column names. In this case, it - is assumed there is no header row. - - - - *fname*: can be a filename or a file handle. Support for gzipped - files is automatic, if the filename ends in '.gz' - - - *comments*: the character used to indicate the start of a comment - in the file, or *None* to switch off the removal of comments - - - *skiprows*: is the number of rows from the top to skip - - - *checkrows*: is the number of rows to check to validate the column - data type. When set to zero all rows are validated. 
- - - *converterd*: if not *None*, is a dictionary mapping column number or - munged column name to a converter function. - - - *names*: if not None, is a list of header names. In this case, no - header will be read from the file - - - *missingd* is a dictionary mapping munged column names to field values - which signify that the field does not contain actual data and should - be masked, e.g., '0000-00-00' or 'unused' - - - *missing*: a string whose value signals a missing field regardless of - the column it appears in - - - *use_mrecords*: if True, return an mrecords.fromrecords record array if - any of the data are missing - - - *dayfirst*: default is False so that MM-DD-YY has precedence over - DD-MM-YY. See - http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 - for further information. - - - *yearfirst*: default is False so that MM-DD-YY has precedence over - YY-MM-DD. See - http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 - for further information. - - If no rows are found, *None* is returned - """ - - if converterd is None: - converterd = dict() - - if missingd is None: - missingd = {} - - import dateutil.parser - import datetime - - fh = cbook.to_filehandle(fname) - - delimiter = str(delimiter) - - class FH: - """ - For space-delimited files, we want different behavior than - comma or tab. Generally, we want multiple spaces to be - treated as a single separator, whereas with comma and tab we - want multiple commas to return multiple (empty) fields. The - join/strip trick below effects this. 
- """ - def __init__(self, fh): - self.fh = fh - - def close(self): - self.fh.close() - - def seek(self, arg): - self.fh.seek(arg) - - def fix(self, s): - return ' '.join(s.split()) - - def __next__(self): - return self.fix(next(self.fh)) - - def __iter__(self): - for line in self.fh: - yield self.fix(line) - - if delimiter == ' ': - fh = FH(fh) - - reader = csv.reader(fh, delimiter=delimiter) - - def process_skiprows(reader): - if skiprows: - for i, row in enumerate(reader): - if i >= (skiprows-1): - break - - return fh, reader - - process_skiprows(reader) - - def ismissing(name, val): - """Return whether the value val in column name should be masked.""" - return val == missing or val == missingd.get(name) or val == '' - - def with_default_value(func, default): - def newfunc(name, val): - if ismissing(name, val): - return default - else: - return func(val) - return newfunc - - def mybool(x): - if x == 'True': - return True - elif x == 'False': - return False - else: - raise ValueError('invalid bool') - - dateparser = dateutil.parser.parse - - def mydateparser(x): - # try and return a datetime object - d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst) - return d - - mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1)) - - myfloat = with_default_value(float, np.nan) - myint = with_default_value(int, -1) - mystr = with_default_value(str, '') - mybool = with_default_value(mybool, None) - - def mydate(x): - # try and return a date object - d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst) - - if d.hour > 0 or d.minute > 0 or d.second > 0: - raise ValueError('not a date') - return d.date() - mydate = with_default_value(mydate, datetime.date(1, 1, 1)) - - def get_func(name, item, func): - # promote functions in this order - funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr] - for func in funcs[funcs.index(func):]: - try: - func(name, item) - except Exception: - continue - return func - raise ValueError('Could not find a 
working conversion function') - - # map column names that clash with builtins -- TODO - extend this list - itemd = { - 'return': 'return_', - 'file': 'file_', - 'print': 'print_', - } - - def get_converters(reader, comments): - - converters = None - i = 0 - for row in reader: - if (len(row) and comments is not None and - row[0].startswith(comments)): - continue - if i == 0: - converters = [mybool]*len(row) - if checkrows and i > checkrows: - break - i += 1 - - for j, (name, item) in enumerate(zip(names, row)): - func = converterd.get(j) - if func is None: - func = converterd.get(name) - if func is None: - func = converters[j] - if len(item.strip()): - func = get_func(name, item, func) - else: - # how should we handle custom converters and defaults? - func = with_default_value(func, None) - converters[j] = func - return converters - - # Get header and remove invalid characters - needheader = names is None - - if needheader: - for row in reader: - if (len(row) and comments is not None and - row[0].startswith(comments)): - continue - headers = row - break - - # remove these chars - delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""") - delete.add('"') - - names = [] - seen = dict() - for i, item in enumerate(headers): - item = item.strip().lower().replace(' ', '_') - item = ''.join([c for c in item if c not in delete]) - if not len(item): - item = 'column%d' % i - - item = itemd.get(item, item) - cnt = seen.get(item, 0) - if cnt > 0: - names.append(item + '_%d' % cnt) - else: - names.append(item) - seen[item] = cnt+1 - - else: - if isinstance(names, str): - names = [n.strip() for n in names.split(',')] - - # get the converter functions by inspecting checkrows - converters = get_converters(reader, comments) - if converters is None: - raise ValueError('Could not find any valid data in CSV file') - - # reset the reader and start over - fh.seek(0) - reader = csv.reader(fh, delimiter=delimiter) - process_skiprows(reader) - - if needheader: - while True: - # skip past any 
comments and consume one line of column header - row = next(reader) - if (len(row) and comments is not None and - row[0].startswith(comments)): - continue - break - - # iterate over the remaining rows and convert the data to date - # objects, ints, or floats as appropriate - rows = [] - rowmasks = [] - for i, row in enumerate(reader): - if not len(row): - continue - if comments is not None and row[0].startswith(comments): - continue - # Ensure that the row returned always has the same nr of elements - row.extend([''] * (len(converters) - len(row))) - rows.append([func(name, val) - for func, name, val in zip(converters, names, row)]) - rowmasks.append([ismissing(name, val) - for name, val in zip(names, row)]) - fh.close() - - if not len(rows): - return None - - if use_mrecords and np.any(rowmasks): - r = np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks) - else: - r = np.rec.fromrecords(rows, names=names) - return r - - class GaussianKDE: """ Representation of a kernel-density estimate using Gaussian kernels. 
diff --git a/lib/matplotlib/pyplot.py b/lib/matplotlib/pyplot.py index 01b196045886..1d1e294e6a0a 100644 --- a/lib/matplotlib/pyplot.py +++ b/lib/matplotlib/pyplot.py @@ -45,7 +45,7 @@ from matplotlib.artist import Artist from matplotlib.axes import Axes, Subplot from matplotlib.projections import PolarAxes -from matplotlib import mlab # for _csv2rec, detrend_none, window_hanning +from matplotlib import mlab # for detrend_none, window_hanning from matplotlib.scale import get_scale_docs, get_scale_names from matplotlib import cm diff --git a/lib/matplotlib/tests/test_mlab.py b/lib/matplotlib/tests/test_mlab.py index 86be071c2a4f..745045708531 100644 --- a/lib/matplotlib/tests/test_mlab.py +++ b/lib/matplotlib/tests/test_mlab.py @@ -1,9 +1,6 @@ -import tempfile - from numpy.testing import (assert_allclose, assert_almost_equal, assert_array_equal, assert_array_almost_equal_nulp) import numpy as np -import datetime as datetime import pytest import matplotlib.mlab as mlab @@ -140,52 +137,6 @@ def test_stride_ensure_integer_type(self): assert_array_equal(y_strided, 0.3) -@pytest.fixture -def tempcsv(): - with tempfile.TemporaryFile(suffix='csv', mode="w+", newline='') as fd: - yield fd - - -def test_csv2rec_names_with_comments(tempcsv): - tempcsv.write('# comment\n1,2,3\n4,5,6\n') - tempcsv.seek(0) - array = mlab._csv2rec(tempcsv, names='a,b,c') - assert len(array) == 2 - assert len(array.dtype) == 3 - - -@pytest.mark.parametrize('input, kwargs', [ - ('01/11/14\n' - '03/05/76 12:00:01 AM\n' - '07/09/83 5:17:34 PM\n' - '06/20/2054 2:31:45 PM\n' - '10/31/00 11:50:23 AM\n', - {}), - ('11/01/14\n' - '05/03/76 12:00:01 AM\n' - '09/07/83 5:17:34 PM\n' - '20/06/2054 2:31:45 PM\n' - '31/10/00 11:50:23 AM\n', - {'dayfirst': True}), - ('14/01/11\n' - '76/03/05 12:00:01 AM\n' - '83/07/09 5:17:34 PM\n' - '2054/06/20 2:31:45 PM\n' - '00/10/31 11:50:23 AM\n', - {'yearfirst': True}), -], ids=['usdate', 'dayfirst', 'yearfirst']) -def test_csv2rec_dates(tempcsv, input, kwargs): - 
tempcsv.write(input) - expected = [datetime.datetime(2014, 1, 11, 0, 0), - datetime.datetime(1976, 3, 5, 0, 0, 1), - datetime.datetime(1983, 7, 9, 17, 17, 34), - datetime.datetime(2054, 6, 20, 14, 31, 45), - datetime.datetime(2000, 10, 31, 11, 50, 23)] - tempcsv.seek(0) - array = mlab._csv2rec(tempcsv, names='a', **kwargs) - assert_array_equal(array['a'].tolist(), expected) - - def _apply_window(*args, **kwargs): with pytest.warns(MatplotlibDeprecationWarning): return mlab.apply_window(*args, **kwargs) pFad - Phonifier reborn
