diff --git a/lib/matplotlib/__init__.py b/lib/matplotlib/__init__.py index b2b0eb1b571..097ba8f3232 100644 --- a/lib/matplotlib/__init__.py +++ b/lib/matplotlib/__init__.py @@ -1489,6 +1489,7 @@ def _jupyter_nbextension_paths(): 'matplotlib.tests.test_backend_svg', 'matplotlib.tests.test_basic', 'matplotlib.tests.test_bbox_tight', + 'matplotlib.tests.test_category', 'matplotlib.tests.test_cbook', 'matplotlib.tests.test_coding_standards', 'matplotlib.tests.test_collections', diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index 960ba9e96f5..e24bf1a6097 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -21,6 +21,7 @@ import matplotlib.collections as mcoll import matplotlib.colors as mcolors import matplotlib.contour as mcontour +import matplotlib.category as _ # <-registers a category unit converter import matplotlib.dates as _ # <-registers a date unit converter from matplotlib import docstring import matplotlib.image as mimage diff --git a/lib/matplotlib/axis.py b/lib/matplotlib/axis.py index dc3f0bb4332..298ab177c16 100644 --- a/lib/matplotlib/axis.py +++ b/lib/matplotlib/axis.py @@ -662,6 +662,7 @@ def __init__(self, axes, pickradius=15): self.offsetText = self._get_offset_text() self.majorTicks = [] self.minorTicks = [] + self.unit_data = [] self.pickradius = pickradius # Initialize here for testing; later add API @@ -712,6 +713,17 @@ def _set_scale(self, value, **kwargs): def limit_range_for_scale(self, vmin, vmax): return self._scale.limit_range_for_scale(vmin, vmax, self.get_minpos()) + @property + def unit_data(self): + """Holds data that a ConversionInterface subclass relies on + to convert between labels and indexes + """ + return self._unit_data + + @unit_data.setter + def unit_data(self, data): + self._unit_data = data + def get_children(self): children = [self.label, self.offsetText] majorticks = self.get_major_ticks() diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py new file mode
100644
index 00000000000..bfac242149c
--- /dev/null
+++ b/lib/matplotlib/category.py
@@ -0,0 +1,184 @@
+# -*- coding: utf-8 -*-
+"""
+Catch-all module for categorical (string) axis support.
+"""
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+
+import six
+
+import numpy as np
+
+import matplotlib.units as units
+import matplotlib.ticker as ticker
+
+
+# pure hack for numpy 1.6 support
+from distutils.version import LooseVersion
+
+NP_NEW = (LooseVersion(np.version.version) >= LooseVersion('1.7'))
+
+
+def to_array(data, maxlen=100):
+    """Return *data* as a numpy array of text values.
+
+    On numpy >= 1.7 everything is coerced to unicode; on older numpy a
+    byte-string array of width *maxlen* is tried first, with a
+    per-element fallback through `convert_to_string`.
+    """
+    if NP_NEW:
+        # six.text_type is the builtin that the ``np.unicode`` alias
+        # pointed at; the alias itself is gone from recent numpy.
+        return np.array(data, dtype=six.text_type)
+    try:
+        vals = np.array(data, dtype=('|S', maxlen))
+    except UnicodeEncodeError:
+        # pure hack
+        vals = np.array([convert_to_string(d) for d in data])
+    return vals
+
+
+class StrCategoryConverter(units.ConversionInterface):
+    """Unit converter that encodes string categories as floats."""
+
+    @staticmethod
+    def convert(value, unit, axis):
+        """Encode *value* as floats using the ``axis.unit_data`` map.
+
+        A single string returns its mapped location; any other input
+        returns a float array of the same shape.
+        """
+        vmap = dict(axis.unit_data)
+
+        if isinstance(value, six.string_types):
+            return vmap[value]
+
+        vals = to_array(value)
+        # Fill a separate float array instead of writing the locations
+        # back into the string array: that truncated them to the label
+        # width (e.g. 10.0 -> '1' in a '<U1' array) and let an already
+        # converted entry be matched again by a later numeric label.
+        locs = np.empty(vals.shape, dtype='float')
+        matched = np.zeros(vals.shape, dtype=bool)
+        for lab, loc in axis.unit_data:
+            mask = (vals == lab)
+            locs[mask] = loc
+            matched |= mask
+        # unmapped entries keep the old behaviour: numeric text is
+        # parsed, anything else raises ValueError
+        if not matched.all():
+            locs[~matched] = vals[~matched].astype('float')
+        return locs
+
+    @staticmethod
+    def axisinfo(unit, axis):
+        """Return an AxisInfo whose ticks are the mapped categories."""
+        seq, locs = zip(*axis.unit_data)
+        majloc = StrCategoryLocator(locs)
+        majfmt = StrCategoryFormatter(seq)
+        return units.AxisInfo(majloc=majloc, majfmt=majfmt)
+
+    @staticmethod
+    def default_units(data, axis):
+        """Extend ``axis.unit_data`` with the labels in *data*.
+
+        Always returns None.
+        """
+        # the conversion call stack is:
+        # default_units->axis_info->convert
+        axis.unit_data = map_categories(data, axis.unit_data)
+        return None
+
+
+class StrCategoryLocator(ticker.FixedLocator):
+    """FixedLocator placed on the category locations."""
+
+    def __init__(self, locs):
+        super(StrCategoryLocator, self).__init__(locs, None)
+
+
+class StrCategoryFormatter(ticker.FixedFormatter):
+    """FixedFormatter that labels ticks with the category names."""
+
+    def __init__(self, seq):
+        super(StrCategoryFormatter, self).__init__(seq)
+
+
+def convert_to_string(value):
+    """Helper function for numpy 1.6, can be replaced with
+    np.array(...,dtype=unicode) for all later versions of numpy"""
+
+    if isinstance(value, six.string_types):
+        return value
+    if np.isfinite(value):
+        value = np.asarray(value, dtype=str)[np.newaxis][0]
+    elif np.isnan(value):
+        value = 'nan'
+    elif np.isposinf(value):
+        value = 'inf'
+    elif np.isneginf(value):
+        value = '-inf'
+    else:
+        raise ValueError("Unconvertable {}".format(value))
+    return value
+
+
+def map_categories(data, old_map=None):
+    """Create mapping between unique categorical
+    values and numerical identifier.
+
+    Parameters
+    ----------
+    data: iterable
+        sequence of values
+    old_map: list of tuple, optional
+        if not `None`, then old_map is extended with the new values
+        and previous mappings remain unchanged
+
+    Returns
+    -------
+    list of tuple
+        [(label, ticklocation),...]
+
+    """
+
+    # code typical missing data in the negative range because
+    # everything else will always have positive encoding
+    # questionable if it even makes sense
+    spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0}
+
+    if isinstance(data, six.string_types):
+        data = [data]
+
+    # will update this post cbook/dict support
+    strdata = to_array(data)
+    uniq = np.unique(strdata)
+
+    if old_map:
+        olabs, okeys = zip(*old_map)
+        # never start below 0: when only missing-data codes are
+        # mapped so far, max(okeys) + 1 would land on another
+        # reserved negative code
+        svalue = max(max(okeys) + 1, 0)
+    else:
+        old_map, olabs, okeys = [], [], []
+        svalue = 0
+
+    category_map = old_map[:]
+
+    new_labs = [u for u in uniq if u not in olabs]
+    missing = [nl for nl in new_labs if nl in spdict]
+
+    category_map.extend([(m, spdict[m]) for m in missing])
+
+    new_labs = [nl for nl in new_labs if nl not in missing]
+
+    new_locs = np.arange(svalue, svalue + len(new_labs), dtype='float')
+    category_map.extend(list(zip(new_labs, new_locs)))
+    return category_map
+
+
+# Connects the convertor to matplotlib
+units.registry[str] = StrCategoryConverter()
+units.registry[bytes] = StrCategoryConverter()
+units.registry[six.text_type] = StrCategoryConverter()
diff --git a/lib/matplotlib/tests/baseline_images/test_axes/units_strings.pdf
b/lib/matplotlib/tests/baseline_images/test_axes/units_strings.pdf deleted file mode 100644 index a5ad772d25a..00000000000 Binary files a/lib/matplotlib/tests/baseline_images/test_axes/units_strings.pdf and /dev/null differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/units_strings.png b/lib/matplotlib/tests/baseline_images/test_axes/units_strings.png deleted file mode 100644 index 7fc60cb3023..00000000000 Binary files a/lib/matplotlib/tests/baseline_images/test_axes/units_strings.png and /dev/null differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/units_strings.svg b/lib/matplotlib/tests/baseline_images/test_axes/units_strings.svg deleted file mode 100644 index 79446b2e853..00000000000 --- a/lib/matplotlib/tests/baseline_images/test_axes/units_strings.svg +++ /dev/null @@ -1,544 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py index 478f5844100..73c42579d1b 100644 --- a/lib/matplotlib/tests/test_axes.py +++ b/lib/matplotlib/tests/test_axes.py @@ -905,17 +905,6 @@ def test_arc_ellipse(): ax.add_patch(e2) -@image_comparison(baseline_images=['units_strings']) -def test_units_strings(): - # Make sure passing in sequences of strings doesn't cause the unit - # conversion registry to recurse 
infinitely
-    Id = ['50', '100', '150', '200', '250']
-    pout = ['0', '7.4', '11.4', '14.2', '16.3']
-    fig = plt.figure()
-    ax = fig.add_subplot(111)
-    ax.plot(Id, pout)
-
-
 @image_comparison(baseline_images=['markevery'],
                   remove_text=True)
 def test_markevery():
diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py
new file mode 100644
index 00000000000..02db774e4ff
--- /dev/null
+++ b/lib/matplotlib/tests/test_category.py
@@ -0,0 +1,255 @@
+# -*- coding: utf-8 -*-
+"""Catch all for categorical functions"""
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+import unittest
+
+import numpy as np
+
+import matplotlib.pyplot as plt
+from matplotlib.testing.decorators import cleanup
+import matplotlib.category as cat
+
+
+class TestConvertToString(unittest.TestCase):
+    def setUp(self):
+        pass
+
+    def test_string(self):
+        self.assertEqual("abc", cat.convert_to_string("abc"))
+
+    def test_unicode(self):
+        self.assertEqual("Здравствуйте мир",
+                         cat.convert_to_string("Здравствуйте мир"))
+
+    def test_decimal(self):
+        self.assertEqual("3.14", cat.convert_to_string(3.14))
+
+    def test_nan(self):
+        self.assertEqual("nan", cat.convert_to_string(np.nan))
+
+    def test_posinf(self):
+        self.assertEqual("inf", cat.convert_to_string(np.inf))
+
+    def test_neginf(self):
+        self.assertEqual("-inf", cat.convert_to_string(-np.inf))
+
+
+class TestMapCategories(unittest.TestCase):
+    def test_map_unicode(self):
+        act = cat.map_categories("Здравствуйте мир")
+        exp = [("Здравствуйте мир", 0)]
+        self.assertListEqual(act, exp)
+
+    def test_map_data(self):
+        act = cat.map_categories("hello world")
+        exp = [("hello world", 0)]
+        self.assertListEqual(act, exp)
+
+    def test_map_data_basic(self):
+        data = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
+        exp = [('a', 0), ('b', 1), ('c', 2)]
+        act = cat.map_categories(data)
+        self.assertListEqual(sorted(act), sorted(exp))
+
+    def test_map_data_mixed(self):
+        data = ['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf]
+        exp = [('nan', -1), ('3.14', 0),
+               ('A', 1), ('B', 2), ('-inf', -3), ('inf', -2)]
+
+        act = cat.map_categories(data)
+        self.assertListEqual(sorted(act), sorted(exp))
+
+    @unittest.skip("extending an existing category map: not enabled yet")
+    def test_update_map(self):
+        data = ['b', 'd', 'e', np.inf]
+        old_map = [('a', 0), ('d', 1)]
+        exp = [('inf', -2), ('a', 0), ('d', 1),
+               ('b', 2), ('e', 3)]
+        act = cat.map_categories(data, old_map)
+        self.assertListEqual(sorted(act), sorted(exp))
+
+
+class FakeAxis(object):
+    def __init__(self):
+        self.unit_data = []
+
+
+class TestStrCategoryConverter(unittest.TestCase):
+    """Based on the pandas conversion and factorization tests:
+
+    ref: /pandas/tseries/tests/test_converter.py
+         /pandas/tests/test_algos.py:TestFactorize
+    """
+
+    def setUp(self):
+        self.cc = cat.StrCategoryConverter()
+        self.axis = FakeAxis()
+
+    def test_convert_unicode(self):
+        self.axis.unit_data = [("Здравствуйте мир", 42)]
+        act = self.cc.convert("Здравствуйте мир", None, self.axis)
+        exp = 42
+        self.assertEqual(act, exp)
+
+    def test_convert_single(self):
+        self.axis.unit_data = [("hello world", 42)]
+        act = self.cc.convert("hello world", None, self.axis)
+        exp = 42
+        self.assertEqual(act, exp)
+
+    def test_convert_basic(self):
+        data = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
+        exp = [0, 1, 1, 0, 0, 2, 2, 2]
+        self.axis.unit_data = [('a', 0), ('b', 1), ('c', 2)]
+        act = self.cc.convert(data, None, self.axis)
+        np.testing.assert_array_equal(act, exp)
+
+    def test_convert_mixed(self):
+        data = ['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf]
+        exp = [1, 1, -1, 2, 100, 0, 200]
+        self.axis.unit_data = [('nan', -1), ('3.14', 0),
+                               ('A', 1), ('B', 2),
+                               ('-inf', 100), ('inf', 200)]
+        act = self.cc.convert(data, None, self.axis)
+        np.testing.assert_array_equal(act, exp)
+
+    def test_axisinfo(self):
+        self.axis.unit_data = [('a', 0)]
+        ax = self.cc.axisinfo(None, self.axis)
+        self.assertTrue(isinstance(ax.majloc, cat.StrCategoryLocator))
+        self.assertTrue(isinstance(ax.majfmt, cat.StrCategoryFormatter))
+
+    def test_default_units(self):
+        self.assertEqual(self.cc.default_units(["a"], self.axis), None)
+
+
+class TestStrCategoryLocator(unittest.TestCase):
+    def setUp(self):
+        self.locs = list(range(10))
+
+    def test_StrCategoryLocator(self):
+        ticks = cat.StrCategoryLocator(self.locs)
+        np.testing.assert_equal(ticks.tick_values(None, None),
+                                self.locs)
+
+
+class TestStrCategoryFormatter(unittest.TestCase):
+    def setUp(self):
+        self.seq = ["hello", "world", "hi"]
+
+    def test_StrCategoryFormatter(self):
+        labels = cat.StrCategoryFormatter(self.seq)
+        self.assertEqual(labels('a', 1), "world")
+
+
+def lt(tl):
+    return [l.get_text() for l in tl]
+
+
+class TestPlot(unittest.TestCase):
+
+    def setUp(self):
+        self.d = ['a', 'b', 'c', 'a']
+        self.dticks = [0, 1, 2]
+        self.dlabels = ['a', 'b', 'c']
+        self.dunit_data = [('a', 0), ('b', 1), ('c', 2)]
+
+        self.dm = ['here', np.nan, 'here', 'there']
+        self.dmticks = [-1, 0, 1]
+        self.dmlabels = ['nan', 'here', 'there']
+        self.dmunit_data = [('nan', -1), ('here', 0), ('there', 1)]
+
+    @cleanup
+    def test_plot_unicode(self):
+        # needs image test - works but
+        fig = plt.figure()
+        ax = fig.add_subplot(1, 1, 1)
+        words = ['Здравствуйте', 'привет']
+        locs = [0.0, 1.0]
+        ax.plot(words)
+        fig.canvas.draw()
+
+        self.assertListEqual(ax.yaxis.unit_data,
+                             list(zip(words, locs)))
+        np.testing.assert_array_equal(ax.get_yticks(), locs)
+        self.assertListEqual(lt(ax.get_yticklabels()), words)
+
+    @cleanup
+    def test_plot_1d(self):
+        fig = plt.figure()
+        ax = fig.add_subplot(1, 1, 1)
+        ax.plot(self.d)
+        fig.canvas.draw()
+
+        np.testing.assert_array_equal(ax.get_yticks(), self.dticks)
+        self.assertListEqual(lt(ax.get_yticklabels()),
+                             self.dlabels)
+        self.assertListEqual(ax.yaxis.unit_data, self.dunit_data)
+
+    @cleanup
+    def test_plot_1d_missing(self):
+
+        fig = plt.figure()
+        ax = fig.add_subplot(1, 1, 1)
+        ax.plot(self.dm)
+        fig.canvas.draw()
+
+        np.testing.assert_array_equal(ax.get_yticks(), self.dmticks)
+        self.assertListEqual(lt(ax.get_yticklabels()),
+                             self.dmlabels)
+        self.assertListEqual(ax.yaxis.unit_data, self.dmunit_data)
+
+    @cleanup
+    def test_plot_2d(self):
+
+        fig = plt.figure()
+        ax = fig.add_subplot(1, 1, 1)
+        ax.plot(self.dm, self.d)
+        fig.canvas.draw()
+
+        np.testing.assert_array_equal(ax.get_xticks(), self.dmticks)
+        self.assertListEqual(lt(ax.get_xticklabels()),
+                             self.dmlabels)
+        self.assertListEqual(ax.xaxis.unit_data, self.dmunit_data)
+
+        np.testing.assert_array_equal(ax.get_yticks(), self.dticks)
+        self.assertListEqual(lt(ax.get_yticklabels()),
+                             self.dlabels)
+        self.assertListEqual(ax.yaxis.unit_data, self.dunit_data)
+
+    @cleanup
+    def test_scatter_2d(self):
+
+        fig = plt.figure()
+        ax = fig.add_subplot(1, 1, 1)
+        ax.scatter(self.dm, self.d)
+        fig.canvas.draw()
+
+        np.testing.assert_array_equal(ax.get_xticks(), self.dmticks)
+        self.assertListEqual(lt(ax.get_xticklabels()),
+                             self.dmlabels)
+        self.assertListEqual(ax.xaxis.unit_data, self.dmunit_data)
+
+        np.testing.assert_array_equal(ax.get_yticks(), self.dticks)
+        self.assertListEqual(lt(ax.get_yticklabels()),
+                             self.dlabels)
+        self.assertListEqual(ax.yaxis.unit_data, self.dunit_data)
+
+    @unittest.skip("replotting with new categories: not enabled yet")
+    @cleanup
+    def test_plot_update(self):
+        fig = plt.figure()
+        ax = fig.add_subplot(1, 1, 1)
+
+        ax.plot(['a', 'b'])
+        ax.plot(['a', 'b', 'd'])
+        ax.plot(['b', 'c', 'd'])
+        fig.canvas.draw()
+
+        labels_new = ['a', 'b', 'd', 'c']
+        ticks_new = [0, 1, 2, 3]
+        self.assertListEqual(ax.yaxis.unit_data,
+                             list(zip(labels_new, ticks_new)))
+        np.testing.assert_array_equal(ax.get_yticks(), ticks_new)
+        self.assertListEqual(lt(ax.get_yticklabels()), labels_new)
diff --git a/lib/matplotlib/units.py b/lib/matplotlib/units.py
index 081827910b5..c700e257627 100644
--- a/lib/matplotlib/units.py
+++ b/lib/matplotlib/units.py
@@ -1,16 +1,15 @@
 """
 The classes here provide support for using custom classes with
 matplotlib, e.g., those that do not expose the array interface but know
-how to converter
themselves to arrays. It also supoprts classes with +how to convert themselves to arrays. It also supports classes with units and units conversion. Use cases include converters for custom objects, e.g., a list of datetime objects, as well as for objects that -are unit aware. We don't assume any particular units implementation, -rather a units implementation must provide a ConversionInterface, and -the register with the Registry converter dictionary. For example, +are unit aware. We don't assume any particular units implementation; +rather a units implementation must provide a ConversionInterface and +register it with the Registry converter dictionary. For example, here is a complete implementation which supports plotting with native datetime objects:: - import matplotlib.units as units import matplotlib.dates as dates import matplotlib.ticker as ticker @@ -45,8 +44,6 @@ def default_units(x, axis): from __future__ import (absolute_import, division, print_function, unicode_literals) -import six - from matplotlib.cbook import iterable, is_numlike import numpy as np @@ -127,9 +124,9 @@ def get_converter(self, x): if not len(self): return None # nothing registered - #DISABLED idx = id(x) - #DISABLED cached = self._cached.get(idx) - #DISABLED if cached is not None: return cached + # DISABLED idx = id(x) + # DISABLED cached = self._cached.get(idx) + # DISABLED if cached is not None: return cached converter = None classx = getattr(x, '__class__', None) @@ -167,7 +164,7 @@ def get_converter(self, x): converter = self.get_converter(thisx) return converter - #DISABLED self._cached[idx] = converter + # DISABLED self._cached[idx] = converter return converter