From 85bc5d082e551ce2faf79476b3875f1397609fbd Mon Sep 17 00:00:00 2001
From: Dieter Vandenbussche
Date: Mon, 21 Nov 2011 22:05:19 -0500
Subject: [PATCH 1/2] Handle masked arrays in DataFrame constructor

---
 pandas/core/frame.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f2c888d537..f37483c6f8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -20,6 +20,7 @@
 
 from numpy import nan
 import numpy as np
+import numpy.ma as ma
 
 from pandas.core.common import (isnull, notnull, PandasError, adjoin,
                                 _try_sort, _pfixed, _default_index,
@@ -186,6 +187,12 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                 mgr = mgr.astype(dtype)
         elif isinstance(data, dict):
             mgr = self._init_dict(data, index, columns, dtype=dtype)
+        elif isinstance(data, ma.MaskedArray):
+            mask = ma.getmaskarray(data)
+            datacopy = ma.copy(data)
+            datacopy[mask] = np.nan
+            mgr = self._init_ndarray(datacopy, index, columns, dtype=dtype,
+                                     copy=copy)
         elif isinstance(data, np.ndarray):
             if data.dtype.names:
                 data_columns, data = _rec_to_dict(data)

From 6fd803194956a50e5d82c087c7a21a8fdf870608 Mon Sep 17 00:00:00 2001
From: Dieter Vandenbussche
Date: Mon, 21 Nov 2011 22:50:52 -0500
Subject: [PATCH 2/2] Add unittests for DataFrame masked array constructor

---
 pandas/tests/test_frame.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index e89a3315e4..83e0c35ca4 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -10,6 +10,7 @@
 from numpy import random, nan
 from numpy.random import randn
 import numpy as np
+import numpy.ma as ma
 
 import pandas.core.common as common
 import pandas.core.datetools as datetools
@@ -1040,6 +1041,64 @@ def test_constructor_ndarray(self):
         frame = DataFrame(np.empty((3, 0)))
         self.assert_(len(frame.columns) == 0)
 
+    def test_constructor_maskedarray(self):
+        mat = ma.masked_all((2, 3), dtype=float)
+
+        # 2-D input
+        frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2])
+
+        self.assertEqual(len(frame.index), 2)
+        self.assertEqual(len(frame.columns), 3)
+        self.assertTrue(np.all(~np.asarray(frame == frame)))
+
+        # cast type
+        frame = DataFrame(mat, columns=['A', 'B', 'C'],
+                          index=[1, 2], dtype=int)
+        self.assert_(frame.values.dtype == np.int64)
+
+        # Check non-masked values
+        mat2 = ma.copy(mat)
+        mat2[0,0] = 1.0
+        mat2[1,2] = 2.0
+        frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2])
+        self.assertEqual(1.0, frame['A'][1])
+        self.assertEqual(2.0, frame['C'][2])
+
+        # 1-D input
+        frame = DataFrame(ma.masked_all((3,)), columns=['A'], index=[1, 2, 3])
+        self.assertEqual(len(frame.index), 3)
+        self.assertEqual(len(frame.columns), 1)
+        self.assertTrue(np.all(~np.asarray(frame == frame)))
+
+        # higher dim raise exception
+        self.assertRaises(Exception, DataFrame, ma.masked_all((3, 3, 3)),
+                          columns=['A', 'B', 'C'], index=[1])
+
+        # wrong size axis labels
+        self.assertRaises(Exception, DataFrame, mat,
+                          columns=['A', 'B', 'C'], index=[1])
+
+        self.assertRaises(Exception, DataFrame, mat,
+                          columns=['A', 'B'], index=[1, 2])
+
+        # automatic labeling
+        frame = DataFrame(mat)
+        self.assert_(np.array_equal(frame.index, range(2)))
+        self.assert_(np.array_equal(frame.columns, range(3)))
+
+        frame = DataFrame(mat, index=[1, 2])
+        self.assert_(np.array_equal(frame.columns, range(3)))
+
+        frame = DataFrame(mat, columns=['A', 'B', 'C'])
+        self.assert_(np.array_equal(frame.index, range(2)))
+
+        # 0-length axis
+        frame = DataFrame(ma.masked_all((0, 3)))
+        self.assert_(frame.index is NULL_INDEX)
+
+        frame = DataFrame(ma.masked_all((3, 0)))
+        self.assert_(len(frame.columns) == 0)
+
     def test_constructor_corner(self):
         df = DataFrame(index=[])
         self.assertEqual(df.values.shape, (0, 0))