From 2fd944f0ae6790ea6b625ff28b66957a30ed30b0 Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 11 Jan 2013 11:29:07 -0500 Subject: [PATCH 1/3] BUG: series assignment with a boolean indexer AND a series as a value would give incorrect results --- pandas/core/series.py | 8 +++++++- pandas/tests/test_series.py | 12 ++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 56b6e06844..49190a8de4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -695,7 +695,13 @@ def _set_with(self, key, value): else: return self._set_values(key, value) elif key_type == 'boolean': - self._set_values(key, value) + + # scalar setting with boolean key + if np.isscalar(value): + self._set_values(key, value) + # we have a key mask and a value that is np.array like + else: + np.putmask(self.values, key, value) else: self._set_labels(key, value) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 335d747e96..3527e6e29a 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1044,6 +1044,14 @@ def test_ix_setitem(self): self.assertEquals(self.series[d1], 4) self.assertEquals(self.series[d2], 6) + def test_setitem_boolean(self): + mask = self.series > self.series.median() + + result = self.series.copy() + result[mask] = self.series*2 + expected = self.series*2 + assert_series_equal(result[mask], expected[mask]) + def test_ix_setitem_boolean(self): mask = self.series > self.series.median() @@ -3211,9 +3219,9 @@ def test_interpolate_index_values(self): expected = s.copy() bad = isnull(expected.values) good = -bad - expected[bad] = np.interp(vals[bad], vals[good], s.values[good]) + expected = Series(np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad]) - assert_series_equal(result, expected) + assert_series_equal(result[bad], expected) def test_weekday(self): # Just run the function From 76d982c66b0e520ce145ddb3285000c07197763f Mon Sep 17 00:00:00 
2001 From: jreback Date: Fri, 11 Jan 2013 14:28:00 -0500 Subject: [PATCH 2/3] ENH: added neg/inv operators to series fixed setitem with boolean indexer to accept an object needing alignment (e.g. a partial series) --- pandas/core/series.py | 31 ++++++++++++++++++++++--------- pandas/tests/test_series.py | 14 ++++++++++++++ 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 49190a8de4..977b72b190 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -604,6 +604,9 @@ def where(self, cond, other=nan, inplace=False): if len(cond) != len(self): raise ValueError('condition must have same length as series') + if not cond.dtype == np.bool_: + cond = cond.astype(np.bool_) + ser = self if inplace else self.copy() if not isinstance(other, (list, tuple, np.ndarray)): ser._set_with(~cond, other) @@ -660,10 +663,11 @@ def __setitem__(self, key, value): # Could not hash item if _is_bool_indexer(key): + #import pdb; pdb.set_ key = self._check_bool_indexer(key) - key = np.asarray(key, dtype=bool) - - self._set_with(key, value) + self.where(~key,value,inplace=True) + else: + self._set_with(key, value) def _set_with(self, key, value): # other: fancy integer or otherwise @@ -695,13 +699,7 @@ def _set_with(self, key, value): else: return self._set_values(key, value) elif key_type == 'boolean': - - # scalar setting with boolean key - if np.isscalar(value): self._set_values(key, value) - # we have a key mask and a value that is np.array like - else: - np.putmask(self.values, key, value) else: self._set_labels(key, value) @@ -746,6 +744,12 @@ def _check_bool_indexer(self, key): raise ValueError('cannot index with vector containing ' 'NA / NaN values') + # coerce to bool type + if not hasattr(result, 'shape'): + result = np.array(result) + if not result.dtype == np.bool_: + result = result.astype(np.bool_) + return result def __setslice__(self, i, j, value): @@ -1103,6 +1107,15 @@ def iteritems(self): __le__ = 
_comp_method(operator.le, '__le__') __eq__ = _comp_method(operator.eq, '__eq__') __ne__ = _comp_method(operator.ne, '__ne__') + + # inversion + def __neg__(self): + arr = operator.neg(self.values) + return Series(arr, self.index, name=self.name) + + def __invert__(self): + arr = operator.inv(self.values) + return Series(arr, self.index, name=self.name) # binary logic __or__ = _bool_method(operator.or_, '__or__') diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 3527e6e29a..c21c7588b6 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1047,11 +1047,19 @@ def test_ix_setitem(self): def test_setitem_boolean(self): mask = self.series > self.series.median() + # similar indexed series result = self.series.copy() result[mask] = self.series*2 expected = self.series*2 assert_series_equal(result[mask], expected[mask]) + # needs alignment + result = self.series.copy() + result[mask] = (self.series*2)[0:5] + expected = (self.series*2)[0:5].reindex_like(self.series) + expected[-mask] = self.series[mask] + assert_series_equal(result[mask], expected[mask]) + def test_ix_setitem_boolean(self): mask = self.series > self.series.median() @@ -1525,6 +1533,12 @@ def check(series, other): check(self.ts, self.ts[::2]) check(self.ts, 5) + def test_neg(self): + assert_series_equal(-self.series, -1 * self.series) + + def test_invert(self): + assert_series_equal(-(self.series < 0), ~(self.series < 0)) + def test_operators(self): def _check_op(series, other, op, pos_only=False): From 7ec49f618ba41a96a7f9cb2d11fde69bd8bc673d Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 12 Jan 2013 14:14:23 -0500 Subject: [PATCH 3/3] DOC: update release notes --- RELEASE.rst | 3 +++ pandas/core/series.py | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 59a86221d1..3db8f5702e 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -55,6 +55,7 @@ pandas 0.10.1 - Add ``logx`` option to DataFrame/Series.plot 
(GH2327_, #2565) - Support reading gzipped data from file-like object - ``pivot_table`` aggfunc can be anything used in GroupBy.aggregate (GH2643_) + - Add unary ``-`` and ``~`` operators (``__neg__``, ``__invert__``) to Series **Bug fixes** @@ -78,6 +79,7 @@ pandas 0.10.1 - Exclude non-numeric data from DataFrame.quantile by default (GH2625_) - Fix a Cython C int64 boxing issue causing read_csv to return incorrect results (GH2599_) + - Fix setitem on a Series with a boolean key and a non-scalar as value (GH2686_) **API Changes** @@ -98,6 +100,7 @@ pandas 0.10.1 .. _GH2625: https://github.com/pydata/pandas/issues/2625 .. _GH2643: https://github.com/pydata/pandas/issues/2643 .. _GH2637: https://github.com/pydata/pandas/issues/2637 +.. _GH2686: https://github.com/pydata/pandas/issues/2686 pandas 0.10.0 ============= diff --git a/pandas/core/series.py b/pandas/core/series.py index 977b72b190..b2703b96e6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -604,7 +604,7 @@ def where(self, cond, other=nan, inplace=False): if len(cond) != len(self): raise ValueError('condition must have same length as series') - if not cond.dtype == np.bool_: + if cond.dtype != np.bool_: cond = cond.astype(np.bool_) ser = self if inplace else self.copy() @@ -663,7 +663,6 @@ def __setitem__(self, key, value): # Could not hash item if _is_bool_indexer(key): - #import pdb; pdb.set_ key = self._check_bool_indexer(key) self.where(~key,value,inplace=True) else: @@ -747,7 +746,7 @@ def _check_bool_indexer(self, key): # coerce to bool type if not hasattr(result, 'shape'): result = np.array(result) - if result.dtype != np.bool_: + if result.dtype != np.bool_: result = result.astype(np.bool_) return result