From d1235385f374f92d2be23f5dd12ca486752d2790 Mon Sep 17 00:00:00 2001 From: Benjamin Adams Date: Wed, 18 Jun 2014 16:35:37 -0400 Subject: [PATCH 1/2] Added properly functioning case-insensitive match without regex pandas.core.strings.str_contains should now match case-insensitively when given regex=False and case=False. Also added test cases for case-insensitive matching in pandas.core.strings.str_contains with and without regex. --- pandas/core/strings.py | 7 ++++++- pandas/tests/test_strings.py | 11 +++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index ad64d2bf6b..3e730942ff 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -189,7 +189,12 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): f = lambda x: bool(regex.search(x)) else: - f = lambda x: pat in x + if case: + f = lambda x: pat in x + else: + upper_pat = pat.upper() + f = lambda x: upper_pat in x + return _na_map(f, str_upper(arr), na, dtype=bool) return _na_map(f, arr, na, dtype=bool) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 55ab906544..971d7acf73 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -189,6 +189,17 @@ def test_contains(self): self.assertEqual(result.dtype, np.bool_) tm.assert_almost_equal(result, expected) + # case insensitive using regex + values = ['Foo', 'xYz', 'fOOomMm__fOo', 'MMM_'] + result = strings.str_contains(values, 'FOO|mmm', case=False) + expected = [True, False, True, True] + tm.assert_almost_equal(result, expected) + + # case insensitive without regex + result = strings.str_contains(values, 'foo', regex=False, case=False) + expected = [True, False, True, False] + tm.assert_almost_equal(result, expected) + # mixed mixed = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.] 
        rs = strings.str_contains(mixed, 'o') From 6116d431ef12cb56c588442a9bd1040611f01ad1 Mon Sep 17 00:00:00 2001 From: Benjamin Adams Date: Wed, 18 Jun 2014 18:56:31 -0400 Subject: [PATCH 2/2] Documentation for str_contains bug in changelog --- doc/source/v0.14.1.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index c1e5877d09..0d1606f701 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -249,3 +249,4 @@ Bug Fixes - Bug in non-monotonic ``Index.union`` may preserve ``name`` incorrectly (:issue:`7458`) - Bug in ``DatetimeIndex.intersection`` doesn't preserve timezone (:issue:`4690`) +- Bug in ``pandas.core.strings.str_contains`` does not properly match in a case-insensitive fashion when ``regex=False`` and ``case=False`` (:issue:`7505`)