From a8723a4bd0b15ff2e02daad11a0d64c292b7ef3b Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 6 May 2013 10:48:08 -0400 Subject: [PATCH] ENH: read-html fixes --- README.rst | 11 +- RELEASE.rst | 13 +- doc/source/install.rst | 11 +- doc/source/io.rst | 8 +- doc/source/v0.11.1.txt | 27 +- pandas/io/html.py | 445 ++++++++++++------ pandas/io/tests/data/banklist.csv | 503 +++++++++++++++++++++ .../data/{failed_banklist.html => banklist.html} | 21 +- pandas/io/tests/test_html.py | 256 +++++++++-- pandas/util/testing.py | 10 +- 10 files changed, 1095 insertions(+), 210 deletions(-) create mode 100644 pandas/io/tests/data/banklist.csv rename pandas/io/tests/data/{failed_banklist.html => banklist.html} (97%) diff --git a/README.rst b/README.rst index 3cdb2bf5b3..2d49c168ea 100644 --- a/README.rst +++ b/README.rst @@ -92,12 +92,11 @@ Optional dependencies - openpyxl version 1.6.1 or higher, for writing .xlsx files - xlrd >= 0.9.0 - Needed for Excel I/O - - `lxml `__, or `Beautiful Soup 4 `__: for reading HTML tables - - The differences between lxml and Beautiful Soup 4 are mostly speed (lxml - is faster), however sometimes Beautiful Soup returns what you might - intuitively expect. Both backends are implemented, so try them both to - see which one you like. They should return very similar results. - - Note that lxml requires Cython to build successfully + - Both `html5lib `__ **and** + `Beautiful Soup 4 `__: for + reading HTML tables + - These can both easily be installed by ``pip install html5lib`` and ``pip + install beautifulsoup4``. - `boto `__: necessary for Amazon S3 access. diff --git a/RELEASE.rst b/RELEASE.rst index 85cb4d9f40..bbb04cecd6 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -30,8 +30,9 @@ pandas 0.11.1 **New features** - - pd.read_html() can now parse HTML string, files or urls and return dataframes - courtesy of @cpcloud. 
(GH3477_) + - ``pandas.read_html()`` can now parse HTML strings, files or urls and + returns a list of ``DataFrame`` s courtesy of @cpcloud. (GH3477_, GH3605_, + GH3606_) - Support for reading Amazon S3 files. (GH3504_) - Added module for reading and writing Stata files: pandas.io.stata (GH1512_) - Added support for writing in ``to_csv`` and reading in ``read_csv``, @@ -48,7 +49,7 @@ pandas 0.11.1 **Improvements to existing features** - Fixed various issues with internal pprinting code, the repr() for various objects - including TimeStamp and *Index now produces valid python code strings and + including TimeStamp and Index now produces valid python code strings and can be used to recreate the object, (GH3038_, GH3379_, GH3251_, GH3460_) - ``convert_objects`` now accepts a ``copy`` parameter (defaults to ``True``) - ``HDFStore`` @@ -146,6 +147,9 @@ pandas 0.11.1 - ``sql.write_frame`` failing when writing a single column to sqlite (GH3628_), thanks to @stonebig - Fix pivoting with ``nan`` in the index (GH3558_) + - Fix running of bs4 tests when it is not installed (GH3605_) + - Fix parsing of html table (GH3606_) + - ``read_html()`` now only allows a single backend: ``html5lib`` (GH3616_) .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH2786: https://github.com/pydata/pandas/issues/2786 @@ -209,6 +213,9 @@ pandas 0.11.1 .. _GH3141: https://github.com/pydata/pandas/issues/3141 .. _GH3628: https://github.com/pydata/pandas/issues/3628 .. _GH3638: https://github.com/pydata/pandas/issues/3638 +.. _GH3605: https://github.com/pydata/pandas/issues/3605 +.. _GH3606: https://github.com/pydata/pandas/issues/3606 +.. 
_GH3616: https://github.com/pydata/pandas/issues/3616 pandas 0.11.0 ============= diff --git a/doc/source/install.rst b/doc/source/install.rst index 9d14d1b11c..658d9d78d5 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -99,12 +99,11 @@ Optional Dependencies * `openpyxl `__, `xlrd/xlwt `__ * openpyxl version 1.6.1 or higher * Needed for Excel I/O - * `lxml `__, or `Beautiful Soup 4 `__: for reading HTML tables - * The differences between lxml and Beautiful Soup 4 are mostly speed (lxml - is faster), however sometimes Beautiful Soup returns what you might - intuitively expect. Both backends are implemented, so try them both to - see which one you like. They should return very similar results. - * Note that lxml requires Cython to build successfully + * Both `html5lib `__ **and** + `Beautiful Soup 4 `__: for + reading HTML tables + * These can both easily be installed by ``pip install html5lib`` and ``pip + install beautifulsoup4``. .. note:: diff --git a/doc/source/io.rst b/doc/source/io.rst index 42ea4a2ca5..3dbf297dea 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -918,18 +918,18 @@ which, if set to ``True``, will additionally output the length of the Series. HTML ---- -Reading HTML format +Reading HTML Content ~~~~~~~~~~~~~~~~~~~~~~ .. _io.read_html: .. versionadded:: 0.11.1 -The toplevel :func:`~pandas.io.parsers.read_html` function can accept an HTML string/file/url -and will parse HTML tables into pandas DataFrames. +The toplevel :func:`~pandas.io.parsers.read_html` function can accept an HTML +string/file/url and will parse HTML tables into a list of pandas DataFrames. -Writing to HTML format +Writing to HTML files ~~~~~~~~~~~~~~~~~~~~~~ .. 
_io.html: diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt index a42765591c..40fda1305e 100644 --- a/doc/source/v0.11.1.txt +++ b/doc/source/v0.11.1.txt @@ -64,9 +64,27 @@ API changes Enhancements ~~~~~~~~~~~~ - - - ``pd.read_html()`` can now parse HTML string, files or urls and return dataframes - courtesy of @cpcloud. (GH3477_) + - ``pd.read_html()`` can now parse HTML strings, files or urls and return + DataFrames + courtesy of @cpcloud. (GH3477_, GH3605_, GH3606_) + - ``read_html()`` (GH3616_) + - now works with only a *single* parser backend, that is: + - BeautifulSoup4 + html5lib + - does *not* and will never support using the html parsing library + included with Python as a parser backend + - is a bit smarter about the parent table elements of matched text: if + multiple matches are found then only the *unique* parents of the result + are returned (uniqueness is determined using ``set``). + - no longer tries to guess about what you want to do with empty table cells + - argument ``infer_types`` now defaults to ``False``. + - now returns DataFrames whose default column index is the elements of + ```` elements in the HTML soup, if any exist. + - considers all ```` and ```` elements inside of ```` + elements. + - tests are now correctly skipped if the proper libraries are not + installed. + - tests now include a ground-truth csv file from the FDIC failed bank list + data set. - ``HDFStore`` - will retain index attributes (freq,tz,name) on recreation (GH3499_) @@ -203,3 +221,6 @@ on GitHub for a complete list. .. _GH1651: https://github.com/pydata/pandas/issues/1651 .. _GH3141: https://github.com/pydata/pandas/issues/3141 .. _GH3638: https://github.com/pydata/pandas/issues/3638 +.. _GH3616: https://github.com/pydata/pandas/issues/3616 +.. _GH3605: https://github.com/pydata/pandas/issues/3605 +.. 
_GH3606: https://github.com/pydata/pandas/issues/3606 diff --git a/pandas/io/html.py b/pandas/io/html.py index c29d16db81..732bd57bec 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -16,6 +16,8 @@ except ImportError: import_module = __import__ +import numpy as np + from pandas import DataFrame, MultiIndex from pandas.io.parsers import _is_url @@ -78,8 +80,34 @@ def _get_skiprows_iter(skiprows): raise TypeError('{0} is not a valid type for skipping' ' rows'.format(type(skiprows))) - def _parse_columns(self, row): - return row.xpath('.//td|.//th') + +def _read(io): + """Try to read from a url, file or string. + + Parameters + ---------- + io : str, unicode, or file-like + + Returns + ------- + raw_text : str + """ + if _is_url(io): + try: + with contextlib.closing(urllib2.urlopen(io)) as url: + raw_text = url.read() + except urllib2.URLError: + raise ValueError('Invalid URL: "{0}"'.format(io)) + elif hasattr(io, 'read'): + raw_text = io.read() + elif os.path.isfile(io): + with open(io) as f: + raw_text = f.read() + elif isinstance(io, basestring): + raw_text = io + else: + raise ValueError("Cannot read object of type '{0}'".format(type(io))) + return raw_text class _HtmlFrameParser(object): @@ -114,9 +142,12 @@ class _HtmlFrameParser(object): To subclass this class effectively you must override the following methods: * :func:`_build_doc` * :func:`_text_getter` - * :func:`_parse_columns` - * :func:`_parse_table` - * :func:`_parse_rows` + * :func:`_parse_td` + * :func:`_parse_tables` + * :func:`_parse_tr` + * :func:`_parse_thead` + * :func:`_parse_tbody` + * :func:`_parse_tfoot` See each method's respective documentation for details on their functionality. """ @@ -125,33 +156,11 @@ def __init__(self, io, match, attrs): self.match = match self.attrs = attrs - def parse_rows(self): - """Return a list of list of each table's rows. 
- - Returns - ------- - row_list : list of list of node-like - A list of each table's rows, which are DOM nodes (usually or - elements). - """ + def parse_tables(self): tables = self._parse_tables(self._build_doc(), self.match, self.attrs) - assert tables, 'No tables found' - return (self._parse_rows(table) for table in tables) - - def parse_raw_data(self): - """Return a list of the raw data from each table. - - Returns - ------- - data : list of list of lists of str or unicode - Each table's data is contained in a list of lists of str or - unicode. - """ - return [self._parse_raw_data(rows, self._text_getter, - self._parse_columns) - for rows in self.parse_rows()] + return (self._build_table(table) for table in tables) - def _parse_raw_data(self, rows, text_getter, column_finder): + def _parse_raw_data(self, rows): """Parse the raw data into a list of lists. Parameters @@ -177,23 +186,8 @@ def _parse_raw_data(self, rows, text_getter, column_finder): ------- data : list of list of strings """ - # callable is back in Python 3.2 - assert callable(text_getter), '"text_getter" must be callable' - assert callable(column_finder), '"column_finder" must be callable' - - data = [] - - for row in rows: - if _remove_whitespace(text_getter(row)): - col = [] - - for el in column_finder(row): - t = _remove_whitespace(text_getter(el)) - - if t: - col.append(t) - data.append(col) - + data = [[_remove_whitespace(self._text_getter(col)) for col in + self._parse_td(row)] for row in rows] return data def _text_getter(self, obj): @@ -211,8 +205,8 @@ def _text_getter(self, obj): """ raise NotImplementedError - def _parse_columns(self, obj): - """Return the column elements from a row element. + def _parse_td(self, obj): + """Return the td elements from a row element. 
Parameters ---------- @@ -252,7 +246,7 @@ def _parse_tables(self, doc, match, attrs): """ raise NotImplementedError - def _parse_rows(self, table): + def _parse_tr(self, table): """Return the list of row elements from the parsed table element. Parameters @@ -267,6 +261,51 @@ def _parse_rows(self, table): """ raise NotImplementedError + def _parse_thead(self, table): + """Return the header of a table. + + Parameters + ---------- + table : node-like + A table element that contains row elements. + + Returns + ------- + thead : node-like + A ... element. + """ + raise NotImplementedError + + def _parse_tbody(self, table): + """Return the body of the table. + + Parameters + ---------- + table : node-like + A table element that contains row elements. + + Returns + ------- + tbody : node-like + A ... element. + """ + raise NotImplementedError + + def _parse_tfoot(self, table): + """Return the footer of the table if any. + + Parameters + ---------- + table : node-like + A table element that contains row elements. + + Returns + ------- + tfoot : node-like + A ... element. + """ + raise NotImplementedError + def _build_doc(self): """Return a tree-like object that can be used to iterate over the DOM. 
@@ -276,8 +315,37 @@ def _build_doc(self): """ raise NotImplementedError + def _build_table(self, table): + header = self._parse_raw_thead(table) + body = self._parse_raw_tbody(table) + footer = self._parse_raw_tfoot(table) + return header, body, footer + + def _parse_raw_thead(self, table): + thead = self._parse_thead(table) + res = [] + if thead: + res = map(self._text_getter, self._parse_th(thead[0])) + return np.array(res).squeeze() if res and len(res) == 1 else res + + def _parse_raw_tfoot(self, table): + tfoot = self._parse_tfoot(table) + res = [] + if tfoot: + res = map(self._text_getter, self._parse_td(tfoot[0])) + return np.array(res).squeeze() if res and len(res) == 1 else res + + def _parse_raw_tbody(self, table): + tbody = self._parse_tbody(table) + + try: + res = self._parse_tr(tbody[0]) + except IndexError: + res = self._parse_tr(table) + return self._parse_raw_data(res) + -class _BeautifulSoupFrameParser(_HtmlFrameParser): +class _BeautifulSoupLxmlFrameParser(_HtmlFrameParser): """HTML to DataFrame parser that uses BeautifulSoup under the hood. See Also @@ -291,48 +359,68 @@ class _BeautifulSoupFrameParser(_HtmlFrameParser): :class:`pandas.io.html._HtmlFrameParser`. 
""" def __init__(self, *args, **kwargs): - super(_BeautifulSoupFrameParser, self).__init__(*args, **kwargs) + super(_BeautifulSoupLxmlFrameParser, self).__init__(*args, **kwargs) + from bs4 import SoupStrainer + self._strainer = SoupStrainer('table') def _text_getter(self, obj): return obj.text - def _parse_columns(self, row): + def _parse_td(self, row): return row.find_all(('td', 'th')) - def _parse_rows(self, table): - return table.find_all(('tr', 'thead', 'tfoot')) + def _parse_tr(self, element): + return element.find_all('tr') - def _parse_tables(self, doc, match, attrs): - tables = doc.find_all('table', attrs=attrs) - assert tables, 'No tables found' + def _parse_th(self, element): + return element.find_all('th') + + def _parse_thead(self, table): + return table.find_all('thead') + + def _parse_tbody(self, table): + return table.find_all('tbody') + + def _parse_tfoot(self, table): + return table.find_all('tfoot') - tables = [table for table in tables - if table.find(text=match) is not None] - assert tables, "No tables found matching '{0}'".format(match.pattern) + def _parse_tables(self, doc, match, attrs): + element_name = self._strainer.name + tables = doc.find_all(element_name, attrs=attrs) + if not tables: + raise AssertionError('No tables found') + + mts = [table.find(text=match) for table in tables] + matched_tables = [mt for mt in mts if mt is not None] + tables = list(set(mt.find_parent(element_name) + for mt in matched_tables)) + + if not tables: + raise AssertionError("No tables found matching " + "'{0}'".format(match.pattern)) + #import ipdb; ipdb.set_trace() return tables + def _setup_build_doc(self): + raw_text = _read(self.io) + if not raw_text: + raise AssertionError('No text parsed from document') + return raw_text + def _build_doc(self): - if _is_url(self.io): - try: - with contextlib.closing(urllib2.urlopen(self.io)) as url: - raw_text = url.read() - except urllib2.URLError: - raise ValueError('Invalid URL: "{0}"'.format(self.io)) - elif 
hasattr(self.io, 'read'): - raw_text = self.io.read() - elif os.path.isfile(self.io): - with open(self.io) as f: - raw_text = f.read() - elif isinstance(self.io, basestring): - raw_text = self.io - else: - raise ValueError("Cannot read object of" - " type '{0}'".format(type(self.io))) - assert raw_text, 'No text parsed from document' + from bs4 import BeautifulSoup + return BeautifulSoup(self._setup_build_doc(), features='lxml', + parse_only=self._strainer) + - from bs4 import BeautifulSoup, SoupStrainer - strainer = SoupStrainer('table') - return BeautifulSoup(raw_text, parse_only=strainer) +class _BeautifulSoupHtml5LibFrameParser(_BeautifulSoupLxmlFrameParser): + def __init__(self, *args, **kwargs): + super(_BeautifulSoupHtml5LibFrameParser, self).__init__(*args, + **kwargs) + + def _build_doc(self): + from bs4 import BeautifulSoup + return BeautifulSoup(self._setup_build_doc(), features='html5lib') def _build_node_xpath_expr(attrs): @@ -358,6 +446,7 @@ def _build_node_xpath_expr(attrs): _re_namespace = {'re': 'http://exslt.org/regular-expressions'} +_valid_schemes = 'http', 'file', 'ftp' class _LxmlFrameParser(_HtmlFrameParser): @@ -370,7 +459,7 @@ class _LxmlFrameParser(_HtmlFrameParser): See Also -------- _HtmlFrameParser - _BeautifulSoupFrameParser + _BeautifulSoupLxmlFrameParser Notes ----- @@ -383,11 +472,12 @@ def __init__(self, *args, **kwargs): def _text_getter(self, obj): return obj.text_content() - def _parse_columns(self, row): + def _parse_td(self, row): return row.xpath('.//td|.//th') - def _parse_rows(self, table): - return table.xpath('(.//tr|.//thead|.//tfoot)[normalize-space()]') + def _parse_tr(self, table): + expr = './/tr[normalize-space()]' + return table.xpath(expr) def _parse_tables(self, doc, match, kwargs): pattern = match.pattern @@ -406,42 +496,68 @@ def _parse_tables(self, doc, match, kwargs): if kwargs: xpath_expr += _build_node_xpath_expr(kwargs) tables = doc.xpath(xpath_expr, namespaces=_re_namespace) - assert tables, "No tables 
found matching regex '{0}'".format(pattern) + if not tables: + raise AssertionError("No tables found matching regex " + "'{0}'".format(pattern)) return tables def _build_doc(self): """ Raises ------ - IOError - * If a valid URL is detected, but for some reason cannot be parsed. - This is probably due to a faulty or non-existent internet - connection. ValueError * If a URL that lxml cannot parse is passed. + Exception + * Any other ``Exception`` thrown. For example, trying to parse a + URL that is syntactically correct on a machine with no internet + connection will fail. + See Also -------- pandas.io.html._HtmlFrameParser._build_doc """ from lxml.html import parse, fromstring + from lxml.html.clean import clean_html try: # try to parse the input in the simplest way - return parse(self.io) - except (UnicodeDecodeError, IOError): - # something went wrong, check for not-a-url because it's probably a - # huge string blob + r = parse(self.io) + except (UnicodeDecodeError, IOError) as e: + # if the input is a blob of html goop if not _is_url(self.io): - return fromstring(self.io) - elif urlparse.urlparse(self.io).scheme not in ('http', 'ftp', - 'file'): - raise ValueError('"{0}" does not have a valid URL' - ' protocol'.format(self.io)) + r = fromstring(self.io) else: - raise IOError('"{0}" is a valid URL, so you probably are not' - ' properly connected to the' - ' internet'.format(self.io)) + # not a url + scheme = urlparse.urlparse(self.io).scheme + if scheme not in _valid_schemes: + # lxml can't parse it + msg = ('{0} is not a valid url scheme, valid schemes are ' + '{1}').format(scheme, _valid_schemes) + raise ValueError(msg) + else: + # something else happened: maybe a faulty connection + raise e + return clean_html(r) + + def _parse_tbody(self, table): + return table.xpath('.//tbody') + + def _parse_thead(self, table): + return table.xpath('.//thead') + + def _parse_tfoot(self, table): + return table.xpath('.//tfoot') + + def _parse_raw_thead(self, table): + expr = 
'.//thead//th' + return [_remove_whitespace(x.text_content()) for x in + table.xpath(expr)] + + def _parse_raw_tfoot(self, table): + expr = './/tfoot//th' + return [_remove_whitespace(x.text_content()) for x in + table.xpath(expr)] def _data_to_frame(data, header, index_col, infer_types, skiprows): @@ -449,7 +565,7 @@ def _data_to_frame(data, header, index_col, infer_types, skiprows): Parameters ---------- - data : list of lists of str or unicode + data : tuple of lists The raw data to be placed into a DataFrame. This is a list of lists of strings or unicode. If it helps, it can be thought of as a matrix of strings instead. @@ -491,7 +607,9 @@ def _data_to_frame(data, header, index_col, infer_types, skiprows): ----- The `data` parameter is guaranteed not to be a list of empty lists. """ - df = DataFrame(data) + thead, tbody, tfoot = data + columns = thead or None + df = DataFrame(tbody, columns=columns) if skiprows is not None: it = _get_skiprows_iter(skiprows) @@ -530,16 +648,81 @@ def _data_to_frame(data, header, index_col, infer_types, skiprows): # drop by default df.set_index(cols, inplace=True) + if df.index.nlevels == 1: + if not (df.index.name or df.index.name is None): + df.index.name = None + else: + names = [name or None for name in df.index.names] + df.index = MultiIndex.from_tuples(df.index.values, names=names) return df -_possible_parsers = {'lxml': _LxmlFrameParser, - 'bs4': _BeautifulSoupFrameParser} +_invalid_parsers = {'lxml': _LxmlFrameParser, + 'bs4': _BeautifulSoupLxmlFrameParser} +_valid_parsers = {'html5lib': _BeautifulSoupHtml5LibFrameParser} +_all_parsers = _valid_parsers.copy() +_all_parsers.update(_invalid_parsers) -def read_html(io, match='.+', flavor='bs4', header=None, index_col=None, - skiprows=None, infer_types=True, attrs=None): +def _parser_dispatch(flavor): + """Choose the parser based on the input flavor. + + Parameters + ---------- + flavor : str + The type of parser to use. This must be a valid backend. 
+ + Returns + ------- + cls : _HtmlFrameParser subclass + The parser class based on the requested input flavor. + + Raises + ------ + AssertionError + * If `flavor` is not a valid backend. + """ + valid_parsers = _valid_parsers.keys() + if flavor not in valid_parsers: + raise AssertionError('"{0}" is not a valid flavor'.format(flavor)) + + if flavor == 'bs4': + try: + import_module('lxml') + parser_t = _BeautifulSoupLxmlFrameParser + except ImportError: + try: + import_module('html5lib') + parser_t = _BeautifulSoupHtml5LibFrameParser + except ImportError: + raise ImportError("read_html does not support the native " + "Python 'html.parser' backend for bs4, " + "please install either 'lxml' or 'html5lib'") + elif flavor == 'html5lib': + try: + # much better than python's builtin + import_module('html5lib') + parser_t = _BeautifulSoupHtml5LibFrameParser + except ImportError: + raise ImportError("html5lib not found please install it") + else: + parser_t = _LxmlFrameParser + return parser_t + + +def _parse(parser, io, match, flavor, header, index_col, skiprows, infer_types, + attrs): + # bonus: re.compile is idempotent under function iteration so you can pass + # a compiled regex to it and it will return itself + p = parser(io, re.compile(match), attrs) + tables = p.parse_tables() + return [_data_to_frame(table, header, index_col, infer_types, skiprows) + for table in tables] + + +def read_html(io, match='.+', flavor='html5lib', header=None, index_col=None, + skiprows=None, infer_types=False, attrs=None): r"""Read an HTML table into a DataFrame. Parameters @@ -547,7 +730,8 @@ def read_html(io, match='.+', flavor='bs4', header=None, index_col=None, io : str or file-like A string or file like object that can be either a url, a file-like object, or a raw string containing HTML. Note that lxml only accepts - the http, ftp and file url protocols. + the http, ftp and file url protocols. If you have a URI that starts + with ``'https'`` you might try removing the ``'s'``. 
match : str or regex, optional The set of tables containing text matching this regex or string will be @@ -557,10 +741,10 @@ def read_html(io, match='.+', flavor='bs4', header=None, index_col=None, This value is converted to a regular expression so that there is consistent behavior between Beautiful Soup and lxml. - flavor : str, {'lxml', 'bs4'} - The parsing engine to use under the hood. lxml is faster and bs4 - (Beautiful Soup 4) is better at parsing nested tags, which are not - uncommon when parsing tables. Defaults to 'bs4'. + flavor : str, {'html5lib'} + The parsing engine to use under the hood. Right now only ``html5lib`` + is supported because it returns correct output whereas ``lxml`` does + not. header : int or array-like or None, optional The row (or rows for a MultiIndex) to use to make the columns headers. @@ -661,6 +845,7 @@ def read_html(io, match='.+', flavor='bs4', header=None, index_col=None, Parse some spam infomation from the USDA: + >>> from pandas import read_html, DataFrame >>> url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&' ... 
'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam') >>> dfs = read_html(url, match='Water', header=0) @@ -670,32 +855,16 @@ def read_html(io, match='.+', flavor='bs4', header=None, index_col=None, You can pass nothing to the `match` argument: + >>> from pandas import read_html, DataFrame >>> url = 'http://www.fdic.gov/bank/individual/failed/banklist.html' >>> dfs = read_html(url) >>> print(len(dfs)) # this will most likely be greater than 1 - - Try a different parser: - - >>> url = 'http://www.fdic.gov/bank/individual/failed/banklist.html' - >>> dfs = read_html(url, 'Florida', flavor='lxml', attrs={'id': 'table'}) - >>> assert dfs - >>> assert isinstance(dfs, list) - >>> assert all(map(lambda x: isinstance(x, DataFrame), dfs)) """ - # annoying type check here because we don't want to spend time parsing HTML - # only to end up failing because of an invalid value of skiprows - if isinstance(skiprows, numbers.Integral): - assert skiprows >= 0, ('cannot skip rows starting from the end of the ' - 'data (you passed a negative value)') - - valid_backends = _possible_parsers.keys() - assert flavor in valid_backends, ("'{0}' is not a valid backend, the valid" - " backends are " - "{1}".format(flavor, valid_backends)) - parser = _possible_parsers[flavor] - - # bonus: re.compile is idempotent under function iteration so you can pass - # a compiled regex to it and it will return itself - p = parser(io, re.compile(match), attrs) - return [_data_to_frame(data, header, index_col, infer_types, skiprows) - for data in p.parse_raw_data()] + # Type check here. We don't want to parse only to fail because of an + # invalid value of an integer skiprows. 
+ if isinstance(skiprows, numbers.Integral) and skiprows < 0: + raise AssertionError('cannot skip rows starting from the end of the ' + 'data (you passed a negative value)') + parser = _parser_dispatch(flavor) + return _parse(parser, io, match, flavor, header, index_col, skiprows, + infer_types, attrs) diff --git a/pandas/io/tests/data/banklist.csv b/pandas/io/tests/data/banklist.csv new file mode 100644 index 0000000000..6545d31fe5 --- /dev/null +++ b/pandas/io/tests/data/banklist.csv @@ -0,0 +1,503 @@ +Bank Name,City,State,CERT #,Acquiring Institution,Closing Date,Updated Date +Douglas County Bank,Douglasville,GA,21649,Hamilton State Bank,26-Apr-13,30-Apr-13 +Parkway Bank,Lenoir,NC,57158,"CertusBank, National Association",26-Apr-13,30-Apr-13 +Chipola Community Bank,Marianna,FL,58034,First Federal Bank of Florida,19-Apr-13,23-Apr-13 +Heritage Bank of North Florida,Orange Park,FL,26680,FirstAtlantic Bank,19-Apr-13,23-Apr-13 +First Federal Bank,Lexington,KY,29594,Your Community Bank,19-Apr-13,23-Apr-13 +Gold Canyon Bank,Gold Canyon,AZ,58066,"First Scottsdale Bank, National Association",5-Apr-13,9-Apr-13 +Frontier Bank,LaGrange,GA,16431,HeritageBank of the South,8-Mar-13,26-Mar-13 +Covenant Bank,Chicago,IL,22476,Liberty Bank and Trust Company,15-Feb-13,4-Mar-13 +1st Regents Bank,Andover,MN,57157,First Minnesota Bank,18-Jan-13,28-Feb-13 +Westside Community Bank,University Place,WA,33997,Sunwest Bank,11-Jan-13,24-Jan-13 +Community Bank of the Ozarks,Sunrise Beach,MO,27331,Bank of Sullivan,14-Dec-12,24-Jan-13 +Hometown Community Bank,Braselton,GA,57928,"CertusBank, National Association",16-Nov-12,24-Jan-13 +Citizens First National Bank,Princeton,IL,3731,Heartland Bank and Trust Company,2-Nov-12,24-Jan-13 +Heritage Bank of Florida,Lutz,FL,35009,Centennial Bank,2-Nov-12,24-Jan-13 +NOVA Bank,Berwyn,PA,27148,No Acquirer,26-Oct-12,24-Jan-13 +Excel Bank,Sedalia,MO,19189,Simmons First National Bank,19-Oct-12,24-Jan-13 +First East Side Savings Bank,Tamarac,FL,28144,Stearns Bank 
N.A.,19-Oct-12,24-Jan-13 +GulfSouth Private Bank,Destin,FL,58073,SmartBank,19-Oct-12,24-Jan-13 +First United Bank,Crete,IL,20685,"Old Plank Trail Community Bank, National Association",28-Sep-12,15-Nov-12 +Truman Bank,St. Louis,MO,27316,Simmons First National Bank,14-Sep-12,17-Dec-12 +First Commercial Bank,Bloomington,MN,35246,Republic Bank & Trust Company,7-Sep-12,17-Dec-12 +Waukegan Savings Bank,Waukegan,IL,28243,First Midwest Bank,3-Aug-12,11-Oct-12 +Jasper Banking Company,Jasper,GA,16240,Stearns Bank N.A.,27-Jul-12,17-Dec-12 +Second Federal Savings and Loan Association of Chicago,Chicago,IL,27986,Hinsdale Bank & Trust Company,20-Jul-12,14-Jan-13 +Heartland Bank,Leawood,KS,1361,Metcalf Bank,20-Jul-12,17-Dec-12 +First Cherokee State Bank,Woodstock,GA,32711,Community & Southern Bank,20-Jul-12,31-Oct-12 +Georgia Trust Bank,Buford,GA,57847,Community & Southern Bank,20-Jul-12,17-Dec-12 +The Royal Palm Bank of Florida,Naples,FL,57096,First National Bank of the Gulf Coast,20-Jul-12,7-Jan-13 +Glasgow Savings Bank,Glasgow,MO,1056,Regional Missouri Bank,13-Jul-12,11-Oct-12 +Montgomery Bank & Trust,Ailey,GA,19498,Ameris Bank,6-Jul-12,31-Oct-12 +The Farmers Bank of Lynchburg,Lynchburg,TN,1690,Clayton Bank and Trust,15-Jun-12,31-Oct-12 +Security Exchange Bank,Marietta,GA,35299,Fidelity Bank,15-Jun-12,10-Oct-12 +Putnam State Bank,Palatka,FL,27405,Harbor Community Bank,15-Jun-12,10-Oct-12 +Waccamaw Bank,Whiteville,NC,34515,First Community Bank,8-Jun-12,8-Nov-12 +Farmers' and Traders' State Bank,Shabbona,IL,9257,First State Bank,8-Jun-12,10-Oct-12 +Carolina Federal Savings Bank,Charleston,SC,35372,Bank of North Carolina,8-Jun-12,31-Oct-12 +First Capital Bank,Kingfisher,OK,416,F & M Bank,8-Jun-12,10-Oct-12 +"Alabama Trust Bank, National Association",Sylacauga,AL,35224,Southern States Bank,18-May-12,31-Oct-12 +"Security Bank, National Association",North Lauderdale,FL,23156,Banesco USA,4-May-12,31-Oct-12 +Palm Desert National Bank,Palm Desert,CA,23632,Pacific Premier 
Bank,27-Apr-12,31-Aug-12 +Plantation Federal Bank,Pawleys Island,SC,32503,First Federal Bank,27-Apr-12,31-Oct-12 +"Inter Savings Bank, fsb D/B/A InterBank, fsb",Maple Grove,MN,31495,Great Southern Bank,27-Apr-12,17-Oct-12 +HarVest Bank of Maryland,Gaithersburg,MD,57766,Sonabank,27-Apr-12,17-Oct-12 +Bank of the Eastern Shore,Cambridge,MD,26759,No Acquirer,27-Apr-12,17-Oct-12 +"Fort Lee Federal Savings Bank, FSB",Fort Lee,NJ,35527,Alma Bank,20-Apr-12,31-Aug-12 +Fidelity Bank,Dearborn,MI,33883,The Huntington National Bank,30-Mar-12,9-Aug-12 +Premier Bank,Wilmette,IL,35419,International Bank of Chicago,23-Mar-12,17-Oct-12 +Covenant Bank & Trust,Rock Spring,GA,58068,"Stearns Bank, N.A.",23-Mar-12,31-Oct-12 +New City Bank ,Chicago,IL,57597,No Acquirer,9-Mar-12,29-Oct-12 +Global Commerce Bank,Doraville,GA,34046,Metro City Bank,2-Mar-12,31-Oct-12 +Home Savings of America,Little Falls,MN,29178,No Acquirer,24-Feb-12,17-Dec-12 +Central Bank of Georgia,Ellaville,GA,5687,Ameris Bank,24-Feb-12,9-Aug-12 +SCB Bank,Shelbyville,IN,29761,"First Merchants Bank, National Association",10-Feb-12,25-Mar-13 +Charter National Bank and Trust,Hoffman Estates,IL,23187,"Barrington Bank & Trust Company, National Association",10-Feb-12,25-Mar-13 +BankEast,Knoxville,TN,19869,U.S.Bank National Association,27-Jan-12,8-Mar-13 +Patriot Bank Minnesota,Forest Lake,MN,34823,First Resource Bank,27-Jan-12,12-Sep-12 +Tennessee Commerce Bank ,Franklin,TN,35296,Republic Bank & Trust Company,27-Jan-12,20-Nov-12 +First Guaranty Bank and Trust Company of Jacksonville,Jacksonville,FL,16579,"CenterState Bank of Florida, N.A.",27-Jan-12,12-Sep-12 +American Eagle Savings Bank,Boothwyn,PA,31581,"Capital Bank, N.A.",20-Jan-12,25-Jan-13 +The First State Bank,Stockbridge,GA,19252,Hamilton State Bank,20-Jan-12,25-Jan-13 +Central Florida State Bank,Belleview,FL,57186,"CenterState Bank of Florida, N.A.",20-Jan-12,25-Jan-13 +Western National Bank,Phoenix,AZ,57917,Washington Federal,16-Dec-11,13-Aug-12 +Premier Community 
Bank of the Emerald Coast,Crestview,FL,58343,Summit Bank,16-Dec-11,12-Sep-12 +Central Progressive Bank,Lacombe,LA,19657,First NBC Bank,18-Nov-11,13-Aug-12 +Polk County Bank,Johnston,IA,14194,Grinnell State Bank,18-Nov-11,15-Aug-12 +Community Bank of Rockmart,Rockmart,GA,57860,Century Bank of Georgia,10-Nov-11,13-Aug-12 +SunFirst Bank,Saint George,UT,57087,Cache Valley Bank,4-Nov-11,16-Nov-12 +"Mid City Bank, Inc.",Omaha,NE,19397,Premier Bank,4-Nov-11,15-Aug-12 +All American Bank,Des Plaines,IL,57759,International Bank of Chicago,28-Oct-11,15-Aug-12 +Community Banks of Colorado,Greenwood Village,CO,21132,"Bank Midwest, N.A.",21-Oct-11,2-Jan-13 +Community Capital Bank,Jonesboro,GA,57036,State Bank and Trust Company,21-Oct-11,8-Nov-12 +Decatur First Bank,Decatur,GA,34392,Fidelity Bank,21-Oct-11,8-Nov-12 +Old Harbor Bank,Clearwater,FL,57537,1st United Bank,21-Oct-11,8-Nov-12 +Country Bank,Aledo,IL,35395,Blackhawk Bank & Trust,14-Oct-11,15-Aug-12 +First State Bank,Cranford,NJ,58046,Northfield Bank,14-Oct-11,8-Nov-12 +"Blue Ridge Savings Bank, Inc.",Asheville,NC,32347,Bank of North Carolina,14-Oct-11,8-Nov-12 +Piedmont Community Bank,Gray,GA,57256,State Bank and Trust Company,14-Oct-11,22-Jan-13 +Sun Security Bank,Ellington,MO,20115,Great Southern Bank,7-Oct-11,7-Nov-12 +The RiverBank,Wyoming,MN,10216,Central Bank,7-Oct-11,7-Nov-12 +First International Bank,Plano,TX,33513,American First National Bank,30-Sep-11,9-Oct-12 +Citizens Bank of Northern California,Nevada City,CA,33983,Tri Counties Bank,23-Sep-11,9-Oct-12 +Bank of the Commonwealth,Norfolk,VA,20408,Southern Bank and Trust Company,23-Sep-11,9-Oct-12 +The First National Bank of Florida,Milton,FL,25155,CharterBank,9-Sep-11,6-Sep-12 +CreekSide Bank,Woodstock,GA,58226,Georgia Commerce Bank,2-Sep-11,6-Sep-12 +Patriot Bank of Georgia,Cumming,GA,58273,Georgia Commerce Bank,2-Sep-11,2-Nov-12 +First Choice Bank,Geneva,IL,57212,Inland Bank & Trust,19-Aug-11,15-Aug-12 +First Southern National Bank,Statesboro,GA,57239,Heritage 
Bank of the South,19-Aug-11,2-Nov-12 +Lydian Private Bank,Palm Beach,FL,35356,"Sabadell United Bank, N.A.",19-Aug-11,2-Nov-12 +Public Savings Bank,Huntingdon Valley,PA,34130,"Capital Bank, N.A.",18-Aug-11,15-Aug-12 +The First National Bank of Olathe,Olathe,KS,4744,Enterprise Bank & Trust,12-Aug-11,23-Aug-12 +Bank of Whitman,Colfax,WA,22528,Columbia State Bank,5-Aug-11,16-Aug-12 +Bank of Shorewood,Shorewood,IL,22637,Heartland Bank and Trust Company,5-Aug-11,16-Aug-12 +Integra Bank National Association,Evansville,IN,4392,Old National Bank,29-Jul-11,16-Aug-12 +"BankMeridian, N.A.",Columbia,SC,58222,SCBT National Association,29-Jul-11,2-Nov-12 +Virginia Business Bank,Richmond,VA,58283,Xenith Bank,29-Jul-11,9-Oct-12 +Bank of Choice,Greeley,CO,2994,"Bank Midwest, N.A.",22-Jul-11,12-Sep-12 +LandMark Bank of Florida,Sarasota,FL,35244,American Momentum Bank,22-Jul-11,2-Nov-12 +Southshore Community Bank,Apollo Beach,FL,58056,American Momentum Bank,22-Jul-11,2-Nov-12 +Summit Bank,Prescott,AZ,57442,The Foothills Bank,15-Jul-11,16-Aug-12 +First Peoples Bank,Port St. 
Lucie,FL,34870,"Premier American Bank, N.A.",15-Jul-11,2-Nov-12 +High Trust Bank,Stockbridge,GA,19554,Ameris Bank,15-Jul-11,2-Nov-12 +One Georgia Bank,Atlanta,GA,58238,Ameris Bank,15-Jul-11,2-Nov-12 +Signature Bank,Windsor,CO,57835,Points West Community Bank,8-Jul-11,26-Oct-12 +Colorado Capital Bank,Castle Rock,CO,34522,First-Citizens Bank & Trust Company,8-Jul-11,15-Jan-13 +First Chicago Bank & Trust,Chicago,IL,27935,Northbrook Bank & Trust Company,8-Jul-11,9-Sep-12 +Mountain Heritage Bank,Clayton,GA,57593,First American Bank and Trust Company,24-Jun-11,2-Nov-12 +First Commercial Bank of Tampa Bay,Tampa,FL,27583,Stonegate Bank,17-Jun-11,2-Nov-12 +McIntosh State Bank,Jackson,GA,19237,Hamilton State Bank,17-Jun-11,2-Nov-12 +Atlantic Bank and Trust,Charleston,SC,58420,"First Citizens Bank and Trust Company, Inc.",3-Jun-11,31-Oct-12 +First Heritage Bank,Snohomish,WA,23626,Columbia State Bank,27-May-11,28-Jan-13 +Summit Bank,Burlington,WA,513,Columbia State Bank,20-May-11,22-Jan-13 +First Georgia Banking Company,Franklin,GA,57647,"CertusBank, National Association",20-May-11,13-Nov-12 +Atlantic Southern Bank,Macon,GA,57213,"CertusBank, National Association",20-May-11,31-Oct-12 +Coastal Bank,Cocoa Beach,FL,34898,"Florida Community Bank, a division of Premier American Bank, N.A.",6-May-11,30-Nov-12 +Community Central Bank,Mount Clemens,MI,34234,Talmer Bank & Trust,29-Apr-11,16-Aug-12 +The Park Avenue Bank,Valdosta,GA,19797,Bank of the Ozarks,29-Apr-11,30-Nov-12 +First Choice Community Bank,Dallas,GA,58539,Bank of the Ozarks,29-Apr-11,22-Jan-13 +Cortez Community Bank,Brooksville,FL,57625,"Florida Community Bank, a division of Premier American Bank, N.A.",29-Apr-11,30-Nov-12 +First National Bank of Central Florida,Winter Park,FL,26297,"Florida Community Bank, a division of Premier American Bank, N.A.",29-Apr-11,30-Nov-12 +Heritage Banking Group,Carthage,MS,14273,Trustmark National Bank,15-Apr-11,30-Nov-12 +Rosemount National Bank,Rosemount,MN,24099,Central 
Bank,15-Apr-11,16-Aug-12 +Superior Bank,Birmingham,AL,17750,"Superior Bank, National Association",15-Apr-11,30-Nov-12 +Nexity Bank,Birmingham,AL,19794,AloStar Bank of Commerce,15-Apr-11,4-Sep-12 +New Horizons Bank,East Ellijay,GA,57705,Citizens South Bank,15-Apr-11,16-Aug-12 +Bartow County Bank,Cartersville,GA,21495,Hamilton State Bank,15-Apr-11,22-Jan-13 +Nevada Commerce Bank,Las Vegas,NV,35418,City National Bank,8-Apr-11,9-Sep-12 +Western Springs National Bank and Trust,Western Springs,IL,10086,Heartland Bank and Trust Company,8-Apr-11,22-Jan-13 +The Bank of Commerce,Wood Dale,IL,34292,Advantage National Bank Group,25-Mar-11,22-Jan-13 +Legacy Bank,Milwaukee,WI,34818,Seaway Bank and Trust Company,11-Mar-11,12-Sep-12 +First National Bank of Davis,Davis,OK,4077,The Pauls Valley National Bank,11-Mar-11,20-Aug-12 +Valley Community Bank,St. Charles,IL,34187,First State Bank,25-Feb-11,12-Sep-12 +"San Luis Trust Bank, FSB ",San Luis Obispo,CA,34783,First California Bank,18-Feb-11,20-Aug-12 +Charter Oak Bank,Napa,CA,57855,Bank of Marin,18-Feb-11,12-Sep-12 +Citizens Bank of Effingham,Springfield,GA,34601,Heritage Bank of the South,18-Feb-11,2-Nov-12 +Habersham Bank,Clarkesville,GA,151,SCBT National Association,18-Feb-11,2-Nov-12 +Canyon National Bank,Palm Springs,CA,34692,Pacific Premier Bank,11-Feb-11,12-Sep-12 +Badger State Bank,Cassville,WI,13272,Royal Bank,11-Feb-11,12-Sep-12 +Peoples State Bank,Hamtramck,MI,14939,First Michigan Bank,11-Feb-11,22-Jan-13 +Sunshine State Community Bank,Port Orange,FL,35478,"Premier American Bank, N.A.",11-Feb-11,2-Nov-12 +Community First Bank Chicago,Chicago,IL,57948,Northbrook Bank & Trust Company,4-Feb-11,20-Aug-12 +North Georgia Bank,Watkinsville,GA,35242,BankSouth,4-Feb-11,2-Nov-12 +American Trust Bank,Roswell,GA,57432,Renasant Bank,4-Feb-11,31-Oct-12 +First Community Bank,Taos,NM,12261,"U.S. 
Bank, N.A.",28-Jan-11,12-Sep-12 +FirsTier Bank,Louisville,CO,57646,No Acquirer,28-Jan-11,12-Sep-12 +Evergreen State Bank,Stoughton,WI,5328,McFarland State Bank,28-Jan-11,12-Sep-12 +The First State Bank,Camargo,OK,2303,Bank 7,28-Jan-11,12-Sep-12 +United Western Bank,Denver,CO,31293,First-Citizens Bank & Trust Company,21-Jan-11,12-Sep-12 +The Bank of Asheville,Asheville,NC,34516,First Bank,21-Jan-11,2-Nov-12 +CommunitySouth Bank & Trust,Easley,SC,57868,"CertusBank, National Association",21-Jan-11,2-Nov-12 +Enterprise Banking Company,McDonough,GA,19758,No Acquirer,21-Jan-11,2-Nov-12 +Oglethorpe Bank,Brunswick,GA,57440,Bank of the Ozarks,14-Jan-11,2-Nov-12 +Legacy Bank,Scottsdale,AZ,57820,Enterprise Bank & Trust,7-Jan-11,12-Sep-12 +First Commercial Bank of Florida,Orlando,FL,34965,First Southern Bank,7-Jan-11,2-Nov-12 +Community National Bank,Lino Lakes,MN,23306,Farmers & Merchants Savings Bank,17-Dec-10,20-Aug-12 +First Southern Bank ,Batesville,AR,58052,Southern Bank,17-Dec-10,20-Aug-12 +"United Americas Bank, N.A.",Atlanta,GA,35065,State Bank and Trust Company,17-Dec-10,2-Nov-12 +"Appalachian Community Bank, FSB ",McCaysville,GA,58495,Peoples Bank of East Tennessee,17-Dec-10,31-Oct-12 +Chestatee State Bank,Dawsonville,GA,34578,Bank of the Ozarks,17-Dec-10,2-Nov-12 +"The Bank of Miami,N.A.",Coral Gables,FL,19040,1st United Bank,17-Dec-10,2-Nov-12 +Earthstar Bank,Southampton,PA,35561,Polonia Bank,10-Dec-10,20-Aug-12 +Paramount Bank,Farmington Hills,MI,34673,Level One Bank,10-Dec-10,20-Aug-12 +First Banking Center,Burlington,WI,5287,First Michigan Bank,19-Nov-10,20-Aug-12 +Allegiance Bank of North America,Bala Cynwyd,PA,35078,VIST Bank,19-Nov-10,20-Aug-12 +Gulf State Community Bank,Carrabelle,FL,20340,Centennial Bank,19-Nov-10,2-Nov-12 +Copper Star Bank,Scottsdale,AZ,35463,"Stearns Bank, N.A.",12-Nov-10,20-Aug-12 +Darby Bank & Trust Co.,Vidalia,GA,14580,Ameris Bank,12-Nov-10,15-Jan-13 +Tifton Banking Company,Tifton,GA,57831,Ameris Bank,12-Nov-10,2-Nov-12 +First 
Vietnamese American Bank,Westminster,CA,57885,Grandpoint Bank,5-Nov-10,12-Sep-12 +Pierce Commercial Bank,Tacoma,WA,34411,Heritage Bank,5-Nov-10,20-Aug-12 +Western Commercial Bank,Woodland Hills,CA,58087,First California Bank,5-Nov-10,12-Sep-12 +K Bank,Randallstown,MD,31263,Manufacturers and Traders Trust Company (M&T Bank),5-Nov-10,20-Aug-12 +"First Arizona Savings, A FSB",Scottsdale,AZ,32582,No Acquirer,22-Oct-10,20-Aug-12 +Hillcrest Bank,Overland Park,KS,22173,"Hillcrest Bank, N.A.",22-Oct-10,20-Aug-12 +First Suburban National Bank,Maywood,IL,16089,Seaway Bank and Trust Company,22-Oct-10,20-Aug-12 +The First National Bank of Barnesville,Barnesville,GA,2119,United Bank,22-Oct-10,2-Nov-12 +The Gordon Bank,Gordon,GA,33904,Morris Bank,22-Oct-10,2-Nov-12 +Progress Bank of Florida,Tampa,FL,32251,Bay Cities Bank,22-Oct-10,2-Nov-12 +First Bank of Jacksonville,Jacksonville,FL,27573,Ameris Bank,22-Oct-10,2-Nov-12 +Premier Bank,Jefferson City,MO,34016,Providence Bank,15-Oct-10,20-Aug-12 +WestBridge Bank and Trust Company,Chesterfield,MO,58205,Midland States Bank,15-Oct-10,20-Aug-12 +"Security Savings Bank, F.S.B.",Olathe,KS,30898,Simmons First National Bank,15-Oct-10,20-Aug-12 +Shoreline Bank,Shoreline,WA,35250,GBC International Bank,1-Oct-10,20-Aug-12 +Wakulla Bank,Crawfordville,FL,21777,Centennial Bank,1-Oct-10,2-Nov-12 +North County Bank,Arlington,WA,35053,Whidbey Island Bank,24-Sep-10,20-Aug-12 +Haven Trust Bank Florida,Ponte Vedra Beach,FL,58308,First Southern Bank,24-Sep-10,5-Nov-12 +Maritime Savings Bank,West Allis,WI,28612,"North Shore Bank, FSB",17-Sep-10,20-Aug-12 +Bramble Savings Bank,Milford,OH,27808,Foundation Bank,17-Sep-10,20-Aug-12 +The Peoples Bank,Winder,GA,182,Community & Southern Bank,17-Sep-10,5-Nov-12 +First Commerce Community Bank,Douglasville,GA,57448,Community & Southern Bank,17-Sep-10,15-Jan-13 +Bank of Ellijay,Ellijay,GA,58197,Community & Southern Bank,17-Sep-10,15-Jan-13 +ISN Bank,Cherry Hill,NJ,57107,Customers Bank,17-Sep-10,22-Aug-12 +Horizon 
Bank,Bradenton,FL,35061,Bank of the Ozarks,10-Sep-10,5-Nov-12 +Sonoma Valley Bank,Sonoma,CA,27259,Westamerica Bank,20-Aug-10,12-Sep-12 +Los Padres Bank,Solvang,CA,32165,Pacific Western Bank,20-Aug-10,12-Sep-12 +Butte Community Bank,Chico,CA,33219,"Rabobank, N.A.",20-Aug-10,12-Sep-12 +Pacific State Bank,Stockton,CA,27090,"Rabobank, N.A.",20-Aug-10,12-Sep-12 +ShoreBank,Chicago,IL,15640,Urban Partnership Bank,20-Aug-10,12-Sep-12 +Imperial Savings and Loan Association,Martinsville,VA,31623,"River Community Bank, N.A.",20-Aug-10,24-Aug-12 +Independent National Bank,Ocala,FL,27344,"CenterState Bank of Florida, N.A.",20-Aug-10,5-Nov-12 +Community National Bank at Bartow,Bartow,FL,25266,"CenterState Bank of Florida, N.A.",20-Aug-10,5-Nov-12 +Palos Bank and Trust Company,Palos Heights,IL,17599,First Midwest Bank,13-Aug-10,22-Aug-12 +Ravenswood Bank,Chicago,IL,34231,Northbrook Bank & Trust Company,6-Aug-10,22-Aug-12 +LibertyBank,Eugene,OR,31964,Home Federal Bank,30-Jul-10,22-Aug-12 +The Cowlitz Bank,Longview,WA,22643,Heritage Bank,30-Jul-10,22-Aug-12 +Coastal Community Bank,Panama City Beach,FL,9619,Centennial Bank,30-Jul-10,5-Nov-12 +Bayside Savings Bank,Port Saint Joe,FL,57669,Centennial Bank,30-Jul-10,5-Nov-12 +Northwest Bank & Trust,Acworth,GA,57658,State Bank and Trust Company,30-Jul-10,5-Nov-12 +Home Valley Bank ,Cave Junction,OR,23181,South Valley Bank & Trust,23-Jul-10,12-Sep-12 +SouthwestUSA Bank ,Las Vegas,NV,35434,Plaza Bank,23-Jul-10,22-Aug-12 +Community Security Bank ,New Prague,MN,34486,Roundbank,23-Jul-10,12-Sep-12 +Thunder Bank ,Sylvan Grove,KS,10506,The Bennington State Bank,23-Jul-10,13-Sep-12 +Williamsburg First National Bank ,Kingstree,SC,17837,"First Citizens Bank and Trust Company, Inc.",23-Jul-10,5-Nov-12 +Crescent Bank and Trust Company ,Jasper,GA,27559,Renasant Bank,23-Jul-10,5-Nov-12 +Sterling Bank ,Lantana,FL,32536,IBERIABANK,23-Jul-10,5-Nov-12 +"Mainstreet Savings Bank, FSB",Hastings,MI,28136,Commercial Bank,16-Jul-10,13-Sep-12 +Olde Cypress 
Community Bank,Clewiston,FL,28864,"CenterState Bank of Florida, N.A.",16-Jul-10,5-Nov-12 +Turnberry Bank,Aventura,FL,32280,NAFH National Bank,16-Jul-10,5-Nov-12 +Metro Bank of Dade County,Miami,FL,25172,NAFH National Bank,16-Jul-10,5-Nov-12 +First National Bank of the South,Spartanburg,SC,35383,NAFH National Bank,16-Jul-10,5-Nov-12 +Woodlands Bank,Bluffton,SC,32571,Bank of the Ozarks,16-Jul-10,5-Nov-12 +Home National Bank,Blackwell,OK,11636,RCB Bank,9-Jul-10,10-Dec-12 +USA Bank,Port Chester,NY,58072,New Century Bank,9-Jul-10,14-Sep-12 +Ideal Federal Savings Bank,Baltimore,MD,32456,No Acquirer,9-Jul-10,14-Sep-12 +Bay National Bank,Baltimore,MD,35462,"Bay Bank, FSB",9-Jul-10,15-Jan-13 +High Desert State Bank,Albuquerque,NM,35279,First American Bank,25-Jun-10,14-Sep-12 +First National Bank,Savannah,GA,34152,"The Savannah Bank, N.A.",25-Jun-10,5-Nov-12 +Peninsula Bank,Englewood,FL,26563,"Premier American Bank, N.A.",25-Jun-10,5-Nov-12 +Nevada Security Bank,Reno,NV,57110,Umpqua Bank,18-Jun-10,23-Aug-12 +Washington First International Bank,Seattle,WA,32955,East West Bank,11-Jun-10,14-Sep-12 +TierOne Bank,Lincoln,NE,29341,Great Western Bank,4-Jun-10,14-Sep-12 +Arcola Homestead Savings Bank,Arcola,IL,31813,No Acquirer,4-Jun-10,14-Sep-12 +First National Bank,Rosedale,MS,15814,The Jefferson Bank,4-Jun-10,5-Nov-12 +Sun West Bank,Las Vegas,NV,34785,City National Bank,28-May-10,14-Sep-12 +"Granite Community Bank, NA",Granite Bay,CA,57315,Tri Counties Bank,28-May-10,14-Sep-12 +Bank of Florida - Tampa,Tampa,FL,57814,EverBank,28-May-10,5-Nov-12 +Bank of Florida - Southwest,Naples,FL,35106,EverBank,28-May-10,5-Nov-12 +Bank of Florida - Southeast,Fort Lauderdale,FL,57360,EverBank,28-May-10,5-Nov-12 +Pinehurst Bank,Saint Paul,MN,57735,Coulee Bank,21-May-10,26-Oct-12 +Midwest Bank and Trust Company,Elmwood Park,IL,18117,"FirstMerit Bank, N.A.",14-May-10,23-Aug-12 +Southwest Community Bank,Springfield,MO,34255,Simmons First National Bank,14-May-10,23-Aug-12 +New Liberty 
Bank,Plymouth,MI,35586,Bank of Ann Arbor,14-May-10,23-Aug-12 +Satilla Community Bank,Saint Marys,GA,35114,Ameris Bank,14-May-10,5-Nov-12 +1st Pacific Bank of California,San Diego,CA,35517,City National Bank,7-May-10,13-Dec-12 +Towne Bank of Arizona,Mesa,AZ,57697,Commerce Bank of Arizona,7-May-10,23-Aug-12 +Access Bank,Champlin,MN,16476,PrinsBank,7-May-10,23-Aug-12 +The Bank of Bonifay,Bonifay,FL,14246,First Federal Bank of Florida,7-May-10,5-Nov-12 +Frontier Bank,Everett,WA,22710,"Union Bank, N.A.",30-Apr-10,15-Jan-13 +BC National Banks,Butler,MO,17792,Community First Bank,30-Apr-10,23-Aug-12 +Champion Bank,Creve Coeur,MO,58362,BankLiberty,30-Apr-10,23-Aug-12 +CF Bancorp,Port Huron,MI,30005,First Michigan Bank,30-Apr-10,15-Jan-13 +Westernbank Puerto Rico,Mayaguez,PR,31027,Banco Popular de Puerto Rico,30-Apr-10,5-Nov-12 +R-G Premier Bank of Puerto Rico,Hato Rey,PR,32185,Scotiabank de Puerto Rico,30-Apr-10,5-Nov-12 +Eurobank,San Juan,PR,27150,Oriental Bank and Trust,30-Apr-10,5-Nov-12 +Wheatland Bank,Naperville,IL,58429,Wheaton Bank & Trust,23-Apr-10,23-Aug-12 +Peotone Bank and Trust Company,Peotone,IL,10888,First Midwest Bank,23-Apr-10,23-Aug-12 +Lincoln Park Savings Bank,Chicago,IL,30600,Northbrook Bank & Trust Company,23-Apr-10,23-Aug-12 +New Century Bank,Chicago,IL,34821,"MB Financial Bank, N.A.",23-Apr-10,23-Aug-12 +Citizens Bank and Trust Company of Chicago,Chicago,IL,34658,Republic Bank of Chicago,23-Apr-10,23-Aug-12 +Broadway Bank,Chicago,IL,22853,"MB Financial Bank, N.A.",23-Apr-10,23-Aug-12 +"Amcore Bank, National Association",Rockford,IL,3735,Harris N.A.,23-Apr-10,23-Aug-12 +City Bank,Lynnwood,WA,21521,Whidbey Island Bank,16-Apr-10,14-Sep-12 +Tamalpais Bank,San Rafael,CA,33493,"Union Bank, N.A.",16-Apr-10,23-Aug-12 +Innovative Bank,Oakland,CA,23876,Center Bank,16-Apr-10,23-Aug-12 +Butler Bank,Lowell,MA,26619,People's United Bank,16-Apr-10,23-Aug-12 +Riverside National Bank of Florida,Fort Pierce,FL,24067,"TD Bank, N.A.",16-Apr-10,5-Nov-12 +AmericanFirst 
Bank,Clermont,FL,57724,"TD Bank, N.A.",16-Apr-10,31-Oct-12 +First Federal Bank of North Florida,Palatka,FL,28886,"TD Bank, N.A.",16-Apr-10,15-Jan-13 +Lakeside Community Bank,Sterling Heights,MI,34878,No Acquirer,16-Apr-10,23-Aug-12 +Beach First National Bank,Myrtle Beach,SC,34242,Bank of North Carolina,9-Apr-10,5-Nov-12 +Desert Hills Bank,Phoenix,AZ,57060,New York Community Bank,26-Mar-10,23-Aug-12 +Unity National Bank,Cartersville,GA,34678,Bank of the Ozarks,26-Mar-10,14-Sep-12 +Key West Bank,Key West,FL,34684,Centennial Bank,26-Mar-10,23-Aug-12 +McIntosh Commercial Bank,Carrollton,GA,57399,CharterBank,26-Mar-10,23-Aug-12 +State Bank of Aurora,Aurora,MN,8221,Northern State Bank,19-Mar-10,23-Aug-12 +First Lowndes Bank,Fort Deposit,AL,24957,First Citizens Bank,19-Mar-10,23-Aug-12 +Bank of Hiawassee,Hiawassee,GA,10054,Citizens South Bank,19-Mar-10,23-Aug-12 +Appalachian Community Bank,Ellijay,GA,33989,Community & Southern Bank,19-Mar-10,31-Oct-12 +Advanta Bank Corp.,Draper,UT,33535,No Acquirer,19-Mar-10,14-Sep-12 +Century Security Bank,Duluth,GA,58104,Bank of Upson,19-Mar-10,23-Aug-12 +American National Bank,Parma,OH,18806,The National Bank and Trust Company,19-Mar-10,23-Aug-12 +Statewide Bank,Covington,LA,29561,Home Bank,12-Mar-10,23-Aug-12 +Old Southern Bank,Orlando,FL,58182,Centennial Bank,12-Mar-10,23-Aug-12 +The Park Avenue Bank,New York,NY,27096,Valley National Bank,12-Mar-10,23-Aug-12 +LibertyPointe Bank,New York,NY,58071,Valley National Bank,11-Mar-10,23-Aug-12 +Centennial Bank,Ogden,UT,34430,No Acquirer,5-Mar-10,14-Sep-12 +Waterfield Bank,Germantown,MD,34976,No Acquirer,5-Mar-10,23-Aug-12 +Bank of Illinois,Normal,IL,9268,Heartland Bank and Trust Company,5-Mar-10,23-Aug-12 +Sun American Bank,Boca Raton,FL,27126,First-Citizens Bank & Trust Company,5-Mar-10,23-Aug-12 +Rainier Pacific Bank,Tacoma,WA,38129,Umpqua Bank,26-Feb-10,23-Aug-12 +Carson River Community Bank,Carson City,NV,58352,Heritage Bank of Nevada,26-Feb-10,15-Jan-13 +"La Jolla Bank, FSB",La 
Jolla,CA,32423,"OneWest Bank, FSB",19-Feb-10,24-Aug-12 +George Washington Savings Bank,Orland Park,IL,29952,"FirstMerit Bank, N.A.",19-Feb-10,24-Aug-12 +The La Coste National Bank,La Coste,TX,3287,Community National Bank,19-Feb-10,14-Sep-12 +Marco Community Bank,Marco Island,FL,57586,Mutual of Omaha Bank,19-Feb-10,24-Aug-12 +1st American State Bank of Minnesota,Hancock,MN,15448,"Community Development Bank, FSB",5-Feb-10,24-Aug-12 +American Marine Bank,Bainbridge Island,WA,16730,Columbia State Bank,29-Jan-10,24-Aug-12 +First Regional Bank,Los Angeles,CA,23011,First-Citizens Bank & Trust Company,29-Jan-10,24-Aug-12 +Community Bank and Trust,Cornelia,GA,5702,SCBT National Association,29-Jan-10,15-Jan-13 +"Marshall Bank, N.A.",Hallock,MN,16133,United Valley Bank,29-Jan-10,23-Aug-12 +Florida Community Bank,Immokalee,FL,5672,"Premier American Bank, N.A.",29-Jan-10,15-Jan-13 +First National Bank of Georgia,Carrollton,GA,16480,Community & Southern Bank,29-Jan-10,13-Dec-12 +Columbia River Bank,The Dalles,OR,22469,Columbia State Bank,22-Jan-10,14-Sep-12 +Evergreen Bank,Seattle,WA,20501,Umpqua Bank,22-Jan-10,15-Jan-13 +Charter Bank,Santa Fe,NM,32498,Charter Bank,22-Jan-10,23-Aug-12 +Bank of Leeton,Leeton,MO,8265,"Sunflower Bank, N.A.",22-Jan-10,15-Jan-13 +Premier American Bank,Miami,FL,57147,"Premier American Bank, N.A.",22-Jan-10,13-Dec-12 +Barnes Banking Company,Kaysville,UT,1252,No Acquirer,15-Jan-10,23-Aug-12 +St. Stephen State Bank,St. Stephen,MN,17522,First State Bank of St. 
Joseph,15-Jan-10,23-Aug-12 +Town Community Bank & Trust,Antioch,IL,34705,First American Bank,15-Jan-10,23-Aug-12 +Horizon Bank,Bellingham,WA,22977,Washington Federal Savings and Loan Association,8-Jan-10,23-Aug-12 +"First Federal Bank of California, F.S.B.",Santa Monica,CA,28536,"OneWest Bank, FSB",18-Dec-09,23-Aug-12 +Imperial Capital Bank,La Jolla,CA,26348,City National Bank,18-Dec-09,5-Sep-12 +Independent Bankers' Bank,Springfield,IL,26820,The Independent BankersBank (TIB),18-Dec-09,23-Aug-12 +New South Federal Savings Bank,Irondale,AL,32276,Beal Bank,18-Dec-09,23-Aug-12 +Citizens State Bank,New Baltimore,MI,1006,No Acquirer,18-Dec-09,5-Nov-12 +Peoples First Community Bank,Panama City,FL,32167,Hancock Bank,18-Dec-09,5-Nov-12 +RockBridge Commercial Bank,Atlanta,GA,58315,No Acquirer,18-Dec-09,5-Nov-12 +SolutionsBank,Overland Park,KS,4731,Arvest Bank,11-Dec-09,23-Aug-12 +"Valley Capital Bank, N.A.",Mesa,AZ,58399,Enterprise Bank & Trust,11-Dec-09,23-Aug-12 +"Republic Federal Bank, N.A.",Miami,FL,22846,1st United Bank,11-Dec-09,5-Nov-12 +Greater Atlantic Bank,Reston,VA,32583,Sonabank,4-Dec-09,5-Nov-12 +Benchmark Bank,Aurora,IL,10440,"MB Financial Bank, N.A.",4-Dec-09,23-Aug-12 +AmTrust Bank,Cleveland,OH,29776,New York Community Bank,4-Dec-09,5-Nov-12 +The Tattnall Bank,Reidsville,GA,12080,Heritage Bank of the South,4-Dec-09,5-Nov-12 +First Security National Bank,Norcross,GA,26290,State Bank and Trust Company,4-Dec-09,5-Nov-12 +The Buckhead Community Bank,Atlanta,GA,34663,State Bank and Trust Company,4-Dec-09,5-Nov-12 +Commerce Bank of Southwest Florida,Fort Myers,FL,58016,Central Bank,20-Nov-09,5-Nov-12 +Pacific Coast National Bank,San Clemente,CA,57914,Sunwest Bank,13-Nov-09,22-Aug-12 +Orion Bank,Naples,FL,22427,IBERIABANK,13-Nov-09,5-Nov-12 +"Century Bank, F.S.B.",Sarasota,FL,32267,IBERIABANK,13-Nov-09,22-Aug-12 +United Commercial Bank,San Francisco,CA,32469,East West Bank,6-Nov-09,5-Nov-12 +Gateway Bank of St. Louis,St. 
Louis,MO,19450,Central Bank of Kansas City,6-Nov-09,22-Aug-12 +Prosperan Bank,Oakdale,MN,35074,"Alerus Financial, N.A.",6-Nov-09,22-Aug-12 +Home Federal Savings Bank,Detroit,MI,30329,Liberty Bank and Trust Company,6-Nov-09,22-Aug-12 +United Security Bank,Sparta,GA,22286,Ameris Bank,6-Nov-09,15-Jan-13 +North Houston Bank,Houston,TX,18776,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Madisonville State Bank,Madisonville,TX,33782,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Citizens National Bank,Teague,TX,25222,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Park National Bank,Chicago,IL,11677,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Pacific National Bank,San Francisco,CA,30006,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +California National Bank,Los Angeles,CA,34659,U.S. Bank N.A.,30-Oct-09,5-Sep-12 +San Diego National Bank,San Diego,CA,23594,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Community Bank of Lemont,Lemont,IL,35291,U.S. Bank N.A.,30-Oct-09,15-Jan-13 +"Bank USA, N.A.",Phoenix,AZ,32218,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +First DuPage Bank,Westmont,IL,35038,First Midwest Bank,23-Oct-09,22-Aug-12 +Riverview Community Bank,Otsego,MN,57525,Central Bank,23-Oct-09,22-Aug-12 +Bank of Elmwood,Racine,WI,18321,Tri City National Bank,23-Oct-09,22-Aug-12 +Flagship National Bank,Bradenton,FL,35044,First Federal Bank of Florida,23-Oct-09,22-Aug-12 +Hillcrest Bank Florida,Naples,FL,58336,Stonegate Bank,23-Oct-09,22-Aug-12 +American United Bank,Lawrenceville,GA,57794,Ameris Bank,23-Oct-09,5-Sep-12 +Partners Bank,Naples,FL,57959,Stonegate Bank,23-Oct-09,15-Jan-13 +San Joaquin Bank,Bakersfield,CA,23266,Citizens Business Bank,16-Oct-09,22-Aug-12 +Southern Colorado National Bank,Pueblo,CO,57263,Legacy Bank,2-Oct-09,5-Sep-12 +Jennings State Bank,Spring Grove,MN,11416,Central Bank,2-Oct-09,21-Aug-12 +Warren Bank,Warren,MI,34824,The Huntington National Bank,2-Oct-09,21-Aug-12 +Georgian Bank,Atlanta,GA,57151,"First Citizens Bank and Trust Company, Inc.",25-Sep-09,21-Aug-12 +"Irwin Union Bank, F.S.B.",Louisville,KY,57068,"First 
Financial Bank, N.A.",18-Sep-09,5-Sep-12 +Irwin Union Bank and Trust Company,Columbus,IN,10100,"First Financial Bank, N.A.",18-Sep-09,21-Aug-12 +Venture Bank,Lacey,WA,22868,First-Citizens Bank & Trust Company,11-Sep-09,21-Aug-12 +Brickwell Community Bank,Woodbury,MN,57736,CorTrust Bank N.A.,11-Sep-09,15-Jan-13 +"Corus Bank, N.A.",Chicago,IL,13693,"MB Financial Bank, N.A.",11-Sep-09,21-Aug-12 +First State Bank,Flagstaff,AZ,34875,Sunwest Bank,4-Sep-09,15-Jan-13 +Platinum Community Bank,Rolling Meadows,IL,35030,No Acquirer,4-Sep-09,21-Aug-12 +Vantus Bank,Sioux City,IA,27732,Great Southern Bank,4-Sep-09,21-Aug-12 +InBank,Oak Forest,IL,20203,"MB Financial Bank, N.A.",4-Sep-09,21-Aug-12 +First Bank of Kansas City,Kansas City,MO,25231,Great American Bank,4-Sep-09,21-Aug-12 +Affinity Bank,Ventura,CA,27197,Pacific Western Bank,28-Aug-09,21-Aug-12 +Mainstreet Bank,Forest Lake,MN,1909,Central Bank,28-Aug-09,21-Aug-12 +Bradford Bank,Baltimore,MD,28312,Manufacturers and Traders Trust Company (M&T Bank),28-Aug-09,15-Jan-13 +Guaranty Bank,Austin,TX,32618,BBVA Compass,21-Aug-09,21-Aug-12 +CapitalSouth Bank,Birmingham,AL,22130,IBERIABANK,21-Aug-09,15-Jan-13 +First Coweta Bank,Newnan,GA,57702,United Bank,21-Aug-09,15-Jan-13 +ebank,Atlanta,GA,34682,"Stearns Bank, N.A.",21-Aug-09,21-Aug-12 +Community Bank of Nevada,Las Vegas,NV,34043,No Acquirer,14-Aug-09,21-Aug-12 +Community Bank of Arizona,Phoenix,AZ,57645,MidFirst Bank,14-Aug-09,21-Aug-12 +"Union Bank, National Association",Gilbert,AZ,34485,MidFirst Bank,14-Aug-09,21-Aug-12 +Colonial Bank,Montgomery,AL,9609,"Branch Banking & Trust Company, (BB&T)",14-Aug-09,5-Sep-12 +Dwelling House Savings and Loan Association,Pittsburgh,PA,31559,"PNC Bank, N.A.",14-Aug-09,15-Jan-13 +Community First Bank,Prineville,OR,23268,Home Federal Bank,7-Aug-09,15-Jan-13 +Community National Bank of Sarasota County,Venice,FL,27183,"Stearns Bank, N.A.",7-Aug-09,20-Aug-12 +First State Bank,Sarasota,FL,27364,"Stearns Bank, N.A.",7-Aug-09,20-Aug-12 +Mutual 
Bank,Harvey,IL,18659,United Central Bank,31-Jul-09,20-Aug-12 +First BankAmericano,Elizabeth,NJ,34270,Crown Bank,31-Jul-09,20-Aug-12 +Peoples Community Bank,West Chester,OH,32288,"First Financial Bank, N.A.",31-Jul-09,20-Aug-12 +Integrity Bank,Jupiter,FL,57604,Stonegate Bank,31-Jul-09,20-Aug-12 +First State Bank of Altus,Altus,OK,9873,Herring Bank,31-Jul-09,20-Aug-12 +Security Bank of Jones County,Gray,GA,8486,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of Houston County,Perry,GA,27048,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of Bibb County,Macon,GA,27367,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of North Metro,Woodstock,GA,57105,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of North Fulton,Alpharetta,GA,57430,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of Gwinnett County,Suwanee,GA,57346,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Waterford Village Bank,Williamsville,NY,58065,"Evans Bank, N.A.",24-Jul-09,20-Aug-12 +Temecula Valley Bank,Temecula,CA,34341,First-Citizens Bank & Trust Company,17-Jul-09,20-Aug-12 +Vineyard Bank,Rancho Cucamonga,CA,23556,California Bank & Trust,17-Jul-09,20-Aug-12 +BankFirst,Sioux Falls,SD,34103,"Alerus Financial, N.A.",17-Jul-09,20-Aug-12 +First Piedmont Bank,Winder,GA,34594,First American Bank and Trust Company,17-Jul-09,15-Jan-13 +Bank of Wyoming,Thermopolis,WY,22754,Central Bank & Trust,10-Jul-09,20-Aug-12 +Founders Bank,Worth,IL,18390,The PrivateBank and Trust Company,2-Jul-09,20-Aug-12 +Millennium State Bank of Texas,Dallas,TX,57667,State Bank of Texas,2-Jul-09,26-Oct-12 +First National Bank of Danville,Danville,IL,3644,"First Financial Bank, N.A.",2-Jul-09,20-Aug-12 +Elizabeth State Bank,Elizabeth,IL,9262,Galena State Bank and Trust Company,2-Jul-09,20-Aug-12 +Rock River Bank,Oregon,IL,15302,The Harvard State Bank,2-Jul-09,20-Aug-12 +First State Bank of Winchester,Winchester,IL,11710,The First National Bank of 
Beardstown,2-Jul-09,20-Aug-12 +John Warner Bank,Clinton,IL,12093,State Bank of Lincoln,2-Jul-09,20-Aug-12 +Mirae Bank,Los Angeles,CA,57332,Wilshire State Bank,26-Jun-09,20-Aug-12 +MetroPacific Bank,Irvine,CA,57893,Sunwest Bank,26-Jun-09,20-Aug-12 +Horizon Bank,Pine City,MN,9744,"Stearns Bank, N.A.",26-Jun-09,20-Aug-12 +Neighborhood Community Bank,Newnan,GA,35285,CharterBank,26-Jun-09,20-Aug-12 +Community Bank of West Georgia,Villa Rica,GA,57436,No Acquirer,26-Jun-09,17-Aug-12 +First National Bank of Anthony,Anthony,KS,4614,Bank of Kansas,19-Jun-09,17-Aug-12 +Cooperative Bank,Wilmington,NC,27837,First Bank,19-Jun-09,17-Aug-12 +Southern Community Bank,Fayetteville,GA,35251,United Community Bank,19-Jun-09,17-Aug-12 +Bank of Lincolnwood,Lincolnwood,IL,17309,Republic Bank of Chicago,5-Jun-09,17-Aug-12 +Citizens National Bank,Macomb,IL,5757,Morton Community Bank,22-May-09,4-Sep-12 +Strategic Capital Bank,Champaign,IL,35175,Midland States Bank,22-May-09,4-Sep-12 +"BankUnited, FSB",Coral Gables,FL,32247,BankUnited,21-May-09,17-Aug-12 +Westsound Bank,Bremerton,WA,34843,Kitsap Bank,8-May-09,4-Sep-12 +America West Bank,Layton,UT,35461,Cache Valley Bank,1-May-09,17-Aug-12 +Citizens Community Bank,Ridgewood,NJ,57563,North Jersey Community Bank,1-May-09,4-Sep-12 +"Silverton Bank, NA",Atlanta,GA,26535,No Acquirer,1-May-09,17-Aug-12 +First Bank of Idaho,Ketchum,ID,34396,"U.S. 
Bank, N.A.",24-Apr-09,17-Aug-12 +First Bank of Beverly Hills,Calabasas,CA,32069,No Acquirer,24-Apr-09,4-Sep-12 +Michigan Heritage Bank,Farmington Hills,MI,34369,Level One Bank,24-Apr-09,17-Aug-12 +American Southern Bank,Kennesaw,GA,57943,Bank of North Georgia,24-Apr-09,17-Aug-12 +Great Basin Bank of Nevada,Elko,NV,33824,Nevada State Bank,17-Apr-09,4-Sep-12 +American Sterling Bank,Sugar Creek,MO,8266,Metcalf Bank,17-Apr-09,31-Aug-12 +New Frontier Bank,Greeley,CO,34881,No Acquirer,10-Apr-09,4-Sep-12 +Cape Fear Bank,Wilmington,NC,34639,First Federal Savings and Loan Association,10-Apr-09,17-Aug-12 +Omni National Bank,Atlanta,GA,22238,No Acquirer,27-Mar-09,17-Aug-12 +"TeamBank, NA",Paola,KS,4754,Great Southern Bank,20-Mar-09,17-Aug-12 +Colorado National Bank,Colorado Springs,CO,18896,Herring Bank,20-Mar-09,17-Aug-12 +FirstCity Bank,Stockbridge,GA,18243,No Acquirer,20-Mar-09,17-Aug-12 +Freedom Bank of Georgia,Commerce,GA,57558,Northeast Georgia Bank,6-Mar-09,17-Aug-12 +Security Savings Bank,Henderson,NV,34820,Bank of Nevada,27-Feb-09,7-Sep-12 +Heritage Community Bank,Glenwood,IL,20078,"MB Financial Bank, N.A.",27-Feb-09,17-Aug-12 +Silver Falls Bank,Silverton,OR,35399,Citizens Bank,20-Feb-09,17-Aug-12 +Pinnacle Bank of Oregon,Beaverton,OR,57342,Washington Trust Bank of Spokane,13-Feb-09,17-Aug-12 +Corn Belt Bank & Trust Co.,Pittsfield,IL,16500,The Carlinville National Bank,13-Feb-09,17-Aug-12 +Riverside Bank of the Gulf Coast,Cape Coral,FL,34563,TIB Bank,13-Feb-09,17-Aug-12 +Sherman County Bank,Loup City,NE,5431,Heritage Bank,13-Feb-09,17-Aug-12 +County Bank,Merced,CA,22574,Westamerica Bank,6-Feb-09,4-Sep-12 +Alliance Bank,Culver City,CA,23124,California Bank & Trust,6-Feb-09,16-Aug-12 +FirstBank Financial Services,McDonough,GA,57017,Regions Bank,6-Feb-09,16-Aug-12 +Ocala National Bank,Ocala,FL,26538,"CenterState Bank of Florida, N.A.",30-Jan-09,4-Sep-12 +Suburban FSB,Crofton,MD,30763,Bank of Essex,30-Jan-09,16-Aug-12 +MagnetBank,Salt Lake City,UT,58001,No 
Acquirer,30-Jan-09,16-Aug-12 +1st Centennial Bank,Redlands,CA,33025,First California Bank,23-Jan-09,16-Aug-12 +Bank of Clark County,Vancouver,WA,34959,Umpqua Bank,16-Jan-09,16-Aug-12 +National Bank of Commerce,Berkeley,IL,19733,Republic Bank of Chicago,16-Jan-09,16-Aug-12 +Sanderson State Bank,Sanderson,TX,11568,The Pecos County State Bank,12-Dec-08,4-Sep-12 +Haven Trust Bank,Duluth,GA,35379,"Branch Banking & Trust Company, (BB&T)",12-Dec-08,16-Aug-12 +First Georgia Community Bank,Jackson,GA,34301,United Bank,5-Dec-08,16-Aug-12 +PFF Bank & Trust ,Pomona,CA,28344,"U.S. Bank, N.A.",21-Nov-08,4-Jan-13 +Downey Savings & Loan,Newport Beach,CA,30968,"U.S. Bank, N.A.",21-Nov-08,4-Jan-13 +Community Bank,Loganville,GA,16490,Bank of Essex,21-Nov-08,4-Sep-12 +Security Pacific Bank,Los Angeles,CA,23595,Pacific Western Bank,7-Nov-08,28-Aug-12 +"Franklin Bank, SSB",Houston,TX,26870,Prosperity Bank,7-Nov-08,16-Aug-12 +Freedom Bank,Bradenton,FL,57930,Fifth Third Bank,31-Oct-08,16-Aug-12 +Alpha Bank & Trust,Alpharetta,GA,58241,"Stearns Bank, N.A.",24-Oct-08,16-Aug-12 +Meridian Bank,Eldred,IL,13789,National Bank,10-Oct-08,31-May-12 +Main Street Bank,Northville,MI,57654,Monroe Bank & Trust,10-Oct-08,16-Aug-12 +Washington Mutual Bank,Henderson,NV,32633,JP Morgan Chase Bank,25-Sep-08,16-Aug-12 +Ameribank,Northfork,WV,6782,The Citizens Savings Bank,19-Sep-08,16-Aug-12 +Silver State Bank,Henderson,NV,34194,Nevada State Bank,5-Sep-08,16-Aug-12 +Integrity Bank,Alpharetta,GA,35469,Regions Bank,29-Aug-08,16-Aug-12 +Columbian Bank & Trust,Topeka,KS,22728,Citizens Bank & Trust,22-Aug-08,16-Aug-12 +First Priority Bank,Bradenton,FL,57523,SunTrust Bank,1-Aug-08,16-Aug-12 +"First Heritage Bank, NA",Newport Beach,CA,57961,Mutual of Omaha Bank,25-Jul-08,28-Aug-12 +First National Bank of Nevada,Reno,NV,27011,Mutual of Omaha Bank,25-Jul-08,28-Aug-12 +IndyMac Bank,Pasadena,CA,29730,"OneWest Bank, FSB",11-Jul-08,28-Aug-12 +"First Integrity Bank, NA",Staples,MN,12736,First International Bank and 
Trust,30-May-08,28-Aug-12 +"ANB Financial, NA",Bentonville,AR,33901,Pulaski Bank and Trust Company,9-May-08,28-Aug-12 +Hume Bank,Hume,MO,1971,Security Bank,7-Mar-08,28-Aug-12 +Douglass National Bank,Kansas City,MO,24660,Liberty Bank and Trust Company,25-Jan-08,26-Oct-12 +Miami Valley Bank,Lakeview,OH,16848,The Citizens Banking Company,4-Oct-07,28-Aug-12 +NetBank,Alpharetta,GA,32575,ING DIRECT,28-Sep-07,28-Aug-12 +Metropolitan Savings Bank,Pittsburgh,PA,35353,Allegheny Valley Bank of Pittsburgh,2-Feb-07,27-Oct-10 +Bank of Ephraim,Ephraim,UT,1249,Far West Bank,25-Jun-04,9-Apr-08 +Reliance Bank,White Plains,NY,26778,Union State Bank,19-Mar-04,9-Apr-08 +Guaranty National Bank of Tallahassee,Tallahassee,FL,26838,Hancock Bank of Florida,12-Mar-04,5-Jun-12 +Dollar Savings Bank,Newark,NJ,31330,No Acquirer,14-Feb-04,9-Apr-08 +Pulaski Savings Bank,Philadelphia,PA,27203,Earthstar Bank,14-Nov-03,22-Jul-05 +First National Bank of Blanchardville,Blanchardville,WI,11639,The Park Bank,9-May-03,5-Jun-12 +Southern Pacific Bank,Torrance,CA,27094,Beal Bank,7-Feb-03,20-Oct-08 +Farmers Bank of Cheneyville,Cheneyville,LA,16445,Sabine State Bank & Trust,17-Dec-02,20-Oct-04 +Bank of Alamo,Alamo,TN,9961,No Acquirer,8-Nov-02,18-Mar-05 +AmTrade International Bank,Atlanta,GA,33784,No Acquirer,30-Sep-02,11-Sep-06 +Universal Federal Savings Bank,Chicago,IL,29355,Chicago Community Bank,27-Jun-02,9-Apr-08 +Connecticut Bank of Commerce,Stamford,CT,19183,Hudson United Bank,26-Jun-02,14-Feb-12 +New Century Bank,Shelby Township,MI,34979,No Acquirer,28-Mar-02,18-Mar-05 +Net 1st National Bank,Boca Raton,FL,26652,Bank Leumi USA,1-Mar-02,9-Apr-08 +"NextBank, NA",Phoenix,AZ,22314,No Acquirer,7-Feb-02,27-Aug-10 +Oakwood Deposit Bank Co.,Oakwood,OH,8966,The State Bank & Trust Company,1-Feb-02,25-Oct-12 +Bank of Sierra Blanca,Sierra Blanca,TX,22002,The Security State Bank of Pecos,18-Jan-02,6-Nov-03 +"Hamilton Bank, NA",Miami,FL,24382,Israel Discount Bank of New York,11-Jan-02,5-Jun-12 +Sinclair National 
Bank,Gravette,AR,34248,Delta Trust & Bank,7-Sep-01,10-Feb-04 +"Superior Bank, FSB",Hinsdale,IL,32646,"Superior Federal, FSB",27-Jul-01,5-Jun-12 +Malta National Bank,Malta,OH,6629,North Valley Bank,3-May-01,18-Nov-02 +First Alliance Bank & Trust Co.,Manchester,NH,34264,Southern New Hampshire Bank & Trust,2-Feb-01,18-Feb-03 +National State Bank of Metropolis,Metropolis,IL,3815,Banterra Bank of Marion,14-Dec-00,17-Mar-05 +Bank of Honolulu,Honolulu,HI,21029,Bank of the Orient,13-Oct-00,17-Mar-05 diff --git a/pandas/io/tests/data/failed_banklist.html b/pandas/io/tests/data/banklist.html similarity index 97% rename from pandas/io/tests/data/failed_banklist.html rename to pandas/io/tests/data/banklist.html index ea2a5c2799..8e15f37ccf 100644 --- a/pandas/io/tests/data/failed_banklist.html +++ b/pandas/io/tests/data/banklist.html @@ -455,8 +455,25 @@ + - + Douglas County Bank + Douglasville + GA + 21649 + Hamilton State Bank + April 26, 2013 + April 30, 2013 + + + Parkway Bank + Lenoir + NC + 57158 + CertusBank, National Association + April 26, 2013 + April 30, 2013 + Chipola Community Bank Marianna @@ -5230,7 +5247,7 @@ Last Updated - 04/23/2013 + 04/30/2013 diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index d0468026ca..6e2f6ec00d 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -2,33 +2,48 @@ import re from cStringIO import StringIO from unittest import TestCase +import collections +import numbers +from urllib2 import urlopen +from contextlib import closing +import warnings import nose import numpy as np +from numpy.random import rand from numpy.testing.decorators import slow -from pandas.io.html import read_html, import_module -from pandas import DataFrame, MultiIndex -from pandas.util.testing import assert_frame_equal, network +from pandas.io.html import read_html, import_module, _parse, _LxmlFrameParser +from pandas.io.html import _BeautifulSoupHtml5LibFrameParser +from pandas.io.html import 
_BeautifulSoupLxmlFrameParser, _remove_whitespace +from pandas import DataFrame, MultiIndex, read_csv, Timestamp +from pandas.util.testing import assert_frame_equal, network, get_data_path +from pandas.util.testing import makeCustomDataframe as mkdf -def _skip_if_no_parser(): +def _have_module(module_name): try: - import_module('lxml') + import_module(module_name) + return True except ImportError: - try: - import_module('bs4') - except ImportError: - raise nose.SkipTest + return False + +def _skip_if_no(module_name): + if not _have_module(module_name): + raise nose.SkipTest -DATA_PATH = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') + +def _skip_if_none(module_names): + if isinstance(module_names, basestring): + _skip_if_no(module_names) + else: + if not any(_have_module(module_name) for module_name in module_names): + raise nose.SkipTest -def _run_read_html(*args, **kwargs): - _skip_if_no_parser() - return read_html(*args, **kwargs) +DATA_PATH = get_data_path() def isframe(x): @@ -47,14 +62,36 @@ def assert_framelist_equal(list1, list2): assert not frame_i.empty, 'frames are both empty' +def _run_read_html(parser, io, match='.+', flavor='bs4', header=None, + index_col=None, skiprows=None, infer_types=False, + attrs=None): + if isinstance(skiprows, numbers.Integral) and skiprows < 0: + raise AssertionError('cannot skip rows starting from the end of the ' + 'data (you passed a negative value)') + return _parse(parser, io, match, flavor, header, index_col, skiprows, + infer_types, attrs) + + class TestLxmlReadHtml(TestCase): + def test_to_html_compat(self): + df = mkdf(4, 3, data_gen_f=lambda *args: rand(), c_idx_names=False, + r_idx_names=False).applymap('{0:.3f}'.format) + out = df.to_html() + res = self.run_read_html(out, attrs={'class': 'dataframe'}, + index_col=0)[0] + print df.dtypes + print res.dtypes + assert_frame_equal(res, df) + def setUp(self): self.spam_data = os.path.join(DATA_PATH, 'spam.html') - self.banklist_data = 
os.path.join(DATA_PATH, 'failed_banklist.html') + self.banklist_data = os.path.join(DATA_PATH, 'banklist.html') def run_read_html(self, *args, **kwargs): kwargs['flavor'] = 'lxml' - return _run_read_html(*args, **kwargs) + _skip_if_no('lxml') + parser = _LxmlFrameParser + return _run_read_html(parser, *args, **kwargs) @network def test_banklist_url(self): @@ -85,13 +122,31 @@ def test_banklist(self): @slow def test_banklist_header(self): + def try_remove_ws(x): + try: + return _remove_whitespace(x) + except AttributeError: + return x + df = self.run_read_html(self.banklist_data, 'Metcalf', - attrs={'id': 'table'}, header=0, skiprows=1)[0] - self.assertFalse(df.empty) - cols = ['Bank Name', 'City', 'State', 'CERT #', - 'Acquiring Institution', 'Closing Date', 'Updated Date'] - self.assertListEqual(df.columns.values.tolist(), cols) - self.assertEqual(df.shape[0], 499) + attrs={'id': 'table'}, infer_types=False)[0] + ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), + converters={'Closing Date': Timestamp, + 'Updated Date': Timestamp}) + self.assertNotEqual(df.shape, ground_truth.shape) + self.assertRaises(AssertionError, assert_frame_equal, df, + ground_truth.applymap(try_remove_ws)) + + @slow + def test_gold_canyon(self): + gc = 'Gold Canyon' + with open(self.banklist_data, 'r') as f: + raw_text = f.read() + + self.assertIn(gc, raw_text) + df = self.run_read_html(self.banklist_data, 'Gold Canyon', + attrs={'id': 'table'}, infer_types=False)[0] + self.assertNotIn(gc, df.to_string()) def test_spam(self): df1 = self.run_read_html(self.spam_data, '.*Water.*', @@ -99,8 +154,10 @@ def test_spam(self): df2 = self.run_read_html(self.spam_data, 'Unit', infer_types=False) assert_framelist_equal(df1, df2) + print df1[0] - self.assertEqual(df1[0].ix[0, 0], 'Nutrient') + self.assertEqual(df1[0].ix[0, 0], 'Proximates') + self.assertEqual(df1[0].columns[0], 'Nutrient') def test_spam_no_match(self): dfs = self.run_read_html(self.spam_data) @@ -113,8 +170,9 @@ def 
test_banklist_no_match(self): self.assertIsInstance(df, DataFrame) def test_spam_header(self): - df = self.run_read_html(self.spam_data, '.*Water.*', header=0)[0] - self.assertEqual(df.columns[0], 'Nutrient') + df = self.run_read_html(self.spam_data, '.*Water.*', header=0) + df = self.run_read_html(self.spam_data, '.*Water.*', header=1)[0] + self.assertEqual(df.columns[0], 'Water') self.assertFalse(df.empty) def test_skiprows_int(self): @@ -179,26 +237,20 @@ def test_index(self): df2 = self.run_read_html(self.spam_data, 'Unit', index_col=0) assert_framelist_equal(df1, df2) - def test_header(self): - df1 = self.run_read_html(self.spam_data, '.*Water.*', header=0) - df2 = self.run_read_html(self.spam_data, 'Unit', header=0) - assert_framelist_equal(df1, df2) - self.assertEqual(df1[0].columns[0], 'Nutrient') - def test_header_and_index(self): - df1 = self.run_read_html(self.spam_data, '.*Water.*', header=0, + df1 = self.run_read_html(self.spam_data, '.*Water.*', header=1, index_col=0) - df2 = self.run_read_html(self.spam_data, 'Unit', header=0, index_col=0) + df2 = self.run_read_html(self.spam_data, 'Unit', header=1, index_col=0) assert_framelist_equal(df1, df2) def test_infer_types(self): - df1 = self.run_read_html(self.spam_data, '.*Water.*', header=0, - index_col=0, infer_types=False) - df2 = self.run_read_html(self.spam_data, 'Unit', header=0, index_col=0, + df1 = self.run_read_html(self.spam_data, '.*Water.*', index_col=0, + infer_types=False) + df2 = self.run_read_html(self.spam_data, 'Unit', index_col=0, infer_types=False) assert_framelist_equal(df1, df2) - df2 = self.run_read_html(self.spam_data, 'Unit', header=0, index_col=0, + df2 = self.run_read_html(self.spam_data, 'Unit', index_col=0, infer_types=True) self.assertRaises(AssertionError, assert_framelist_equal, df1, df2) @@ -304,21 +356,137 @@ def test_negative_skiprows_banklist(self): @slow def test_multiple_matches(self): - url = self.banklist_data - dfs = self.run_read_html(url, match=r'Florida') - 
self.assertIsInstance(dfs, list) + url = 'http://code.google.com/p/pythonxy/wiki/StandardPlugins' + dfs = self.run_read_html(url, match='Python', + attrs={'class': 'wikitable'}) self.assertGreater(len(dfs), 1) - for df in dfs: - self.assertIsInstance(df, DataFrame) + + @network + def test_pythonxy_plugins_table(self): + url = 'http://code.google.com/p/pythonxy/wiki/StandardPlugins' + dfs = self.run_read_html(url, match='Python', + attrs={'class': 'wikitable'}) + zz = [df.iloc[0, 0] for df in dfs] + self.assertListEqual(sorted(zz), sorted(['Python', 'SciTE'])) def test_invalid_flavor(): url = 'google.com' - nose.tools.assert_raises(AssertionError, _run_read_html, url, 'google', + nose.tools.assert_raises(AssertionError, read_html, url, 'google', flavor='not a* valid**++ flaver') -class TestBs4ReadHtml(TestLxmlReadHtml): +@slow +class TestBs4LxmlParser(TestLxmlReadHtml): + def test(self): + pass + def run_read_html(self, *args, **kwargs): kwargs['flavor'] = 'bs4' - return _run_read_html(*args, **kwargs) + _skip_if_no('lxml') + parser = _BeautifulSoupLxmlFrameParser + return _run_read_html(parser, *args, **kwargs) + + +@slow +class TestBs4Html5LibParser(TestBs4LxmlParser): + def test(self): + pass + + def run_read_html(self, *args, **kwargs): + kwargs['flavor'] = 'bs4' + _skip_if_no('html5lib') + parser = _BeautifulSoupHtml5LibFrameParser + return _run_read_html(parser, *args, **kwargs) + + @slow + def test_banklist_header(self): + def try_remove_ws(x): + try: + return _remove_whitespace(x) + except AttributeError: + return x + + df = self.run_read_html(self.banklist_data, 'Metcalf', + attrs={'id': 'table'}, infer_types=True)[0] + ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), + converters={'Updated Date': Timestamp, + 'Closing Date': Timestamp}) + # these will not + self.assertTupleEqual(df.shape, ground_truth.shape) + old = ['First Vietnamese American Bank In Vietnamese', + 'Westernbank Puerto Rico En Espanol', + 'R-G Premier Bank of Puerto Rico En 
Espanol', + 'Eurobank En Espanol', 'Sanderson State Bank En Espanol', + 'Washington Mutual Bank (Including its subsidiary Washington ' + 'Mutual Bank FSB)', + 'Silver State Bank En Espanol', + 'AmTrade International BankEn Espanol', + 'Hamilton Bank, NA En Espanol', + 'The Citizens Savings BankPioneer Community Bank, Inc.'] + new = ['First Vietnamese American Bank', 'Westernbank Puerto Rico', + 'R-G Premier Bank of Puerto Rico', 'Eurobank', + 'Sanderson State Bank', 'Washington Mutual Bank', + 'Silver State Bank', 'AmTrade International Bank', + 'Hamilton Bank, NA', 'The Citizens Savings Bank'] + dfnew = df.applymap(try_remove_ws).replace(old, new) + gtnew = ground_truth.applymap(try_remove_ws) + assert_frame_equal(dfnew, gtnew) + + @slow + def test_gold_canyon(self): + gc = 'Gold Canyon' + with open(self.banklist_data, 'r') as f: + raw_text = f.read() + + self.assertIn(gc, raw_text) + df = self.run_read_html(self.banklist_data, 'Gold Canyon', + attrs={'id': 'table'}, infer_types=False)[0] + self.assertIn(gc, df.to_string()) + + +def get_elements_from_url(url, flavor, element='table'): + _skip_if_no('bs4') + _skip_if_no(flavor) + from bs4 import BeautifulSoup, SoupStrainer + strainer = SoupStrainer(element) + with closing(urlopen(url)) as f: + soup = BeautifulSoup(f, features=flavor, parse_only=strainer) + return soup.find_all(element) + + +@slow +def test_bs4_finds_tables(): + url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&' + 'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam') + flavors = 'lxml', 'html5lib' + with warnings.catch_warnings(): + warnings.filterwarnings('ignore') + + for flavor in flavors: + assert get_elements_from_url(url, flavor, 'table') + + +def get_lxml_elements(url, element): + + _skip_if_no('lxml') + from lxml.html import parse + doc = parse(url) + return doc.xpath('.//{0}'.format(element)) + + +@slow +def test_lxml_finds_tables(): + url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&' + 
'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam') + assert get_lxml_elements(url, 'table') + + +@slow +def test_lxml_finds_tbody(): + url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&' + 'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam') + assert get_lxml_elements(url, 'tbody') + + + diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 86387989a7..f38fe61d45 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -126,16 +126,18 @@ def assert_almost_equal(a, b, check_less_precise = False): return assert_dict_equal(a, b) if isinstance(a, basestring): - assert a == b, (a, b) + assert a == b, "{0} != {1}".format(a, b) return True if isiterable(a): np.testing.assert_(isiterable(b)) - assert(len(a) == len(b)) + na, nb = len(a), len(b) + assert na == nb, "{0} != {1}".format(na, nb) + if np.array_equal(a, b): return True else: - for i in xrange(len(a)): + for i in xrange(na): assert_almost_equal(a[i], b[i], check_less_precise) return True @@ -169,7 +171,7 @@ def assert_almost_equal(a, b, check_less_precise = False): np.testing.assert_almost_equal( 1, a / b, decimal=decimal, err_msg=err_msg(a, b), verbose=False) else: - assert(a == b) + assert a == b, "%s != %s" % (a, b) def is_sorted(seq):