diff --git a/pandas/core/format.py b/pandas/core/format.py index 5a4b7a4d59..aa01f9253f 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -151,6 +151,7 @@ class DataFrameFormatter(object): self.to_string() : console-friendly tabular output self.to_html() : html table + self.to_latex() : LaTeX tabular environment table """ @@ -189,63 +190,103 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, else: self.columns = frame.columns - def to_string(self, force_unicode=False): + def _to_str_columns(self, force_unicode=False): """ - Render a DataFrame to a console-friendly tabular output. + Render a DataFrame to a list of columns (as lists of strings). """ frame = self.frame - to_write = [] + # may include levels names also + str_index = self._get_formatted_index() + str_columns = self._get_formatted_column_labels() - if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u'Empty %s\nColumns: %s\nIndex: %s' - % (type(self.frame).__name__, - frame.columns, frame.index)) - to_write.append(info_line) - else: - # may include levels names also - str_index = self._get_formatted_index() - str_columns = self._get_formatted_column_labels() - - stringified = [] - - for i, c in enumerate(self.columns): - if self.header: - fmt_values = self._format_col(i) - cheader = str_columns[i] - max_len = max(max(_strlen(x) for x in fmt_values), - max(len(x) for x in cheader)) - if self.justify == 'left': - cheader = [x.ljust(max_len) for x in cheader] - else: - cheader = [x.rjust(max_len) for x in cheader] - fmt_values = cheader + fmt_values - stringified.append(_make_fixed_width(fmt_values, - self.justify)) - else: - stringified = [_make_fixed_width(self._format_col(i), - self.justify) - for i, c in enumerate(self.columns)] + stringified = [] - if self.index: - to_write.append(adjoin(1, str_index, *stringified)) + for i, c in enumerate(self.columns): + if self.header: + fmt_values = self._format_col(i) + cheader = str_columns[i] + max_len = max(max(_strlen(x) for x in fmt_values), + max(len(x) for x in cheader)) + if self.justify == 'left': + cheader = [x.ljust(max_len) for x in cheader] + else: + cheader = [x.rjust(max_len) for x in cheader] + fmt_values = cheader + fmt_values + stringified.append(_make_fixed_width(fmt_values, + self.justify)) else: - to_write.append(adjoin(1, *stringified)) + stringified = [_make_fixed_width(self._format_col(i), + self.justify) + for i, c in enumerate(self.columns)] + + strcols = stringified + if self.index: + strcols.insert(0, str_index) if not py3compat.PY3: if force_unicode: - to_write = [unicode(s) for s in to_write] + strcols = map(lambda col: map(unicode, col), strcols) else: # generally everything is plain strings, which has ascii # encoding. problem is when there is a char with value over 127 # - everything then gets converted to unicode. try: - for s in to_write: - str(s) + map(lambda col: map(str, col), strcols) except UnicodeError: - to_write = [unicode(s) for s in to_write] + strcols = map(lambda col: map(unicode, col), strcols) + + return strcols + + def to_string(self, force_unicode=False): + """ + Render a DataFrame to a console-friendly tabular output. + """ + frame = self.frame + + if len(frame.columns) == 0 or len(frame.index) == 0: + info_line = (u'Empty %s\nColumns: %s\nIndex: %s' + % (type(self.frame).__name__, + frame.columns, frame.index)) + text = info_line + else: + strcols = self._to_str_columns(force_unicode) + text = adjoin(1, *strcols) + + self.buf.writelines(text) + + def to_latex(self, force_unicode=False, column_format=None): + """ + Render a DataFrame to a LaTeX tabular environment output. + """ + frame = self.frame + + if len(frame.columns) == 0 or len(frame.index) == 0: + info_line = (u'Empty %s\nColumns: %s\nIndex: %s' + % (type(self.frame).__name__, + frame.columns, frame.index)) + strcols = [[info_line]] + else: + strcols = self._to_str_columns(force_unicode) + + if column_format is None: + column_format = '|l|%s|' % '|'.join('c' for _ in strcols) + else: + assert isinstance(column_format, str) + + self.buf.write('\\begin{tabular}{%s}\n' % column_format) + self.buf.write('\\hline\n') + + nlevels = frame.index.nlevels + for i, row in enumerate(izip(*strcols)): + if i == nlevels: + self.buf.write('\\hline\n') # End of header + crow = [(x.replace('_', '\\_').replace('%', '\\%').replace('&', '\\&') if x else '{}') for x in row] + self.buf.write(' & '.join(crow)) + self.buf.write(' \\\\\n') - self.buf.writelines(to_write) + self.buf.write('\\hline\n') + self.buf.write('\\end{tabular}\n') def _format_col(self, i): col = self.columns[i] @@ -255,7 +296,7 @@ def _format_col(self, i): na_rep=self.na_rep, space=self.col_space) - def to_html(self): + def to_html(self, classes=None): """ Render a DataFrame to a html table. """ @@ -291,7 +332,13 @@ def write_tr(l, indent=0, indent_delta=4, header=False): indent_delta = 2 frame = self.frame - write('', indent) + _classes = ['dataframe'] # Default class. + if classes is not None: + if isinstance(classes, str): + classes = classes.split() + assert isinstance(classes, (list, tuple)) + _classes.extend(classes) + write('
' % ' '.join(_classes), indent) def _column_header(): row = [''] * (frame.index.nlevels - 1) @@ -349,12 +396,16 @@ def _maybe_bold_row(x): fmt_values[i] = self._format_col(i) # write values + index_formatter = self.formatters.get('__index__', None) for i in range(len(frame)): row = [] + index_value = frame.index[i] + if index_formatter: + index_value = index_formatter(index_value) if isinstance(frame.index, MultiIndex): - row.extend(_maybe_bold_row(frame.index[i])) + row.extend(_maybe_bold_row(index_value)) else: - row.append(_maybe_bold_row(frame.index[i])) + row.append(_maybe_bold_row(index_value)) for j in range(len(self.columns)): row.append(fmt_values[j][i]) write_tr(row, indent, indent_delta) @@ -409,6 +460,7 @@ def has_column_names(self): return _has_names(self.frame.columns) def _get_formatted_index(self): + # Note: this is only used by to_string(), not by to_html(). index = self.frame.index columns = self.frame.columns diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dc5c6c0f67..d32af46f7d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1240,7 +1240,7 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, - bold_rows=True): + bold_rows=True, classes=None): """ to_html-specific options bold_rows : boolean, default True @@ -1263,7 +1263,33 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, bold_rows=bold_rows, sparsify=sparsify, index_names=index_names) - formatter.to_html() + formatter.to_html(classes=classes) + + if buf is None: + return formatter.buf.getvalue() + + @Appender(fmt.docstring_to_string, indents=1) + def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, + header=True, index=True, na_rep='NaN', formatters=None, + float_format=None, sparsify=None, index_names=True, + bold_rows=True): + """ + to_latex-specific options + bold_rows : boolean, default True + Make the row labels bold in the output + + Render a DataFrame to a tabular environment table. + You can splice this into a LaTeX document. + """ + formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns, + col_space=col_space, na_rep=na_rep, + header=header, index=index, + formatters=formatters, + float_format=float_format, + bold_rows=bold_rows, + sparsify=sparsify, + index_names=index_names) + formatter.to_latex() if buf is None: return formatter.buf.getvalue() diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 1484a41b91..fd16853296 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -6,6 +6,7 @@ import os import sys import unittest +from textwrap import dedent from numpy import nan from numpy.random import randn @@ -413,7 +414,7 @@ def test_to_html_multiindex(self): names=['CL0', 'CL1']) df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html() - expected = ('
\n' + expected = ('
\n' ' \n' ' \n' '
' @@ -451,7 +452,7 @@ def test_to_html_multiindex(self): np.mod(range(4), 2))) df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html() - expected = ('
CL0
\n' + expected = ('
\n' ' \n' ' \n' ' \n' @@ -495,6 +496,27 @@ def test_repr_html(self): fmt.reset_printoptions() + def test_to_html_with_classes(self): + df = pandas.DataFrame() + result = df.to_html(classes="sortable draggable") + expected = dedent(""" + +
+ + + + + + +
Index([], dtype=object)Empty DataFrame
+ + """).strip() + self.assertEqual(result, expected) + + result = df.to_html(classes=["sortable", "draggable"]) + self.assertEqual(result, expected) + + class TestSeriesFormatting(unittest.TestCase): def setUp(self):