From 684006dade006ea20571143d5b3a2332ef2294fd Mon Sep 17 00:00:00 2001 From: Martin Blais Date: Sat, 4 Aug 2012 20:01:15 -0400 Subject: [PATCH 1/5] Formatting: Added classes argument to DataFrame.to_html(). --- pandas/core/format.py | 10 ++++++++-- pandas/core/frame.py | 4 ++-- pandas/tests/test_format.py | 26 ++++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index 5a4b7a4d59..8b2fc8362a 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -255,7 +255,7 @@ def _format_col(self, i): na_rep=self.na_rep, space=self.col_space) - def to_html(self): + def to_html(self, classes=None): """ Render a DataFrame to a html table. """ @@ -291,7 +291,13 @@ def write_tr(l, indent=0, indent_delta=4, header=False): indent_delta = 2 frame = self.frame - write('', indent) + _classes = ['dataframe'] # Default class. + if classes is not None: + if isinstance(classes, str): + classes = classes.split() + assert isinstance(classes, (list, tuple)) + _classes.extend(classes) + write('
' % ' '.join(_classes), indent) def _column_header(): row = [''] * (frame.index.nlevels - 1) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dc5c6c0f67..1a09ba6a84 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1240,7 +1240,7 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, - bold_rows=True): + bold_rows=True, classes=None): """ to_html-specific options bold_rows : boolean, default True @@ -1263,7 +1263,7 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, bold_rows=bold_rows, sparsify=sparsify, index_names=index_names) - formatter.to_html() + formatter.to_html(classes=classes) if buf is None: return formatter.buf.getvalue() diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 1484a41b91..fd16853296 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -6,6 +6,7 @@ import os import sys import unittest +from textwrap import dedent from numpy import nan from numpy.random import randn @@ -413,7 +414,7 @@ def test_to_html_multiindex(self): names=['CL0', 'CL1']) df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html() - expected = ('
\n' + expected = ('
\n' ' \n' ' \n' '
' @@ -451,7 +452,7 @@ def test_to_html_multiindex(self): np.mod(range(4), 2))) df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html() - expected = ('
CL0
\n' + expected = ('
\n' ' \n' ' \n' ' \n' @@ -495,6 +496,27 @@ def test_repr_html(self): fmt.reset_printoptions() + def test_to_html_with_classes(self): + df = pandas.DataFrame() + result = df.to_html(classes="sortable draggable") + expected = dedent(""" + +
+ + + + + + +
Index([], dtype=object)Empty DataFrame
+ + """).strip() + self.assertEqual(result, expected) + + result = df.to_html(classes=["sortable", "draggable"]) + self.assertEqual(result, expected) + + class TestSeriesFormatting(unittest.TestCase): def setUp(self): From e7d329f2502a87324aa3c88101224004b90c8a45 Mon Sep 17 00:00:00 2001 From: Martin Blais Date: Sat, 4 Aug 2012 20:33:39 -0400 Subject: [PATCH 2/5] DataFormatter: Added support for formatting the index in to_html() via a special '__index__' key to the formatters dict. --- pandas/core/format.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index 8b2fc8362a..01b6f3110e 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -355,12 +355,16 @@ def _maybe_bold_row(x): fmt_values[i] = self._format_col(i) # write values + index_formatter = self.formatters.get('__index__', None) for i in range(len(frame)): row = [] + index_value = frame.index[i] + if index_formatter: + index_value = index_formatter(index_value) if isinstance(frame.index, MultiIndex): - row.extend(_maybe_bold_row(frame.index[i])) + row.extend(_maybe_bold_row(index_value)) else: - row.append(_maybe_bold_row(frame.index[i])) + row.append(_maybe_bold_row(index_value)) for j in range(len(self.columns)): row.append(fmt_values[j][i]) write_tr(row, indent, indent_delta) @@ -415,6 +419,7 @@ def has_column_names(self): return _has_names(self.frame.columns) def _get_formatted_index(self): + # Note: this is only used by to_string(), not by to_html(). index = self.frame.index columns = self.frame.columns From c68ca7e4ff68fd2e06a794c777243ff9f67cb502 Mon Sep 17 00:00:00 2001 From: Martin Blais Date: Sat, 4 Aug 2012 20:55:42 -0400 Subject: [PATCH 3/5] DataFormatter: Refactored to_string() in order to let the future to_latex() take advantage of it. --- pandas/core/format.py | 96 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 40 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index 01b6f3110e..f825cf5ead 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -189,63 +189,79 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, else: self.columns = frame.columns - def to_string(self, force_unicode=False): + def _to_str_columns(self, force_unicode=False): """ - Render a DataFrame to a console-friendly tabular output. + Render a DataFrame to a list of columns (as lists of strings). """ frame = self.frame - to_write = [] + # may include levels names also + str_index = self._get_formatted_index() + str_columns = self._get_formatted_column_labels() - if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u'Empty %s\nColumns: %s\nIndex: %s' - % (type(self.frame).__name__, - frame.columns, frame.index)) - to_write.append(info_line) - else: - # may include levels names also - str_index = self._get_formatted_index() - str_columns = self._get_formatted_column_labels() - - stringified = [] - - for i, c in enumerate(self.columns): - if self.header: - fmt_values = self._format_col(i) - cheader = str_columns[i] - max_len = max(max(_strlen(x) for x in fmt_values), - max(len(x) for x in cheader)) - if self.justify == 'left': - cheader = [x.ljust(max_len) for x in cheader] - else: - cheader = [x.rjust(max_len) for x in cheader] - fmt_values = cheader + fmt_values - stringified.append(_make_fixed_width(fmt_values, - self.justify)) - else: - stringified = [_make_fixed_width(self._format_col(i), - self.justify) - for i, c in enumerate(self.columns)] + stringified = [] - if self.index: - to_write.append(adjoin(1, str_index, *stringified)) + for i, c in enumerate(self.columns): + if self.header: + fmt_values = self._format_col(i) + cheader = str_columns[i] + max_len = max(max(_strlen(x) for x in fmt_values), + max(len(x) for x in cheader)) + if self.justify == 'left': + cheader = [x.ljust(max_len) for x in cheader] + else: + cheader = [x.rjust(max_len) for x in cheader] + fmt_values = cheader + fmt_values + stringified.append(_make_fixed_width(fmt_values, + self.justify)) else: - to_write.append(adjoin(1, *stringified)) + stringified = [_make_fixed_width(self._format_col(i), + self.justify) + for i, c in enumerate(self.columns)] + + strcols = stringified + if self.index: + strcols.insert(0, str_index) if not py3compat.PY3: if force_unicode: - to_write = [unicode(s) for s in to_write] + strcols = map(lambda col: map(unicode, col), strcols) else: # generally everything is plain strings, which has ascii # encoding. problem is when there is a char with value over 127 # - everything then gets converted to unicode. try: - for s in to_write: - str(s) + map(lambda col: map(str, col), strcols) except UnicodeError: - to_write = [unicode(s) for s in to_write] + strcols = map(lambda col: map(unicode, col), strcols) + + return strcols + + def to_string(self, force_unicode=False): + """ + Render a DataFrame to a console-friendly tabular output. + """ + frame = self.frame + + if len(frame.columns) == 0 or len(frame.index) == 0: + info_line = (u'Empty %s\nColumns: %s\nIndex: %s' + % (type(self.frame).__name__, + frame.columns, frame.index)) + text = info_line + else: + strcols = self._to_str_columns(force_unicode) + text = adjoin(1, *strcols) + + self.buf.writelines(text) + + + + + + + + - self.buf.writelines(to_write) def _format_col(self, i): col = self.columns[i] From aa2748aa13f7727d608c437cd55cf64c2f57dcde Mon Sep 17 00:00:00 2001 From: Martin Blais Date: Sat, 4 Aug 2012 23:10:28 -0400 Subject: [PATCH 4/5] DataFrameFormatter: Implemented really basic (but working) LaTeX serialization support in DataFrame.to_latex(). --- pandas/core/format.py | 33 +++++++++++++++++++++++++++++---- pandas/core/frame.py | 26 ++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index f825cf5ead..3d2d4e75d7 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -151,6 +151,7 @@ class DataFrameFormatter(object): self.to_string() : console-friendly tabular output self.to_html() : html table + self.to_latex() : LaTeX tabular environment table """ @@ -251,17 +252,41 @@ def to_string(self, force_unicode=False): else: strcols = self._to_str_columns(force_unicode) text = adjoin(1, *strcols) - - self.buf.writelines(text) - - + self.buf.writelines(text) + def to_latex(self, force_unicode=False, column_format=None): + """ + Render a DataFrame to a LaTeX tabular environment output. + """ + frame = self.frame + if len(frame.columns) == 0 or len(frame.index) == 0: + info_line = (u'Empty %s\nColumns: %s\nIndex: %s' + % (type(self.frame).__name__, + frame.columns, frame.index)) + strcols = [[info_line]] + else: + strcols = self._to_str_columns(force_unicode) + if column_format is None: + column_format = '|l|%s|' % '|'.join('c' for _ in strcols) + else: + assert isinstance(column_format, str) + self.buf.write('\\begin{tabular}{%s}\n' % column_format) + self.buf.write('\\hline\n') + nlevels = frame.index.nlevels + for i, row in enumerate(izip(*strcols)): + if i == nlevels: + self.buf.write('\\hline\n') # End of header + crow = [(x.replace('_', '\\_') if x else '{}') for x in row] + self.buf.write(' & '.join(crow)) + self.buf.write(' \\\\\n') + self.buf.write('\\hline\n') + self.buf.write('\\end{tabular}\n') def _format_col(self, i): col = self.columns[i] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1a09ba6a84..d32af46f7d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1268,6 +1268,32 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, if buf is None: return formatter.buf.getvalue() + @Appender(fmt.docstring_to_string, indents=1) + def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, + header=True, index=True, na_rep='NaN', formatters=None, + float_format=None, sparsify=None, index_names=True, + bold_rows=True): + """ + to_latex-specific options + bold_rows : boolean, default True + Make the row labels bold in the output + + Render a DataFrame to a tabular environment table. + You can splice this into a LaTeX document. + """ + formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns, + col_space=col_space, na_rep=na_rep, + header=header, index=index, + formatters=formatters, + float_format=float_format, + bold_rows=bold_rows, + sparsify=sparsify, + index_names=index_names) + formatter.to_latex() + + if buf is None: + return formatter.buf.getvalue() + def info(self, verbose=True, buf=None): """ Concise summary of a DataFrame, used in __repr__ when very large. From bfee97ffcc465f7dfc72e36bac230c772a79a06c Mon Sep 17 00:00:00 2001 From: Martin Blais Date: Fri, 10 Aug 2012 20:27:27 -0400 Subject: [PATCH 5/5] BUG: Converted more unsafe characters for LaTeX. --- pandas/core/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index 3d2d4e75d7..aa01f9253f 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -281,7 +281,7 @@ def to_latex(self, force_unicode=False, column_format=None): for i, row in enumerate(izip(*strcols)): if i == nlevels: self.buf.write('\\hline\n') # End of header - crow = [(x.replace('_', '\\_') if x else '{}') for x in row] + crow = [(x.replace('_', '\\_').replace('%', '\\%').replace('&', '\\&') if x else '{}') for x in row] self.buf.write(' & '.join(crow)) self.buf.write(' \\\\\n')