diff --git a/pandas/core/format.py b/pandas/core/format.py
index 5a4b7a4d59..aa01f9253f 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -151,6 +151,7 @@ class DataFrameFormatter(object):
self.to_string() : console-friendly tabular output
self.to_html() : html table
+ self.to_latex() : LaTeX tabular environment table
"""
@@ -189,63 +190,103 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
else:
self.columns = frame.columns
- def to_string(self, force_unicode=False):
+ def _to_str_columns(self, force_unicode=False):
"""
- Render a DataFrame to a console-friendly tabular output.
+ Render a DataFrame to a list of columns (as lists of strings).
"""
frame = self.frame
- to_write = []
+ # may also include the level names
+ str_index = self._get_formatted_index()
+ str_columns = self._get_formatted_column_labels()
- if len(frame.columns) == 0 or len(frame.index) == 0:
- info_line = (u'Empty %s\nColumns: %s\nIndex: %s'
- % (type(self.frame).__name__,
- frame.columns, frame.index))
- to_write.append(info_line)
- else:
- # may include levels names also
- str_index = self._get_formatted_index()
- str_columns = self._get_formatted_column_labels()
-
- stringified = []
-
- for i, c in enumerate(self.columns):
- if self.header:
- fmt_values = self._format_col(i)
- cheader = str_columns[i]
- max_len = max(max(_strlen(x) for x in fmt_values),
- max(len(x) for x in cheader))
- if self.justify == 'left':
- cheader = [x.ljust(max_len) for x in cheader]
- else:
- cheader = [x.rjust(max_len) for x in cheader]
- fmt_values = cheader + fmt_values
- stringified.append(_make_fixed_width(fmt_values,
- self.justify))
- else:
- stringified = [_make_fixed_width(self._format_col(i),
- self.justify)
- for i, c in enumerate(self.columns)]
+ stringified = []
- if self.index:
- to_write.append(adjoin(1, str_index, *stringified))
+ for i, c in enumerate(self.columns):
+ if self.header:
+ fmt_values = self._format_col(i)
+ cheader = str_columns[i]
+ max_len = max(max(_strlen(x) for x in fmt_values),
+ max(len(x) for x in cheader))
+ if self.justify == 'left':
+ cheader = [x.ljust(max_len) for x in cheader]
+ else:
+ cheader = [x.rjust(max_len) for x in cheader]
+ fmt_values = cheader + fmt_values
+ stringified.append(_make_fixed_width(fmt_values,
+ self.justify))
else:
- to_write.append(adjoin(1, *stringified))
+ stringified = [_make_fixed_width(self._format_col(i),
+ self.justify)
+ for i, c in enumerate(self.columns)]
+
+ strcols = stringified
+ if self.index:
+ strcols.insert(0, str_index)
if not py3compat.PY3:
if force_unicode:
- to_write = [unicode(s) for s in to_write]
+ strcols = map(lambda col: map(unicode, col), strcols)
else:
# generally everything is plain strings, which has ascii
# encoding. problem is when there is a char with value over 127
# - everything then gets converted to unicode.
try:
- for s in to_write:
- str(s)
+ map(lambda col: map(str, col), strcols)
except UnicodeError:
- to_write = [unicode(s) for s in to_write]
+ strcols = map(lambda col: map(unicode, col), strcols)
+
+ return strcols
+
+ def to_string(self, force_unicode=False):
+ """
+ Render a DataFrame to a console-friendly tabular output.
+ """
+ frame = self.frame
+
+ if len(frame.columns) == 0 or len(frame.index) == 0:
+ info_line = (u'Empty %s\nColumns: %s\nIndex: %s'
+ % (type(self.frame).__name__,
+ frame.columns, frame.index))
+ text = info_line
+ else:
+ strcols = self._to_str_columns(force_unicode)
+ text = adjoin(1, *strcols)
+
+ self.buf.writelines(text)
+
+ def to_latex(self, force_unicode=False, column_format=None):
+ """
+ Render a DataFrame to a LaTeX tabular environment output.
+ """
+ frame = self.frame
+
+ if len(frame.columns) == 0 or len(frame.index) == 0:
+ info_line = (u'Empty %s\nColumns: %s\nIndex: %s'
+ % (type(self.frame).__name__,
+ frame.columns, frame.index))
+ strcols = [[info_line]]
+ else:
+ strcols = self._to_str_columns(force_unicode)
+
+ if column_format is None:
+ column_format = '|l|%s|' % '|'.join('c' for _ in strcols)
+ else:
+ assert isinstance(column_format, str)
+
+ self.buf.write('\\begin{tabular}{%s}\n' % column_format)
+ self.buf.write('\\hline\n')
+
+ nlevels = frame.index.nlevels
+ for i, row in enumerate(izip(*strcols)):
+ if i == nlevels:
+ self.buf.write('\\hline\n') # End of header
+ crow = [(x.replace('_', '\\_').replace('%', '\\%').replace('&', '\\&') if x else '{}') for x in row]
+ self.buf.write(' & '.join(crow))
+ self.buf.write(' \\\\\n')
- self.buf.writelines(to_write)
+ self.buf.write('\\hline\n')
+ self.buf.write('\\end{tabular}\n')
def _format_col(self, i):
col = self.columns[i]
@@ -255,7 +296,7 @@ def _format_col(self, i):
na_rep=self.na_rep,
space=self.col_space)
- def to_html(self):
+ def to_html(self, classes=None):
"""
Render a DataFrame to a html table.
"""
@@ -291,7 +332,13 @@ def write_tr(l, indent=0, indent_delta=4, header=False):
indent_delta = 2
frame = self.frame
- write('
', indent)
+ _classes = ['dataframe'] # Default class.
+ if classes is not None:
+ if isinstance(classes, str):
+ classes = classes.split()
+ assert isinstance(classes, (list, tuple))
+ _classes.extend(classes)
+ write('' % ' '.join(_classes), indent)
def _column_header():
row = [''] * (frame.index.nlevels - 1)
@@ -349,12 +396,16 @@ def _maybe_bold_row(x):
fmt_values[i] = self._format_col(i)
# write values
+ index_formatter = self.formatters.get('__index__', None)
for i in range(len(frame)):
row = []
+ index_value = frame.index[i]
+ if index_formatter:
+ index_value = index_formatter(index_value)
if isinstance(frame.index, MultiIndex):
- row.extend(_maybe_bold_row(frame.index[i]))
+ row.extend(_maybe_bold_row(index_value))
else:
- row.append(_maybe_bold_row(frame.index[i]))
+ row.append(_maybe_bold_row(index_value))
for j in range(len(self.columns)):
row.append(fmt_values[j][i])
write_tr(row, indent, indent_delta)
@@ -409,6 +460,7 @@ def has_column_names(self):
return _has_names(self.frame.columns)
def _get_formatted_index(self):
+ # Note: this is used via _to_str_columns() by to_string() and to_latex(), not by to_html().
index = self.frame.index
columns = self.frame.columns
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index dc5c6c0f67..d32af46f7d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1240,7 +1240,7 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
def to_html(self, buf=None, columns=None, col_space=None, colSpace=None,
header=True, index=True, na_rep='NaN', formatters=None,
float_format=None, sparsify=None, index_names=True,
- bold_rows=True):
+ bold_rows=True, classes=None):
"""
to_html-specific options
bold_rows : boolean, default True
@@ -1263,7 +1263,33 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None,
bold_rows=bold_rows,
sparsify=sparsify,
index_names=index_names)
- formatter.to_html()
+ formatter.to_html(classes=classes)
+
+ if buf is None:
+ return formatter.buf.getvalue()
+
+ @Appender(fmt.docstring_to_string, indents=1)
+ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None,
+ header=True, index=True, na_rep='NaN', formatters=None,
+ float_format=None, sparsify=None, index_names=True,
+ bold_rows=True):
+ """
+ to_latex-specific options
+ bold_rows : boolean, default True
+ Make the row labels bold in the output (mirrors to_html; does not appear to be applied by the LaTeX writer yet)
+
+ Render a DataFrame to a LaTeX tabular environment table.
+ You can splice this into a LaTeX document.
+ """
+ formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns,
+ col_space=col_space, na_rep=na_rep,
+ header=header, index=index,
+ formatters=formatters,
+ float_format=float_format,
+ bold_rows=bold_rows,
+ sparsify=sparsify,
+ index_names=index_names)
+ formatter.to_latex()
if buf is None:
return formatter.buf.getvalue()
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
index 1484a41b91..fd16853296 100644
--- a/pandas/tests/test_format.py
+++ b/pandas/tests/test_format.py
@@ -6,6 +6,7 @@
import os
import sys
import unittest
+from textwrap import dedent
from numpy import nan
from numpy.random import randn
@@ -413,7 +414,7 @@ def test_to_html_multiindex(self):
names=['CL0', 'CL1'])
df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns)
result = df.to_html()
- expected = ('\n'
+ expected = ('\n'
' \n'
' \n'
' | CL0 | '
@@ -451,7 +452,7 @@ def test_to_html_multiindex(self):
np.mod(range(4), 2)))
df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns)
result = df.to_html()
- expected = ('\n'
+ expected = ('\n'
' \n'
' \n'
' | \n'
@@ -495,6 +496,27 @@ def test_repr_html(self):
fmt.reset_printoptions()
+ def test_to_html_with_classes(self):
+ df = pandas.DataFrame()
+ result = df.to_html(classes="sortable draggable")
+ expected = dedent("""
+
+
+
+
+ | Index([], dtype=object) |
+ Empty DataFrame |
+
+
+
+
+ """).strip()
+ self.assertEqual(result, expected)
+
+ result = df.to_html(classes=["sortable", "draggable"])
+ self.assertEqual(result, expected)
+
+
class TestSeriesFormatting(unittest.TestCase):
def setUp(self):
|