Skip to content

Commit 70611a9

Browse files
authored
Fix printing of unicode attributes on Python 2 (#914)
* Fix printing of unicode attributes on Python 2 (fixes GH834)
* What's New
* Add ground truth for test_unicode_data
1 parent 884b247 commit 70611a9

File tree

8 files changed

+107
-55
lines changed

8 files changed

+107
-55
lines changed

doc/whats-new.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,10 @@ Bug fixes
9898
- ``Variable.copy(deep=True)`` no longer converts MultiIndex into a base Index
9999
(:issue:`769`). By `Benoit Bovy <https://github.com/benbovy>`_.
100100

101-
- Fixed incorrect test for dask version :issue:`891`. By
101+
- Fix printing datasets with unicode attributes on Python 2 (:issue:`892`). By
102+
`Stephan Hoyer <https://github.com/shoyer>`_.
103+
104+
- Fixed incorrect test for dask version (:issue:`891`). By
102105
`Stephan Hoyer <https://github.com/shoyer>`_.
103106

104107
- :py:func:`~xarray.plot.contour` now plots the correct number of contours

xarray/core/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def wrapped_func(self, **kwargs):
109109
return wrapped_func
110110

111111

112-
class AbstractArray(ImplementsArrayReduce):
112+
class AbstractArray(ImplementsArrayReduce, formatting.ReprMixin):
113113
def __bool__(self):
114114
return bool(self.values)
115115

xarray/core/coordinates.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def _dim_shape(var):
3737
return [(dim, size) for dim, size in zip(var.dims, var.shape)]
3838

3939

40-
class AbstractCoordinates(Mapping):
40+
class AbstractCoordinates(Mapping, formatting.ReprMixin):
4141
def __getitem__(self, key):
4242
if (key in self._names or
4343
(isinstance(key, basestring) and
@@ -62,7 +62,7 @@ def __len__(self):
6262
def __contains__(self, key):
6363
return key in self._names
6464

65-
def __repr__(self):
65+
def __unicode__(self):
6666
return formatting.coords_repr(self)
6767

6868
@property
@@ -219,7 +219,7 @@ def __delitem__(self, key):
219219
del self._data._coords[key]
220220

221221

222-
class Indexes(Mapping):
222+
class Indexes(Mapping, formatting.ReprMixin):
223223
def __init__(self, source):
224224
self._source = source
225225

@@ -238,5 +238,5 @@ def __getitem__(self, key):
238238
else:
239239
raise KeyError(key)
240240

241-
def __repr__(self):
241+
def __unicode__(self):
242242
return formatting.indexes_repr(self)

xarray/core/dataset.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def as_dataset(obj):
105105
return obj
106106

107107

108-
class DataVariables(Mapping):
108+
class DataVariables(Mapping, formatting.ReprMixin):
109109
def __init__(self, dataset):
110110
self._dataset = dataset
111111

@@ -126,7 +126,7 @@ def __getitem__(self, key):
126126
else:
127127
raise KeyError(key)
128128

129-
def __repr__(self):
129+
def __unicode__(self):
130130
return formatting.vars_repr(self)
131131

132132

@@ -140,7 +140,8 @@ def __getitem__(self, key):
140140
return self.dataset.sel(**key)
141141

142142

143-
class Dataset(Mapping, ImplementsDatasetReduce, BaseDataObject):
143+
class Dataset(Mapping, ImplementsDatasetReduce, BaseDataObject,
144+
formatting.ReprMixin):
144145
"""A multi-dimensional, in memory, array database.
145146
146147
A dataset resembles an in-memory representation of a NetCDF file, and
@@ -810,7 +811,7 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None,
810811
dump = utils.function_alias(to_netcdf, 'dump')
811812
dumps = utils.function_alias(to_netcdf, 'dumps')
812813

813-
def __repr__(self):
814+
def __unicode__(self):
814815
return formatting.dataset_repr(self)
815816

816817
@property

xarray/core/formatting.py

Lines changed: 68 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,33 @@
1+
"""String formatting routines for __repr__.
2+
3+
For the sake of sanity, we only do internal formatting with unicode, which can
4+
be returned by the __unicode__ special method. We use ReprMixin to provide the
5+
__repr__ method so that things can work on Python 2.
6+
"""
17
from datetime import datetime, timedelta
28
import functools
39

410
import numpy as np
511
import pandas as pd
612

713
from .options import OPTIONS
8-
from .pycompat import iteritems, unicode_type, bytes_type, dask_array_type
14+
from .pycompat import PY2, iteritems, unicode_type, bytes_type, dask_array_type
915

1016

1117
def pretty_print(x, numchars):
1218
"""Given an object `x`, call `str(x)` and format the returned string so
1319
that it is numchars long, padding with trailing spaces or truncating with
1420
ellipses as necessary
1521
"""
16-
s = str(x)
17-
if len(s) > numchars:
18-
return s[:(numchars - 3)] + '...'
19-
else:
20-
return s + ' ' * (numchars - len(s))
22+
s = maybe_truncate(x, numchars)
23+
return s + ' ' * max(numchars - len(s), 0)
24+
25+
26+
def maybe_truncate(obj, maxlen=500):
27+
s = unicode_type(obj)
28+
if len(s) > maxlen:
29+
s = s[:(maxlen - 3)] + u'...'
30+
return s
2131

2232

2333
def wrap_indent(text, start='', length=None):
@@ -27,6 +37,23 @@ def wrap_indent(text, start='', length=None):
2737
return start + indent.join(x for x in text.splitlines())
2838

2939

40+
def ensure_valid_repr(string):
41+
"""Ensure that the given value is valid for the result of __repr__.
42+
43+
On Python 2, this means we need to convert unicode to bytes. We won't need
44+
this function once we drop Python 2.7 support.
45+
"""
46+
if PY2 and isinstance(string, unicode):
47+
string = string.encode('utf-8')
48+
return string
49+
50+
51+
class ReprMixin(object):
52+
"""Mixin that defines __repr__ for a class that already has __unicode__."""
53+
def __repr__(self):
54+
return ensure_valid_repr(self.__unicode__())
55+
56+
3057
def _get_indexer_at_least_n_items(shape, n_desired):
3158
assert 0 < n_desired <= np.prod(shape)
3259
cum_items = np.cumprod(shape[::-1])
@@ -58,7 +85,7 @@ def first_n_items(x, n_desired):
5885

5986
def format_timestamp(t):
6087
"""Cast given object to a Timestamp and return a nicely formatted string"""
61-
datetime_str = str(pd.Timestamp(t))
88+
datetime_str = unicode_type(pd.Timestamp(t))
6289
try:
6390
date_str, time_str = datetime_str.split()
6491
except ValueError:
@@ -73,7 +100,7 @@ def format_timestamp(t):
73100

74101
def format_timedelta(t, timedelta_format=None):
75102
"""Cast given object to a Timestamp and return a nicely formatted string"""
76-
timedelta_str = str(pd.Timedelta(t))
103+
timedelta_str = unicode_type(pd.Timedelta(t))
77104
try:
78105
days_str, time_str = timedelta_str.split(' days ')
79106
except ValueError:
@@ -97,9 +124,9 @@ def format_item(x, timedelta_format=None, quote_strings=True):
97124
elif isinstance(x, (unicode_type, bytes_type)):
98125
return repr(x) if quote_strings else x
99126
elif isinstance(x, (float, np.float)):
100-
return '{0:.4}'.format(x)
127+
return u'{0:.4}'.format(x)
101128
else:
102-
return str(x)
129+
return unicode_type(x)
103130

104131

105132
def format_items(x):
@@ -135,27 +162,30 @@ def format_array_flat(items_ndarray, max_width):
135162
cum_len = np.cumsum([len(s) + 1 for s in pprint_items]) - 1
136163
if (max_possibly_relevant < items_ndarray.size or
137164
(cum_len > max_width).any()):
138-
end_padding = ' ...'
165+
end_padding = u' ...'
139166
count = max(np.argmax((cum_len + len(end_padding)) > max_width), 1)
140167
pprint_items = pprint_items[:count]
141168
else:
142-
end_padding = ''
169+
end_padding = u''
143170

144-
pprint_str = ' '.join(pprint_items) + end_padding
171+
pprint_str = u' '.join(pprint_items) + end_padding
145172
return pprint_str
146173

147174

148175
def _summarize_var_or_coord(name, var, col_width, show_values=True,
149176
marker=' ', max_width=None):
150177
if max_width is None:
151178
max_width = OPTIONS['display_width']
152-
first_col = pretty_print(' %s %s ' % (marker, name), col_width)
153-
dims_str = '(%s) ' % ', '.join(map(str, var.dims)) if var.dims else ''
154-
front_str = first_col + dims_str + ('%s ' % var.dtype)
179+
first_col = pretty_print(u' %s %s ' % (marker, name), col_width)
180+
if var.dims:
181+
dims_str = u'(%s) ' % u', '.join(map(unicode_type, var.dims))
182+
else:
183+
dims_str = u''
184+
front_str = u'%s%s%s ' % (first_col, dims_str, var.dtype)
155185
if show_values:
156186
values_str = format_array_flat(var, max_width - len(front_str))
157187
else:
158-
values_str = '...'
188+
values_str = u'...'
159189
return front_str + values_str
160190

161191

@@ -177,79 +207,73 @@ def summarize_var(name, var, col_width):
177207
def summarize_coord(name, var, col_width):
178208
is_index = name in var.dims
179209
show_values = is_index or _not_remote(var)
180-
marker = '*' if is_index else ' '
210+
marker = u'*' if is_index else u' '
181211
return _summarize_var_or_coord(name, var, col_width, show_values, marker)
182212

183213

184-
def _maybe_truncate(obj, maxlen=500):
185-
s = str(obj)
186-
if len(s) > maxlen:
187-
s = s[:(maxlen - 3)] + '...'
188-
return s
189-
190-
191214
def summarize_attr(key, value, col_width=None):
192215
# ignore col_width for now to more clearly distinguish attributes
193-
return ' %s: %s' % (key, _maybe_truncate(value))
216+
return u' %s: %s' % (key, maybe_truncate(value))
194217

195218

196-
EMPTY_REPR = ' *empty*'
219+
EMPTY_REPR = u' *empty*'
197220

198221

199222
def _calculate_col_width(mapping):
200-
max_name_length = max(len(str(k)) for k in mapping) if mapping else 0
223+
max_name_length = (max(len(unicode_type(k)) for k in mapping)
224+
if mapping else 0)
201225
col_width = max(max_name_length, 7) + 6
202226
return col_width
203227

204228

205229
def _mapping_repr(mapping, title, summarizer, col_width=None):
206230
if col_width is None:
207231
col_width = _calculate_col_width(mapping)
208-
summary = ['%s:' % title]
232+
summary = [u'%s:' % title]
209233
if mapping:
210234
summary += [summarizer(k, v, col_width) for k, v in mapping.items()]
211235
else:
212236
summary += [EMPTY_REPR]
213-
return '\n'.join(summary)
237+
return u'\n'.join(summary)
214238

215239

216-
coords_repr = functools.partial(_mapping_repr, title='Coordinates',
240+
coords_repr = functools.partial(_mapping_repr, title=u'Coordinates',
217241
summarizer=summarize_coord)
218242

219243

220-
vars_repr = functools.partial(_mapping_repr, title='Data variables',
244+
vars_repr = functools.partial(_mapping_repr, title=u'Data variables',
221245
summarizer=summarize_var)
222246

223247

224-
attrs_repr = functools.partial(_mapping_repr, title='Attributes',
248+
attrs_repr = functools.partial(_mapping_repr, title=u'Attributes',
225249
summarizer=summarize_attr)
226250

227251

228252
def indexes_repr(indexes):
229253
summary = []
230254
for k, v in indexes.items():
231255
summary.append(wrap_indent(repr(v), '%s: ' % k))
232-
return '\n'.join(summary)
256+
return u'\n'.join(summary)
233257

234258

235259
def array_repr(arr):
236260
# used for DataArray, Variable and Coordinate
237261
if hasattr(arr, 'name') and arr.name is not None:
238262
name_str = '%r ' % arr.name
239263
else:
240-
name_str = ''
241-
dim_summary = ', '.join('%s: %s' % (k, v) for k, v
264+
name_str = u''
265+
dim_summary = u', '.join(u'%s: %s' % (k, v) for k, v
242266
in zip(arr.dims, arr.shape))
243267

244-
summary = ['<xarray.%s %s(%s)>'
268+
summary = [u'<xarray.%s %s(%s)>'
245269
% (type(arr).__name__, name_str, dim_summary)]
246270

247271
if isinstance(getattr(arr, 'variable', arr)._data, dask_array_type):
248272
summary.append(repr(arr.data))
249273
elif arr._in_memory or arr.size < 1e5:
250274
summary.append(repr(arr.values))
251275
else:
252-
summary.append('[%s values with dtype=%s]' % (arr.size, arr.dtype))
276+
summary.append(u'[%s values with dtype=%s]' % (arr.size, arr.dtype))
253277

254278
if hasattr(arr, 'coords'):
255279
if arr.coords:
@@ -258,21 +282,21 @@ def array_repr(arr):
258282
if arr.attrs:
259283
summary.append(attrs_repr(arr.attrs))
260284

261-
return '\n'.join(summary)
285+
return u'\n'.join(summary)
262286

263287

264288
def dataset_repr(ds):
265-
summary = ['<xarray.%s>' % type(ds).__name__]
289+
summary = [u'<xarray.%s>' % type(ds).__name__]
266290

267291
col_width = _calculate_col_width(ds)
268292

269-
dims_start = pretty_print('Dimensions:', col_width)
270-
all_dim_strings = ['%s: %s' % (k, v) for k, v in iteritems(ds.dims)]
271-
summary.append('%s(%s)' % (dims_start, ', '.join(all_dim_strings)))
293+
dims_start = pretty_print(u'Dimensions:', col_width)
294+
all_dim_strings = [u'%s: %s' % (k, v) for k, v in iteritems(ds.dims)]
295+
summary.append(u'%s(%s)' % (dims_start, ', '.join(all_dim_strings)))
272296

273297
summary.append(coords_repr(ds.coords, col_width=col_width))
274298
summary.append(vars_repr(ds.data_vars, col_width=col_width))
275299
if ds.attrs:
276300
summary.append(attrs_repr(ds.attrs))
277301

278-
return '\n'.join(summary)
302+
return u'\n'.join(summary)

xarray/core/pycompat.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import sys
22

3+
PY2 = sys.version_info[0] < 3
34
PY3 = sys.version_info[0] >= 3
45

56
if PY3: # pragma: no cover

xarray/test/test_dataset.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
from copy import copy, deepcopy
23
from textwrap import dedent
34
try:
@@ -16,7 +17,7 @@
1617
DataArray, Variable, Coordinate, auto_combine,
1718
open_dataset, set_options)
1819
from xarray.core import indexing, utils
19-
from xarray.core.pycompat import iteritems, OrderedDict
20+
from xarray.core.pycompat import iteritems, OrderedDict, unicode_type
2021

2122
from . import (TestCase, unittest, InaccessibleArray, UnexpectedDataAccess,
2223
requires_dask)
@@ -114,6 +115,23 @@ def test_repr_period_index(self):
114115
# check that creating the repr doesn't raise an error #GH645
115116
repr(data)
116117

118+
def test_unicode_data(self):
119+
# regression test for GH834
120+
data = Dataset({u'foø': [u'ba®']}, attrs={u'å': u'∑'})
121+
repr(data) # should not raise
122+
123+
expected = dedent(u"""\
124+
<xarray.Dataset>
125+
Dimensions: (foø: 1)
126+
Coordinates:
127+
* foø (foø) <U3 %r
128+
Data variables:
129+
*empty*
130+
Attributes:
131+
å: ∑""" % u'ba®')
132+
actual = unicode_type(data)
133+
self.assertEqual(expected, actual)
134+
117135
def test_constructor(self):
118136
x1 = ('x', 2 * np.arange(100))
119137
x2 = ('x', np.arange(1000))

0 commit comments

Comments
 (0)