From 9110e55d9eac43803bc9af95e7bd44ecde916e4f Mon Sep 17 00:00:00 2001 From: "Kacper Kowalik (Xarthisius)" Date: Sat, 31 Oct 2020 18:30:45 -0500 Subject: [PATCH 1/3] Fix expected values in the original BE test. Add a new simple test. --- h5py/tests/test_big_endian_file.py | 44 ++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/h5py/tests/test_big_endian_file.py b/h5py/tests/test_big_endian_file.py index 7af322ff3..83fb92aa9 100644 --- a/h5py/tests/test_big_endian_file.py +++ b/h5py/tests/test_big_endian_file.py @@ -1,34 +1,50 @@ import pytest +import numpy as np from h5py import File +from .common import TestCase from .data_files import get_data_file_path def test_vlen_big_endian(): with File(get_data_file_path("vlen_string_s390x.h5")) as f: - assert f.attrs['created_on_s390x'] == 1 + assert f.attrs["created_on_s390x"] == 1 - dset = f['DSvariable'] - assert dset[0] == b'Parting' - assert dset[1] == b'is such' - assert dset[2] == b'sweet' - assert dset[3] == b'sorrow...' + dset = f["DSvariable"] + assert dset[0] == b"Parting" + assert dset[1] == b"is such" + assert dset[2] == b"sweet" + assert dset[3] == b"sorrow..." 
- dset = f['DSLEfloat'] + dset = f["DSLEfloat"] assert dset[0] == 3.14 assert dset[1] == 1.61 assert dset[2] == 2.71 assert dset[3] == 2.41 assert dset[4] == 1.2 - assert dset.dtype == 'f8' + assert f["DSBEfloat"][0] == 3.14 + assert f["DSBEfloat"].dtype == ">f8" - assert f['DSLEint'][0] == 1 - assert f['DSLEint'].dtype == 'uint64' + assert f["DSLEint"][0] == 1 + assert f["DSLEint"].dtype == "uint64" # Same int values with big endianess - assert f['DSBEint'][0] == 72057594037927936 - assert f['DSBEint'].dtype == '>i8' + assert f["DSBEint"][0] == 1 + assert f["DSBEint"].dtype == ">i8" + + +class TestEndianess(TestCase): + def test_simple_int_be(self): + fname = self.mktemp() + + arr = np.ndarray(shape=(1,), dtype=">i4", buffer=bytearray([0, 1, 3, 2])) + be_number = 0 * 256 ** 3 + 1 * 256 ** 2 + 3 * 256 ** 1 + 2 * 256 ** 0 + + with File(fname, mode="w") as f: + f.create_dataset("int", data=arr) + + with File(fname, mode="r") as f: + assert f["int"][()][0] == be_number From a8cd2cbfbd3e7b45bc1c127360c2ee7e64ae85b3 Mon Sep 17 00:00:00 2001 From: "Kacper Kowalik (Xarthisius)" Date: Sat, 31 Oct 2020 18:34:25 -0500 Subject: [PATCH 2/3] Preserve the endianess in Reader. 
Fixes #1729 --- h5py/_selector.pyx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/h5py/_selector.pyx b/h5py/_selector.pyx index 1b06e75e6..9b19a4210 100644 --- a/h5py/_selector.pyx +++ b/h5py/_selector.pyx @@ -19,6 +19,8 @@ from .h5s cimport SpaceID from .h5t cimport TypeID, typewrap, py_create from .utils cimport emalloc, efree, convert_dims +import sys + import_array() @@ -293,6 +295,7 @@ cdef class Reader: cdef Selector selector cdef TypeID h5_memory_datatype cdef int np_typenum + cdef char np_byteorder def __cinit__(self, DatasetID dsid): self.dataset = dsid.id @@ -305,6 +308,7 @@ cdef class Reader: h5_stored_datatype = typewrap(H5Dget_type(self.dataset)) np_dtype = h5_stored_datatype.py_dtype() self.np_typenum = np_dtype.num + self.np_byteorder = np_dtype.byteorder self.h5_memory_datatype = py_create(np_dtype) cdef ndarray make_array(self, hsize_t* mshape): @@ -325,6 +329,8 @@ cdef class Reader: arr_rank += 1 arr = PyArray_SimpleNew(arr_rank, arr_shape, self.np_typenum) + if self.np_byteorder == (sys.byteorder == 'little' and '>' or '<'): + arr = arr.byteswap().newbyteorder() finally: efree(arr_shape) From a79e616a6cdcae55b82046b7461b90fbda393ee0 Mon Sep 17 00:00:00 2001 From: "Kacper Kowalik (Xarthisius)" Date: Sat, 31 Oct 2020 18:50:24 -0500 Subject: [PATCH 3/3] Add info about bugfix in news and update .authors --- .authors.yml | 7 +++++++ news/fix_be_fastread.rst | 4 ++++ 2 files changed, 11 insertions(+) create mode 100644 news/fix_be_fastread.rst diff --git a/.authors.yml b/.authors.yml index 927b9f2d7..38aca6fcd 100644 --- a/.authors.yml +++ b/.authors.yml @@ -828,3 +828,10 @@ num_commits: 2 first_commit: 2020-10-13 03:57:42 github: opoplawski +- name: Kacper Kowalik + email: xarthisius.kk@gmail.com + aliases: + - Kacper Kowalik (Xarthisius) + num_commits: 2 + first_commit: 2020-10-31 18:30:45 + github: Xarthisius diff --git a/news/fix_be_fastread.rst b/news/fix_be_fastread.rst new file mode 100644 index 000000000..64dfc41e2 --- /dev/null +++ 
b/news/fix_be_fastread.rst @@ -0,0 +1,4 @@ +Bug fixes +--------- + +* Preserve endianness in Cython dataset Reader (:issue:`1729`).