Open
Description
What happened?
I'm generating an NWB to encode task rewards. My code is passing a numpy structured array to be added as a timeseries. I'm not sure if this is allowed or not, but nonetheless the behavior seems to differ between Zarr and HDF5.
Steps to Reproduce
Running the following code on a Zarr NWB vs an HDF5 NWB, where rewards
contains multiple arrays of equal length.
with io_class(str(result_nwb_path), "r+") as io:
nwb_file = io.read()
# Make a structured array with named fields
reward_data = np.array(
list(zip(rewards['volume'], rewards['autorewarded'].astype(bool))),
dtype=[('volume', 'f4'), ('autorewarded', 'bool')]
)
timestamps = rewards['timestamps'].to_numpy()
rewards_ts = TimeSeries(
name='rewards_combined',
data=reward_data,
unit='mixed',
timestamps=timestamps,
description='Reward events with volume and autorewarded flag'
)
nwb_file.add_acquisition(rewards_ts)
io.write(nwb_file)
Traceback
When running this on an HDF5 NWB, an error is given during file write:
Traceback (most recent call last):
File "/opt/conda/lib/python3.9/site-packages/hdmf/backends/hdf5/h5tools.py", line 1439, in __list_fill__
dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings)
File "/opt/conda/lib/python3.9/site-packages/h5py/_hl/group.py", line 186, in create_dataset
dsid = dataset.make_new_dset(group, shape, dtype, data, name, **kwds)
File "/opt/conda/lib/python3.9/site-packages/h5py/_hl/dataset.py", line 88, in make_new_dset
tid = h5t.py_create(dtype, logical=1)
File "h5py/h5t.pyx", line 1669, in h5py.h5t.py_create
File "h5py/h5t.pyx", line 1693, in h5py.h5t.py_create
File "h5py/h5t.pyx", line 1727, in h5py.h5t.py_create
File "h5py/h5t.pyx", line 1522, in h5py.h5t._c_opaque
ValueError: Size must be positive (size must be positive)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/tmp/nxf.23GfiOSmB4/capsule/code/run_capsule.py", line 226, in <module>
if __name__ == "__main__": run()
File "/tmp/nxf.23GfiOSmB4/capsule/code/run_capsule.py", line 222, in run
io.write(nwb_file)
File "/opt/conda/lib/python3.9/site-packages/hdmf/utils.py", line 577, in func_call
return func(args[0], **pargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/backends/hdf5/h5tools.py", line 395, in write
super().write(**kwargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/utils.py", line 577, in func_call
return func(args[0], **pargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/backends/io.py", line 99, in write
self.write_builder(f_builder, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/utils.py", line 577, in func_call
return func(args[0], **pargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/backends/hdf5/h5tools.py", line 834, in write_builder
self.write_group(self.__file, gbldr, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/utils.py", line 577, in func_call
return func(args[0], **pargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/backends/hdf5/h5tools.py", line 1013, in write_group
self.write_group(group, sub_builder, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/utils.py", line 577, in func_call
return func(args[0], **pargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/backends/hdf5/h5tools.py", line 1018, in write_group
self.write_dataset(group, sub_builder, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/utils.py", line 577, in func_call
return func(args[0], **pargs)
File "/opt/conda/lib/python3.9/site-packages/hdmf/backends/hdf5/h5tools.py", line 1286, in write_dataset
dset = self.__list_fill__(parent, name, data, options)
File "/opt/conda/lib/python3.9/site-packages/hdmf/backends/hdf5/h5tools.py", line 1443, in __list_fill__
raise Exception(msg) from exc
Exception: Could not create dataset data in /acquisition/rewards_combined with shape (210,), dtype <class 'numpy.void'>, and iosettings {}. Size must be positive (size must be positive)
When running on a Zarr NWB, writing completes, only showing
/opt/conda/lib/python3.9/site-packages/zarr/util.py:118: RuntimeWarning: divide by zero encountered in log10
target_size = CHUNK_BASE * (2 ** np.log10(dset_size / (1024.0 * 1024)))
Then, when later reading the file with, for example,
nwb.acquisition['rewards_combined']
We get the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File /opt/conda/lib/python3.12/site-packages/IPython/core/formatters.py:344, in BaseFormatter.__call__(self, obj)
342 method = get_real_method(obj, self.print_method)
343 if method is not None:
--> 344 return method()
345 return None
346 else:
File /opt/conda/lib/python3.12/site-packages/hdmf/container.py:692, in Container._repr_html_(self)
690 html_repr += "<div class='container-wrap'>"
691 html_repr += f"<div class='container-header'><div class='xr-obj-type'><h3>{header_text}</h3></div></div>"
--> 692 html_repr += self._generate_html_repr(self.fields, is_field=True)
693 html_repr += "</div>"
694 return html_repr
File /opt/conda/lib/python3.12/site-packages/hdmf/container.py:706, in Container._generate_html_repr(self, fields, level, access_code, is_field)
704 html_repr += value._generate_field_html(key, value, level, current_access_code)
705 else:
--> 706 html_repr += self._generate_field_html(key, value, level, current_access_code)
707 elif isinstance(fields, list):
708 for index, item in enumerate(fields):
File /opt/conda/lib/python3.12/site-packages/pynwb/base.py:342, in TimeSeries._generate_field_html(self, key, value, level, access_code)
339 linked_key = 'timestamps' if key == 'timestamp_link' else 'data'
340 value = [find_location_in_memory_nwbfile(linked_key, v) for v in value]
--> 342 return super()._generate_field_html(key, value, level, access_code)
File /opt/conda/lib/python3.12/site-packages/hdmf/container.py:733, in Container._generate_field_html(self, key, value, level, access_code)
730 is_array_data = hasattr(value, "shape") and hasattr(value, "dtype")
732 if is_array_data:
--> 733 html_content = self._generate_array_html(value, level + 1)
734 elif hasattr(value, "generate_html_repr"):
735 html_content = value.generate_html_repr(level + 1, access_code)
File /opt/conda/lib/python3.12/site-packages/hdmf/container.py:775, in Container._generate_array_html(self, array, level)
769 repr_html = generate_array_html_repr(array_info_dict, array.data, "DataIO")
770 elif it_was_read_with_io:
771 # The backend handles the representation here. Two special cases worth noting:
772 # 1. Array-type attributes (e.g., start_frame in ImageSeries) remain NumPy arrays
773 # even when their parent container has an IO
774 # 2. Data may have been modified after being read from storage
--> 775 repr_html = read_io.generate_dataset_html(array)
776 else: # Not sure which object could get here
777 object_class = array.__class__.__name__
File /opt/conda/lib/python3.12/site-packages/hdmf/backends/io.py:195, in HDMFIO.generate_dataset_html(dataset)
193 """Generates an html representation for a dataset"""
194 array_info_dict = get_basic_array_info(dataset)
--> 195 repr_html = generate_array_html_repr(array_info_dict, dataset)
197 return repr_html
File /opt/conda/lib/python3.12/site-packages/hdmf/utils.py:1023, in generate_array_html_repr(array_info_dict, array, dataset_type)
1021 array_is_small = array_size_bytes < 1024 * 0.1 # 10 % a kilobyte to display the array
1022 if array_is_small:
-> 1023 repr_html += "<br>" + str(np.asarray(array))
1025 return repr_html
File /opt/conda/lib/python3.12/site-packages/zarr/core.py:589, in Array.__array__(self, *args)
588 def __array__(self, *args):
--> 589 a = self[...]
590 if args:
591 a = a.astype(args[0])
File /opt/conda/lib/python3.12/site-packages/zarr/core.py:807, in Array.__getitem__(self, selection)
805 result = self.vindex[selection]
806 else:
--> 807 result = self.get_basic_selection(pure_selection, fields=fields)
808 return result
File /opt/conda/lib/python3.12/site-packages/zarr/core.py:933, in Array.get_basic_selection(self, selection, out, fields)
930 return self._get_basic_selection_zd(selection=selection, out=out,
931 fields=fields)
932 else:
--> 933 return self._get_basic_selection_nd(selection=selection, out=out,
934 fields=fields)
File /opt/conda/lib/python3.12/site-packages/zarr/core.py:976, in Array._get_basic_selection_nd(self, selection, out, fields)
970 def _get_basic_selection_nd(self, selection, out=None, fields=None):
971 # implementation of basic selection for array with at least one dimension
972
973 # setup indexer
974 indexer = BasicIndexer(selection, self)
--> 976 return self._get_selection(indexer=indexer, out=out, fields=fields)
File /opt/conda/lib/python3.12/site-packages/zarr/core.py:1267, in Array._get_selection(self, indexer, out, fields)
1261 if not hasattr(self.chunk_store, "getitems") or \
1262 any(map(lambda x: x == 0, self.shape)):
1263 # sequentially get one key at a time from storage
1264 for chunk_coords, chunk_selection, out_selection in indexer:
1265
1266 # load chunk selection into output array
-> 1267 self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection,
1268 drop_axes=indexer.drop_axes, fields=fields)
1269 else:
1270 # allow storage to get multiple items at once
1271 lchunk_coords, lchunk_selection, lout_selection = zip(*indexer)
File /opt/conda/lib/python3.12/site-packages/zarr/core.py:1978, in Array._chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, drop_axes, fields)
1975 out[out_selection] = fill_value
1977 else:
-> 1978 self._process_chunk(out, cdata, chunk_selection, drop_axes,
1979 out_is_ndarray, fields, out_selection)
File /opt/conda/lib/python3.12/site-packages/zarr/core.py:1886, in Array._process_chunk(self, out, cdata, chunk_selection, drop_axes, out_is_ndarray, fields, out_selection, partial_read_decode)
1884 if isinstance(cdata, PartialReadBuffer):
1885 cdata = cdata.read_full()
-> 1886 self._compressor.decode(cdata, dest)
1887 else:
1888 chunk = ensure_ndarray_like(cdata).view(self._dtype)
File numcodecs/blosc.pyx:585, in numcodecs.blosc.Blosc.decode()
File numcodecs/blosc.pyx:393, in numcodecs.blosc.decompress()
ValueError: destination buffer too small; expected at least 1050, got 0
Operating System
Windows
Python Version
3.13
Package Versions
Writing Environment:
h5py==3.13.0
hdmf-zarr==0.11.1
numpy==1.26.4
pynwb==3.0.0
Reading environment:
hdmf==3.14.6
hdmf_zarr==0.9.0
Metadata
Metadata
Assignees
Labels
No labels