|
| 1 | +# Benchmark tensordot |
| 2 | +import sys |
| 3 | +from time import time |
| 4 | + |
| 5 | +import numpy as np |
| 6 | +import blosc2 |
| 7 | +import dask |
| 8 | +import dask.array as da |
| 9 | +import zarr |
| 10 | +from numcodecs import Blosc |
| 11 | +import h5py |
| 12 | +import hdf5plugin |
| 13 | +import b2h5py.auto |
| 14 | +assert(b2h5py.is_fast_slicing_enabled()) |
| 15 | + |
| 16 | + |
| 17 | +# --- Experiment Setup --- |
| 18 | +N = 600 |
| 19 | +shape_a = (N,) * 3 |
| 20 | +shape_b = (N,) * 3 |
| 21 | +shape_out = (N,) * 2 |
| 22 | +chunks = (150,) * 3 |
| 23 | +chunks_out = (150,) * 2 |
| 24 | +dtype = np.float64 |
| 25 | +cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1) |
| 26 | +compressor = Blosc(cname='lz4', clevel=1, shuffle=Blosc.SHUFFLE) |
| 27 | +h5compressor = hdf5plugin.Blosc2(cname='lz4', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE) |
| 28 | +scheduler = "single-threaded" if blosc2.nthreads == 1 else "threads" |
| 29 | +create = True |
| 30 | + |
| 31 | +# --- Numpy array creation --- |
| 32 | +if create: |
| 33 | + t0 = time() |
| 34 | + # matrix_numpy = np.linspace(0, 1, N**3).reshape(shape_a) |
| 35 | + matrix_numpy = np.ones(N**3).reshape(shape_a) |
| 36 | + print(f"N={N}, Numpy array creation = {time() - t0:.2f} s") |
| 37 | + |
| 38 | +# --- Blosc2 array creation --- |
| 39 | +if create: |
| 40 | + t0 = time() |
| 41 | + matrix_a_blosc2 = blosc2.asarray(matrix_numpy, cparams=cparams, chunks=chunks, urlpath="a.b2nd", mode="w") |
| 42 | + matrix_b_blosc2 = blosc2.asarray(matrix_numpy, cparams=cparams, chunks=chunks, urlpath="b.b2nd", mode="w") |
| 43 | + print(f"N={N}, Array creation = {time() - t0:.2f} s") |
| 44 | + |
| 45 | +# Re-open the arrays |
| 46 | +t0 = time() |
| 47 | +matrix_a_blosc2 = blosc2.open("a.b2nd", mode="r") |
| 48 | +matrix_b_blosc2 = blosc2.open("b.b2nd", mode="r") |
| 49 | +print(f"N={N}, Blosc2 array opening = {time() - t0:.2f} s") |
| 50 | + |
| 51 | +# --- Tensordot computation --- |
| 52 | +for axis in ((0, 1), (1, 2), (2, 0)): |
| 53 | + t0 = time() |
| 54 | + lexpr = blosc2.lazyexpr("tensordot(matrix_a_blosc2, matrix_b_blosc2, axes=(axis, axis))") |
| 55 | + out_blosc2 = lexpr.compute(urlpath="out.b2nd", mode="w", chunks=chunks_out) |
| 56 | + print(f"axes={axis}, Blosc2 Performance = {time() - t0:.2f} s") |
| 57 | + |
| 58 | +# --- HDF5 array creation --- |
| 59 | +if create: |
| 60 | + t0 = time() |
| 61 | + f = h5py.File("a_b_out.h5", "w") |
| 62 | + f.create_dataset("a", data=matrix_numpy, chunks=chunks, **h5compressor) |
| 63 | + f.create_dataset("b", data=matrix_numpy, chunks=chunks, **h5compressor) |
| 64 | + f.create_dataset("out", shape=shape_out, dtype=dtype, chunks=chunks_out, **h5compressor) |
| 65 | + print(f"N={N}, HDF5 array creation = {time() - t0:.2f} s") |
| 66 | + f.close() |
| 67 | + |
| 68 | +# Re-open the HDF5 arrays |
| 69 | +t0 = time() |
| 70 | +f = h5py.File("a_b_out.h5", "a") |
| 71 | +matrix_a_hdf5 = f["a"] |
| 72 | +matrix_b_hdf5 = f["b"] |
| 73 | +out_hdf5 = f["out"] |
| 74 | +print(f"N={N}, HDF5 array opening = {time() - t0:.2f} s") |
| 75 | + |
| 76 | +# --- Tensordot computation with HDF5 --- |
| 77 | +for axis in ((0, 1), (1, 2), (2, 0)): |
| 78 | + t0 = time() |
| 79 | + blosc2.evaluate("tensordot(matrix_a_hdf5, matrix_b_hdf5, axes=(axis, axis))", out=out_hdf5) |
| 80 | + print(f"axes={axis}, HDF5 Performance = {time() - t0:.2f} s") |
| 81 | +f.close() |
| 82 | + |
| 83 | +# --- Zarr array creation --- |
| 84 | +if create: |
| 85 | + t0 = time() |
| 86 | + matrix_a_zarr = zarr.open_array("a.zarr", mode="w", shape=shape_a, chunks=chunks, |
| 87 | + dtype=dtype, compressor=compressor, zarr_format=2) |
| 88 | + matrix_a_zarr[:] = matrix_numpy |
| 89 | + |
| 90 | + matrix_b_zarr = zarr.open_array("b.zarr", mode="w", shape=shape_b, chunks=chunks, |
| 91 | + dtype=dtype, compressor=compressor, zarr_format=2) |
| 92 | + matrix_b_zarr[:] = matrix_numpy |
| 93 | + print(f"N={N}, Zarr array creation = {time() - t0:.2f} s") |
| 94 | + |
| 95 | +# --- Re-open the Zarr arrays --- |
| 96 | +t0 = time() |
| 97 | +matrix_a_zarr = zarr.open("a.zarr", mode="r") |
| 98 | +matrix_b_zarr = zarr.open("b.zarr", mode="r") |
| 99 | +matrix_a_dask = da.from_zarr(matrix_a_zarr) |
| 100 | +matrix_b_dask = da.from_zarr(matrix_b_zarr) |
| 101 | +print(f"N={N}, Dask + Zarr array opening = {time() - t0:.2f} s") |
| 102 | + |
| 103 | +# --- Tensordot computation with Dask --- |
| 104 | +zout = zarr.open_array("out.zarr", mode="w", shape=shape_out, chunks=chunks_out, |
| 105 | + dtype=dtype, compressor=compressor, zarr_format=2) |
| 106 | +with dask.config.set(scheduler=scheduler, num_workers=blosc2.nthreads): |
| 107 | + for axis in ((0, 1), (1, 2), (2, 0)): |
| 108 | + t0 = time() |
| 109 | + dexpr = da.tensordot(matrix_a_dask, matrix_b_dask, axes=(axis, axis)) |
| 110 | + da.to_zarr(dexpr, zout) |
| 111 | + print(f"axes={axis}, Dask Performance = {time() - t0:.2f} s") |
| 112 | + |
| 113 | +# --- Tensordot computation with Blosc2 |
| 114 | +zout2 = zarr.open_array("out2.zarr", mode="w", shape=shape_out, chunks=chunks_out, |
| 115 | + dtype=dtype, compressor=compressor, zarr_format=2) |
| 116 | +b2out = blosc2.empty(shape=shape_out, chunks=chunks_out, dtype=dtype, cparams=cparams, urlpath="out2.b2nd", mode="w") |
| 117 | +for axis in ((0, 1), (1, 2), (2, 0)): |
| 118 | + t0 = time() |
| 119 | + blosc2.evaluate("tensordot(matrix_a_zarr, matrix_b_zarr, axes=(axis, axis))", out=zout2) |
| 120 | + print(f"axes={axis}, Blosc2 Performance = {time() - t0:.2f} s") |
0 commit comments