Skip to content

Commit 0a3913a

Browse files
committed
Add bench on tensordot out-of-core (script version)
1 parent f0929db commit 0a3913a

File tree

1 file changed

+120
-0
lines changed

1 file changed

+120
-0
lines changed
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# Benchmark tensordot
2+
import sys
3+
from time import time
4+
5+
import numpy as np
6+
import blosc2
7+
import dask
8+
import dask.array as da
9+
import zarr
10+
from numcodecs import Blosc
11+
import h5py
12+
import hdf5plugin
13+
import b2h5py.auto
14+
assert(b2h5py.is_fast_slicing_enabled())
15+
16+
17+
# --- Experiment Setup ---
18+
N = 600
19+
shape_a = (N,) * 3
20+
shape_b = (N,) * 3
21+
shape_out = (N,) * 2
22+
chunks = (150,) * 3
23+
chunks_out = (150,) * 2
24+
dtype = np.float64
25+
cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1)
26+
compressor = Blosc(cname='lz4', clevel=1, shuffle=Blosc.SHUFFLE)
27+
h5compressor = hdf5plugin.Blosc2(cname='lz4', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE)
28+
scheduler = "single-threaded" if blosc2.nthreads == 1 else "threads"
29+
create = True
30+
31+
# --- Numpy array creation ---
32+
if create:
33+
t0 = time()
34+
# matrix_numpy = np.linspace(0, 1, N**3).reshape(shape_a)
35+
matrix_numpy = np.ones(N**3).reshape(shape_a)
36+
print(f"N={N}, Numpy array creation = {time() - t0:.2f} s")
37+
38+
# --- Blosc2 array creation ---
39+
if create:
40+
t0 = time()
41+
matrix_a_blosc2 = blosc2.asarray(matrix_numpy, cparams=cparams, chunks=chunks, urlpath="a.b2nd", mode="w")
42+
matrix_b_blosc2 = blosc2.asarray(matrix_numpy, cparams=cparams, chunks=chunks, urlpath="b.b2nd", mode="w")
43+
print(f"N={N}, Array creation = {time() - t0:.2f} s")
44+
45+
# Re-open the arrays
46+
t0 = time()
47+
matrix_a_blosc2 = blosc2.open("a.b2nd", mode="r")
48+
matrix_b_blosc2 = blosc2.open("b.b2nd", mode="r")
49+
print(f"N={N}, Blosc2 array opening = {time() - t0:.2f} s")
50+
51+
# --- Tensordot computation ---
52+
for axis in ((0, 1), (1, 2), (2, 0)):
53+
t0 = time()
54+
lexpr = blosc2.lazyexpr("tensordot(matrix_a_blosc2, matrix_b_blosc2, axes=(axis, axis))")
55+
out_blosc2 = lexpr.compute(urlpath="out.b2nd", mode="w", chunks=chunks_out)
56+
print(f"axes={axis}, Blosc2 Performance = {time() - t0:.2f} s")
57+
58+
# --- HDF5 array creation ---
59+
if create:
60+
t0 = time()
61+
f = h5py.File("a_b_out.h5", "w")
62+
f.create_dataset("a", data=matrix_numpy, chunks=chunks, **h5compressor)
63+
f.create_dataset("b", data=matrix_numpy, chunks=chunks, **h5compressor)
64+
f.create_dataset("out", shape=shape_out, dtype=dtype, chunks=chunks_out, **h5compressor)
65+
print(f"N={N}, HDF5 array creation = {time() - t0:.2f} s")
66+
f.close()
67+
68+
# Re-open the HDF5 arrays
69+
t0 = time()
70+
f = h5py.File("a_b_out.h5", "a")
71+
matrix_a_hdf5 = f["a"]
72+
matrix_b_hdf5 = f["b"]
73+
out_hdf5 = f["out"]
74+
print(f"N={N}, HDF5 array opening = {time() - t0:.2f} s")
75+
76+
# --- Tensordot computation with HDF5 ---
77+
for axis in ((0, 1), (1, 2), (2, 0)):
78+
t0 = time()
79+
blosc2.evaluate("tensordot(matrix_a_hdf5, matrix_b_hdf5, axes=(axis, axis))", out=out_hdf5)
80+
print(f"axes={axis}, HDF5 Performance = {time() - t0:.2f} s")
81+
f.close()
82+
83+
# --- Zarr array creation ---
84+
if create:
85+
t0 = time()
86+
matrix_a_zarr = zarr.open_array("a.zarr", mode="w", shape=shape_a, chunks=chunks,
87+
dtype=dtype, compressor=compressor, zarr_format=2)
88+
matrix_a_zarr[:] = matrix_numpy
89+
90+
matrix_b_zarr = zarr.open_array("b.zarr", mode="w", shape=shape_b, chunks=chunks,
91+
dtype=dtype, compressor=compressor, zarr_format=2)
92+
matrix_b_zarr[:] = matrix_numpy
93+
print(f"N={N}, Zarr array creation = {time() - t0:.2f} s")
94+
95+
# --- Re-open the Zarr arrays ---
96+
t0 = time()
97+
matrix_a_zarr = zarr.open("a.zarr", mode="r")
98+
matrix_b_zarr = zarr.open("b.zarr", mode="r")
99+
matrix_a_dask = da.from_zarr(matrix_a_zarr)
100+
matrix_b_dask = da.from_zarr(matrix_b_zarr)
101+
print(f"N={N}, Dask + Zarr array opening = {time() - t0:.2f} s")
102+
103+
# --- Tensordot computation with Dask ---
104+
zout = zarr.open_array("out.zarr", mode="w", shape=shape_out, chunks=chunks_out,
105+
dtype=dtype, compressor=compressor, zarr_format=2)
106+
with dask.config.set(scheduler=scheduler, num_workers=blosc2.nthreads):
107+
for axis in ((0, 1), (1, 2), (2, 0)):
108+
t0 = time()
109+
dexpr = da.tensordot(matrix_a_dask, matrix_b_dask, axes=(axis, axis))
110+
da.to_zarr(dexpr, zout)
111+
print(f"axes={axis}, Dask Performance = {time() - t0:.2f} s")
112+
113+
# --- Tensordot computation with Blosc2
114+
zout2 = zarr.open_array("out2.zarr", mode="w", shape=shape_out, chunks=chunks_out,
115+
dtype=dtype, compressor=compressor, zarr_format=2)
116+
b2out = blosc2.empty(shape=shape_out, chunks=chunks_out, dtype=dtype, cparams=cparams, urlpath="out2.b2nd", mode="w")
117+
for axis in ((0, 1), (1, 2), (2, 0)):
118+
t0 = time()
119+
blosc2.evaluate("tensordot(matrix_a_zarr, matrix_b_zarr, axes=(axis, axis))", out=zout2)
120+
print(f"axes={axis}, Blosc2 Performance = {time() - t0:.2f} s")

0 commit comments

Comments
 (0)