@@ -31,12 +31,10 @@ function generate_file(filename, format)
3131 end
3232 end
3333 elseif format == :arrow
34- open (Arrow. Writer, filename) do writer
35- for i in 1 : numchunks
36- starttpl, endtpl = (i- 1 )* chunktpl+ 1 , min (i* chunktpl, totaltpl)
37- Arrow. write (writer, (data= map (i -> (a= rand (), b= rand (), c= rand (), d= rand ()),
38- 1 : endtpl- starttpl+ 1 ),))
39- end
34+ for i in 1 : numchunks
35+ starttpl, endtpl = (i- 1 )* chunktpl+ 1 , min (i* chunktpl, totaltpl)
36+ Arrow. append (" random_data.arrow" , (data= map (i -> (a= rand (), b= rand (), c= rand (), d= rand ()),
37+ 1 : endtpl- starttpl+ 1 ),);file= false )
4038 end
4139 end
4240end
@@ -129,9 +127,8 @@ rngs = [Xoshiro(i) for i in 1:Threads.nthreads()]
129127
130128As you can see, the speed-up is not linear in the number of threads for an hdf5 file. This is
131129mainly due to the fact that accessing the chunks is single-threaded, so one would need to use
132- ` MPI.jl ` as explained at https://juliaio.github.io/HDF5.jl/stable/mpi/ to improve the multi-threading
133- performance. Though, we are already sampling at 500MB/s, which is not bad!
134-
130+ `MPI.jl` as explained at [HDF5.jl/stable/mpi/](https://juliaio.github.io/HDF5.jl/stable/mpi/) to
131+ improve the multi-threading performance. Though, we are already sampling at 500MB/s, which is not bad!
135132Using `Arrow.jl` gives even better performance, and scalability that is somehow better than
136133linear, reaching a 2GB/s sampling speed!
137134
0 commit comments