1+ package org .hdf5javalib .utils ;
2+
3+ import org .hdf5javalib .dataclass .HdfData ;
4+ import org .hdf5javalib .dataclass .HdfFixedPoint ;
5+ import org .hdf5javalib .datasource .TypedDataSource ;
6+ import org .hdf5javalib .hdffile .dataobjects .HdfObjectHeaderPrefix ;
7+ import org .hdf5javalib .hdffile .dataobjects .messages .AttributeMessage ;
8+ import org .hdf5javalib .hdffile .dataobjects .messages .HdfMessage ;
9+ import org .hdf5javalib .hdffile .dataobjects .messages .LinkMessage ;
10+ import org .hdf5javalib .hdfjava .HdfDataFile ;
11+ import org .hdf5javalib .hdfjava .HdfDataset ;
12+ import org .hdf5javalib .hdfjava .HdfFileReader ;
13+ import org .slf4j .Logger ;
14+ import org .slf4j .LoggerFactory ;
15+
16+ import java .io .IOException ;
17+ import java .lang .reflect .InvocationTargetException ;
18+ import java .nio .channels .SeekableByteChannel ;
19+ import java .nio .file .Files ;
20+ import java .nio .file .Path ;
21+ import java .nio .file .StandardOpenOption ;
22+ import java .util .Arrays ;
23+ import java .util .Comparator ;
24+ import java .util .DoubleSummaryStatistics ;
25+ import java .util .Optional ;
26+
27+ /**
28+ * Utility class for displaying HDF5 dataset data and managing attributes.
29+ * <p>
30+ * The {@code HdfDisplayUtils} class provides methods to display scalar and vector data
31+ * from HDF5 datasets using a {@link TypedDataSource}, as well as to create version attributes
32+ * for datasets. It supports various data types and formats the output for easy inspection,
33+ * handling both primitive and array types.
34+ * </p>
35+ */
36+ public class HdfDisplaySummaryStatsUtils {
37+ private static final Logger log = LoggerFactory .getLogger (HdfDisplaySummaryStatsUtils .class );
38+ public static final String UNDEFINED = "<Undefined>" ;
39+ private static final String STREAM_EQUALS = " stream = " ;
40+
41+ public static void displayLinkMessages (HdfObjectHeaderPrefix objectHeader ) {
42+ for ( HdfMessage hdfMessage : objectHeader .getHeaderMessages ()) {
43+ if ( hdfMessage instanceof LinkMessage ) {
44+ LinkMessage linkMessage = (LinkMessage ) hdfMessage ;
45+ System .out .println ("\t LinkMessage: " + linkMessage .toString ());
46+ }
47+
48+ }
49+ }
50+
51+ // Define a functional interface for actions that may need channel, dataset, and reader
52+ @ FunctionalInterface
53+ interface FileAction {
54+ void perform (SeekableByteChannel channel , HdfDataset dataSet , HdfFileReader reader ) throws Exception ;
55+ }
56+
57+ public static String undefinedArrayToString (HdfFixedPoint [] values ) {
58+ if (values == null || values .length == 0 ) {
59+ return "Not Present" ;
60+ }
61+ StringBuilder sb = new StringBuilder ("[" );
62+ for (int i = 0 ; i < values .length ; i ++) {
63+ sb .append (values [i ].isUndefined ()?UNDEFINED :values [i ].toString () );
64+ if (i != values .length - 1 ) {
65+ sb .append (", " );
66+ }
67+ }
68+ sb .append (']' );
69+ return sb .toString ();
70+ }
71+
72+ // Generalized method to process the file and apply a custom action per dataset
73+ private static void processFile (Path filePath , FileAction action ) {
74+ try (SeekableByteChannel channel = Files .newByteChannel (filePath , StandardOpenOption .READ )) {
75+ HdfFileReader reader = new HdfFileReader (channel ).readFile ();
76+ for (HdfDataset dataSet : reader .getDatasets ()) {
77+ System .out .println ("{} " + dataSet );
78+ // log.info("{} ", dataSet);
79+ action .perform (channel , dataSet , reader );
80+ }
81+ } catch (Exception e ) {
82+ log .error ("Exception in processFile: {}" , filePath , e );
83+ }
84+ }
85+
86+ public static void displayFileAttr (Path filePath ) {
87+ processFile (filePath , (channel , dataSet , reader ) -> displayAttributes (dataSet ));
88+ }
89+
90+ public static void displayFile (Path filePath ) {
91+ processFile (filePath , HdfDisplaySummaryStatsUtils ::displayData );
92+ }
93+
94+ public static void displayAttributes (HdfDataset dataSet ) throws InvocationTargetException , InstantiationException , IllegalAccessException , IOException {
95+ for (AttributeMessage message : dataSet .getAttributeMessages ()) {
96+ HdfDataHolder dataHolder = message .getHdfDataHolder ();
97+ if (dataHolder .getDimensionality () == 1 ) {
98+ HdfData [] data = dataHolder .getAll (HdfData [].class );
99+ log .info ("Data = {}" , Arrays .toString (data ));
100+ } else if (dataHolder .getDimensionality () == 2 ) {
101+ HdfData [][] data = dataHolder .getAll (HdfData [][].class );
102+ for (HdfData [] row : data ) {
103+ log .info ("Row = {}" , Arrays .toString (row ));
104+ }
105+ }
106+ }
107+ }
108+
109+ // public static String getDataObjectFullName(HdfDataObject hdfDataObject) {
110+ // List<String> parents = new ArrayList<>();
111+ // HdfDataObject currentNode = hdfDataObject;
112+ // while(currentNode.getParent() != null) {
113+ // parents.add(currentNode.getObjectName());
114+ // currentNode = currentNode.getParent().getDataObject();
115+ // }
116+ // Collections.reverse(parents);
117+ // String objectPathString = '/' + currentNode.getObjectName() + String.join("/", parents);
118+ // return objectPathString;
119+ // }
120+
121+ public static void displayData (SeekableByteChannel channel , HdfDataset ds , HdfFileReader reader ) throws Exception {
122+ log .debug ("Dataset path: {}" , ds .getObjectPath ());
123+ if (ds .hasData ()) {
124+ switch (ds .getDimensionality ()) {
125+ case 0 :
126+ displayScalarData (channel , ds , reader );
127+ break ;
128+ case 1 :
129+ displayVectorData (channel , ds , reader );
130+ break ;
131+ case 2 :
132+ displayMatrixData (channel , ds , reader );
133+ break ;
134+ default :
135+ displayNDimData (channel , ds , reader );
136+ break ;
137+
138+ }
139+ } else if (ds .isDataset () && ds .getHardLink () != null ) {
140+ log .info ("{}: HARDLINK = {} " , ds .getObjectName (), ds .getHardLink ());
141+ }
142+ }
143+
144+ /**
145+ * Displays scalar data from a dataset.
146+ * <p>
147+ * Reads and prints the scalar value from the dataset using both direct reading and
148+ * streaming methods, formatting the output with the dataset name and type information.
149+ * </p>
150+ *
151+ * @param fileChannel the seekable byte channel for reading the HDF5 file
152+ * @param dataSet the dataset to read from
153+ * @param hdfDataFile the HDF5 file context
154+ * @param <T> the type of the data
155+ * @throws IOException if an I/O error occurs
156+ */
157+ public static <T extends Comparable <T >> void displayScalarData (SeekableByteChannel fileChannel , HdfDataset dataSet , HdfDataFile hdfDataFile ) throws IOException , InvocationTargetException , InstantiationException , IllegalAccessException {
158+ Class <T > clazz = getClassForDatatype (dataSet );
159+ TypedDataSource <T > dataSource = new TypedDataSource <>(fileChannel , hdfDataFile , dataSet , clazz );
160+
161+ // Optional<String> max = dataSource.streamScalar().map(h->h.toString()).max(Comparator.naturalOrder());
162+ DoubleSummaryStatistics stats = dataSource .streamScalar ()
163+ .mapToDouble (h -> {
164+ try {
165+ return Double .valueOf (h .toString ());
166+ } catch (Exception ex ) {
167+ return Double .NaN ;
168+ }
169+ }).summaryStatistics ();
170+
171+ System .out .println (dataSet .getObjectPath () + " " + dataSet .getDatatype ().getDatatypeClass ().name () + "->" + clazz .getSimpleName () + " streamScalar stats = " + stats );
172+ // long count = dataSource.parallelStreamScalar().count();
173+ // System.out.println(dataSet.getObjectPath() + " stream count = " + String.format("%,d", count) + ":" + dataSet.getDatatype().toString());
174+ }
175+
176+ /**
177+ * Displays vector data from a dataset.
178+ * <p>
179+ * Reads and prints the vector data from the dataset using both direct reading and
180+ * streaming methods, formatting the output with type information and a comma-separated
181+ * list of values.
182+ * </p>
183+ *
184+ * @param fileChannel the seekable byte channel for reading the HDF5 file
185+ * @param dataSet the dataset to read from
186+ * @param hdfDataFile the HDF5 file context
187+ * @param <T> the type of the data elements
188+ * @throws IOException if an I/O error occurs
189+ */
190+ public static <T extends Comparable <T >> void displayVectorData (SeekableByteChannel fileChannel , HdfDataset dataSet , HdfDataFile hdfDataFile ) throws IOException , InvocationTargetException , InstantiationException , IllegalAccessException {
191+ Class <T > clazz = getClassForDatatype (dataSet );
192+ TypedDataSource <T > dataSource = new TypedDataSource <>(fileChannel , hdfDataFile , dataSet , clazz );
193+
194+ // T[] resultArray = dataSource.readVector();
195+ // log.info("{} read = {}", displayType(clazz, resultArray), displayValue(resultArray));
196+
197+ // Optional<String> max = dataSource.streamVector().map(h->h.toString()).max(Comparator.naturalOrder());
198+ // stats = dataSource.streamVector().mapToDouble(h->Double.valueOf(h.toString())).summaryStatistics();
199+ // System.out.println(dataSet.getObjectPath() + " " + dataSet.getDatatype().getDatatypeClass().name() + "->" + clazz.getSimpleName() + " streamVector max = " + (max.isPresent() ? max.get().toString() : "NaN"));
200+ DoubleSummaryStatistics stats = dataSource .streamVector ()
201+ .mapToDouble (h -> {
202+ try {
203+ return Double .valueOf (h .toString ());
204+ } catch (Exception ex ) {
205+ return Double .NaN ;
206+ }
207+ }).summaryStatistics ();
208+ System .out .println (dataSet .getObjectPath () + " " + dataSet .getDatatype ().getDatatypeClass ().name () + "->" + clazz .getSimpleName () + " streamVector stats = " + stats );
209+
210+ // long count = dataSource.parallelStreamVector().count();
211+ // System.out.println(dataSet.getObjectPath() + " stream count = " + String.format("%,d", count) + ":" + dataSet.getDatatype().toString());
212+ }
213+
214+ /**
215+ * Displays vector data from a dataset.
216+ * <p>
217+ * Reads and prints the vector data from the dataset using both direct reading and
218+ * streaming methods, formatting the output with type information and a comma-separated
219+ * list of values.
220+ * </p>
221+ *
222+ * @param fileChannel the seekable byte channel for reading the HDF5 file
223+ * @param dataSet the dataset to read from
224+ * @param hdfDataFile the HDF5 file context
225+ * @param <T> the type of the data elements
226+ * @throws IOException if an I/O error occurs
227+ */
228+ public static <T extends Comparable <T >> void displayMatrixData (SeekableByteChannel fileChannel , HdfDataset dataSet , HdfDataFile hdfDataFile ) throws IOException , InvocationTargetException , InstantiationException , IllegalAccessException {
229+ Class <T > clazz = getClassForDatatype (dataSet );
230+ TypedDataSource <T > dataSource = new TypedDataSource <>(fileChannel , hdfDataFile , dataSet , clazz );
231+
232+ DoubleSummaryStatistics stats = dataSource .streamMatrix ().flatMap (h -> Arrays .stream (h ))
233+ .mapToDouble (h -> {
234+ try {
235+ return Double .valueOf (h .toString ());
236+ } catch (Exception ex ) {
237+ return Double .NaN ;
238+ }
239+ }).summaryStatistics ();
240+
241+ System .out .println (dataSet .getObjectPath () + " " + dataSet .getDatatype ().getDatatypeClass ().name () + "->" + clazz .getSimpleName () + " streamMatrix stats = " + stats );
242+
243+ // long count = dataSource.parallelStreamMatrix().count();
244+ // System.out.println(dataSet.getObjectPath() + " stream count = " + String.format("%,d", count) + ":" + dataSet.getDatatype().toString());
245+ }
246+
247+ /**
248+ * Displays vector data from a dataset.
249+ * <p>
250+ * Reads and prints the vector data from the dataset using both direct reading and
251+ * streaming methods, formatting the output with type information and a comma-separated
252+ * list of values.
253+ * </p>
254+ *
255+ * @param fileChannel the seekable byte channel for reading the HDF5 file
256+ * @param dataSet the dataset to read from
257+ * @param hdfDataFile the HDF5 file context
258+ * @param <T> the type of the data elements
259+ * @throws IOException if an I/O error occurs
260+ */
261+ private static <T extends Comparable <T >> void displayNDimData (SeekableByteChannel fileChannel , HdfDataset dataSet , HdfDataFile hdfDataFile ) throws IOException , InvocationTargetException , InstantiationException , IllegalAccessException {
262+ Class <T > clazz = getClassForDatatype (dataSet );
263+ TypedDataSource <T > dataSource = new TypedDataSource <>(fileChannel , hdfDataFile , dataSet , clazz );
264+ // String readResult = flattenedArrayToString(dataSource.readFlattened(), dataSource.getShape());
265+ // log.info("read = {}", readResult);
266+
267+ // Optional<String> max = dataSource.streamFlattened().map(h->h.toString()).max(Comparator.naturalOrder());
268+ DoubleSummaryStatistics stats = dataSource .streamFlattened ()
269+ .mapToDouble (h -> {
270+ try {
271+ return Double .valueOf (h .toString ());
272+ } catch (Exception ex ) {
273+ return Double .NaN ;
274+ }
275+ }).summaryStatistics ();
276+ System .out .println (dataSet .getObjectPath () + " " + dataSet .getDatatype ().getDatatypeClass ().name () + "->" + clazz .getSimpleName () + " streamFlattened stats = " + stats );
277+
278+ // long count = dataSource.parallelStreamFlattened().count();
279+ // System.out.println(dataSet.getObjectPath() + " stream count = " + String.format("%,d", count) + ":" + dataSet.getDatatype().toString());
280+ }
281+
282+ @ SuppressWarnings ("unchecked" )
283+ private static <T extends Comparable <T >> Class <T > getClassForDatatype (HdfDataset dataSet ) {
284+ return (Class <T >) switch (dataSet .getDatatype ().getDatatypeClass ()) {
285+ case FIXED , TIME -> Long .class ;
286+ case FLOAT -> Double .class ;
287+ case STRING , BITFIELD , OPAQUE , COMPOUND , REFERENCE , ENUM , VLEN , ARRAY -> String .class ;
288+ };
289+
290+ }
291+ }
0 commit comments