|
1 | 1 | import { FileMetaData } from 'hyparquet' |
2 | 2 | import { ReactNode } from 'react' |
3 | 3 |
|
4 | | - |
5 | 4 | interface LayoutProps { |
6 | 5 | byteLength: number |
7 | 6 | metadata: FileMetaData |
@@ -56,44 +55,43 @@ function RowGroups({ metadata }: { metadata: FileMetaData }) { |
56 | 55 | return <> |
57 | 56 | {metadata.row_groups.map((rowGroup, i) => |
58 | 57 | <Group key={i} name={`RowGroup ${i}`} bytes={rowGroup.total_byte_size}> |
59 | | - <div>Columns parsing is not implemented</div> |
60 | | - { |
61 | | - /* |
62 | | - /// TODO(SL): expose getColumnRange and ColumnChunk frim hyparquet, then re-enable this code |
63 | | -
|
64 | 58 | {rowGroup.columns.map((column, j) => |
65 | 59 | <Column key={j} column={column} />, |
66 | 60 | )} |
67 | | - */} |
68 | 61 | </Group>, |
69 | 62 | )} |
70 | 63 | </> |
71 | 64 | } |
72 | 65 |
|
73 | | -// function Column({ key, column }: { key: number, column: ColumnChunk }) { |
74 | | -// if (!column.meta_data) return null |
75 | | -// const end = getColumnRange(column.meta_data)[1] |
76 | | -// const pages = [ |
77 | | -// { name: 'Dictionary', offset: column.meta_data.dictionary_page_offset }, |
78 | | -// { name: 'Data', offset: column.meta_data.data_page_offset }, |
79 | | -// { name: 'Index', offset: column.meta_data.index_page_offset }, |
80 | | -// { name: 'End', offset: end }, |
81 | | -// ] |
82 | | -// .filter(({ offset }) => offset !== undefined) |
83 | | -// .sort((a, b) => Number(a.offset) - Number(b.offset)) |
84 | | - |
85 | | -// const children = pages.slice(0, -1).map(({ name, offset }, index) => |
86 | | -// <Cell key={name} name={name} start={offset} end={pages[index + 1].offset} />, |
87 | | -// ) |
88 | | - |
89 | | - |
90 | | -// return <Group |
91 | | -// key={key} |
92 | | -// name={`Column ${column.meta_data?.path_in_schema.join('.')}`} |
93 | | -// bytes={column.meta_data?.total_compressed_size}> |
94 | | -// {children} |
95 | | -// </Group> |
96 | | -// } |
/** One entry of a row group's `columns` array, derived from the `FileMetaData` type. */
type ColumnChunk = FileMetaData['row_groups'][number]['columns'][number]
/** The `meta_data` of a column chunk, with the null/undefined case excluded. */
type ColumnMetadata = NonNullable<ColumnChunk['meta_data']>

/**
 * Render one column chunk as a Group containing one Cell per page region.
 *
 * The regions are delimited by the dictionary, data and index page offsets
 * (whichever are defined) plus the end of the column's byte range, sorted by
 * offset. Each Cell spans from one offset to the next one.
 *
 * React consumes `key` itself and never forwards it as a prop, so it is not
 * part of the props here; callers can still write `<Column key={...} />`.
 *
 * @param props.column column chunk to render
 * @returns the rendered Group, or null when the chunk has no `meta_data`
 */
function Column({ column }: { column: ColumnChunk }) {
  const { meta_data } = column
  if (!meta_data) return null

  const { dictionary_page_offset, data_page_offset, index_page_offset } = meta_data
  // Second element of the range is the end offset of the column chunk
  const end = getColumnRange(meta_data)[1]
  const pages = [
    { name: 'Dictionary', offset: dictionary_page_offset },
    { name: 'Data', offset: data_page_offset },
    { name: 'Index', offset: index_page_offset },
    { name: 'End', offset: end },
  ]
    // Optional offsets may be undefined: drop those pages
    .filter((page): page is { name: string, offset: bigint } => page.offset !== undefined)
    // Compare the bigints directly: converting to Number could lose precision
    .sort((a, b) => a.offset < b.offset ? -1 : a.offset > b.offset ? 1 : 0)

  // The last entry only closes the previous region, so it gets no Cell of its own
  const children = pages.slice(0, -1).map(({ name, offset }, index) =>
    <Cell key={name} name={name} start={offset} end={pages[index + 1].offset} />,
  )

  return <Group
    name={`Column ${meta_data.path_in_schema.join('.')}`}
    bytes={meta_data.total_compressed_size}>
    {children}
  </Group>
}
97 | 95 |
|
98 | 96 | function ColumnIndexes({ metadata }: { metadata: FileMetaData }) { |
99 | 97 | const indexPages = [] |
@@ -123,3 +121,19 @@ function ColumnIndexes({ metadata }: { metadata: FileMetaData }) { |
123 | 121 | )} |
124 | 122 | </Group> |
125 | 123 | } |
| 124 | + |
| 125 | + |
/**
 * Compute the byte range covered by a column chunk.
 *
 * The range starts at the dictionary page offset when that offset is set
 * (truthy) and does not come after the data page offset; otherwise it starts
 * at the data page offset. It ends `total_compressed_size` bytes later.
 *
 * Copied from hyparquet because it's not exported.
 *
 * @param columnMetadata metadata of the column chunk
 * @returns `[start, end]` byte offsets
 */
function getColumnRange({ dictionary_page_offset, data_page_offset, total_compressed_size }: ColumnMetadata): [bigint, bigint] {
  const start = dictionary_page_offset && dictionary_page_offset <= data_page_offset
    ? dictionary_page_offset
    : data_page_offset
  const end = start + total_compressed_size
  return [start, end]
}
0 commit comments