Skip to content

Commit d5f6a27

Browse files
committed
add column information in parquet layout
1 parent c3753e6 commit d5f6a27

File tree

1 file changed

+45
-31
lines changed

1 file changed

+45
-31
lines changed

apps/hyparquet-demo/src/ParquetLayout.tsx

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import { FileMetaData } from 'hyparquet'
22
import { ReactNode } from 'react'
33

4-
54
interface LayoutProps {
65
byteLength: number
76
metadata: FileMetaData
@@ -56,44 +55,43 @@ function RowGroups({ metadata }: { metadata: FileMetaData }) {
5655
return <>
5756
{metadata.row_groups.map((rowGroup, i) =>
5857
<Group key={i} name={`RowGroup ${i}`} bytes={rowGroup.total_byte_size}>
59-
<div>Columns parsing is not implemented</div>
60-
{
61-
/*
62-
/// TODO(SL): expose getColumnRange and ColumnChunk from hyparquet, then re-enable this code
63-
6458
{rowGroup.columns.map((column, j) =>
6559
<Column key={j} column={column} />,
6660
)}
67-
*/}
6861
</Group>,
6962
)}
7063
</>
7164
}
7265

73-
// function Column({ key, column }: { key: number, column: ColumnChunk }) {
74-
// if (!column.meta_data) return null
75-
// const end = getColumnRange(column.meta_data)[1]
76-
// const pages = [
77-
// { name: 'Dictionary', offset: column.meta_data.dictionary_page_offset },
78-
// { name: 'Data', offset: column.meta_data.data_page_offset },
79-
// { name: 'Index', offset: column.meta_data.index_page_offset },
80-
// { name: 'End', offset: end },
81-
// ]
82-
// .filter(({ offset }) => offset !== undefined)
83-
// .sort((a, b) => Number(a.offset) - Number(b.offset))
84-
85-
// const children = pages.slice(0, -1).map(({ name, offset }, index) =>
86-
// <Cell key={name} name={name} start={offset} end={pages[index + 1].offset} />,
87-
// )
88-
89-
90-
// return <Group
91-
// key={key}
92-
// name={`Column ${column.meta_data?.path_in_schema.join('.')}`}
93-
// bytes={column.meta_data?.total_compressed_size}>
94-
// {children}
95-
// </Group>
96-
// }
66+
/** Element type of a row group's `columns` array, derived from hyparquet's `FileMetaData` type. */
type ColumnChunk = FileMetaData['row_groups'][number]['columns'][number]
67+
/** Type of a column chunk's `meta_data` field with `null`/`undefined` excluded. */
type ColumnMetadata = NonNullable<ColumnChunk['meta_data']>
68+
69+
/**
 * Render one column chunk as a group of cells, one cell per page region.
 *
 * The known page offsets (dictionary, data, index) plus the end of the chunk
 * are sorted, and each offset is paired with the next one to form a cell.
 * Renders nothing when the chunk carries no metadata.
 *
 * `key` is intentionally not read from props: React reserves it and never
 * forwards it to the component, so callers keep writing `<Column key={...} />`
 * and the list reconciliation still works.
 *
 * @param props.column column chunk taken from a row group's `columns` array
 * @returns the rendered group, or `null` when `column.meta_data` is missing
 */
function Column({ column }: { column: ColumnChunk }) {
  const { meta_data } = column
  if (!meta_data) return null

  const { dictionary_page_offset, data_page_offset, index_page_offset } = meta_data
  const [, end] = getColumnRange(meta_data)
  const pages = [
    { name: 'Dictionary', offset: dictionary_page_offset },
    { name: 'Data', offset: data_page_offset },
    { name: 'Index', offset: index_page_offset },
    { name: 'End', offset: end },
  ]
    .filter((page): page is { name: string, offset: bigint } => page.offset !== undefined)
    // Compare the bigint offsets directly: going through Number() loses
    // precision for offsets above 2^53 and could misorder the pages.
    .sort((a, b) => a.offset < b.offset ? -1 : a.offset > b.offset ? 1 : 0)

  // Pair every entry with its successor; the last sorted entry only closes
  // the preceding cell and gets no cell of its own.
  const children = pages.slice(0, -1).map(({ name, offset }, index) =>
    <Cell key={name} name={name} start={offset} end={pages[index + 1].offset} />,
  )

  return <Group
    name={`Column ${meta_data.path_in_schema.join('.')}`}
    bytes={meta_data.total_compressed_size}>
    {children}
  </Group>
}
9795

9896
function ColumnIndexes({ metadata }: { metadata: FileMetaData }) {
9997
const indexPages = []
@@ -123,3 +121,19 @@ function ColumnIndexes({ metadata }: { metadata: FileMetaData }) {
123121
)}
124122
</Group>
125123
}
124+
125+
126+
/**
 * Compute the byte range occupied by a column chunk.
 *
 * Copied from hyparquet because it's not exported there.
 *
 * @param columnMetadata metadata of the column chunk
 * @returns `[start, end]` byte offsets, where `end` is `start` plus the
 *   chunk's total compressed size
 */
function getColumnRange({ dictionary_page_offset, data_page_offset, total_compressed_size }: ColumnMetadata): [bigint, bigint] {
  // The chunk starts at the dictionary page only when that offset is set
  // (neither undefined nor zero) and does not lie after the data page;
  // in every other case it starts at the data page.
  const start = dictionary_page_offset && dictionary_page_offset <= data_page_offset
    ? dictionary_page_offset
    : data_page_offset
  return [start, start + total_compressed_size]
}

0 commit comments

Comments
 (0)