import HighTable, { DataFrame, rowCache } from 'hightable'
import { FileMetaData, byteLengthFromUrl, parquetMetadataAsync, parquetSchema } from 'hyparquet'
import { ReactNode, useCallback, useEffect, useState } from 'react'
import Dropdown from './Dropdown.js'
import Dropzone from './Dropzone.js'
import Layout from './Layout.js'
import ParquetLayout from './ParquetLayout.js'
import ParquetMetadata from './ParquetMetadata.js'
import { asyncBufferFrom } from './utils.js'
import { parquetQueryWorker } from './workers/parquetWorkerClient.js'
import { AsyncBufferFrom, Row } from './workers/types.js'

type Lens = 'table' | 'metadata' | 'layout'

/**
 * Hyparquet demo viewer page
 * @param {Object} props
 * @param {string} [props.url]
 * @returns {ReactNode}
 */
export default function App({ url }: { url?: string }): ReactNode {
  const [error, setError] = useState<Error>()
  const [df, setDf] = useState<DataFrame>()
  const [name, setName] = useState<string>()
  const [lens, setLens] = useState<Lens>('table')
  const [metadata, setMetadata] = useState<FileMetaData>()
  const [byteLength, setByteLength] = useState<number>()

  const setUnknownError = useCallback((e: unknown) => {
    setError(e instanceof Error ? e : new Error(String(e)))
  }, [])

  const onUrlDrop = useCallback(
    (url: string) => {
      // Add key=url to query string
      const params = new URLSearchParams(location.search)
      params.set('key', url)
      history.pushState({}, '', `${location.pathname}?${params}`)
      byteLengthFromUrl(url).then(byteLength => setAsyncBuffer(url, { url, byteLength })).catch(setUnknownError)
    },
    [setUnknownError],
  )

  useEffect(() => {
    if (!df && url) {
      onUrlDrop(url)
    }
  }, [url, df, onUrlDrop])

  function onFileDrop(file: File) {
    // Clear query string
    history.pushState({}, '', location.pathname)
    setAsyncBuffer(file.name, { file, byteLength: file.size }).catch(setUnknownError)
  }

  async function setAsyncBuffer(name: string, from: AsyncBufferFrom) {
    // TODO: Replace welcome with spinner
    const asyncBuffer = await asyncBufferFrom(from)
    const metadata = await parquetMetadataAsync(asyncBuffer)
    setMetadata(metadata)
    setName(name)
    setByteLength(from.byteLength)
    // Wrap the dataframe in a row cache so HighTable does not re-fetch rows it has already read
    let df = parquetDataFrame(from, metadata)
    df = rowCache(df)
    setDf(df)
    document.getElementById('welcome')?.remove()
  }

  return <Layout error={error}>
    <Dropzone
      onError={(e) => { setError(e) }}
      onFileDrop={onFileDrop}
      onUrlDrop={onUrlDrop}>
      {metadata && df && <>
        <div className='top-header'>{name}</div>
        <div className='view-header'>
          {byteLength !== undefined && <span title={byteLength.toLocaleString() + ' bytes'}>{formatFileSize(byteLength)}</span>}
          <span>{df.numRows.toLocaleString()} rows</span>
          <Dropdown label={lens}>
            <button onClick={() => { setLens('table') }}>Table</button>
            <button onClick={() => { setLens('metadata') }}>Metadata</button>
            {byteLength && <button onClick={() => { setLens('layout') }}>Layout</button>}
          </Dropdown>
        </div>
        {lens === 'table' && <HighTable cacheKey={name} data={df} onError={setError} />}
        {lens === 'metadata' && <ParquetMetadata metadata={metadata} />}
        {lens === 'layout' && byteLength && <ParquetLayout byteLength={byteLength} metadata={metadata} />}
      </>}
    </Dropzone>
  </Layout>
}
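
// Illustrative only: one way this component might be mounted. The real entry point is not
// part of this file; the 'app' element id and the ?key query-string parsing are assumptions.
//
//   import { createRoot } from 'react-dom/client'
//   const params = new URLSearchParams(location.search)
//   createRoot(document.getElementById('app')!)
//     .render(<App url={params.get('key') ?? undefined} />)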

/**
 * Convert a parquet file into a dataframe.
 */
function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData): DataFrame {
  const { children } = parquetSchema(metadata)
  return {
    header: children.map(child => child.element.name),
    numRows: Number(metadata.num_rows),
    /**
     * @param {number} rowStart
     * @param {number} rowEnd
     * @param {string} orderBy
     * @returns {Promise<Row[]>}
     */
    rows(rowStart: number, rowEnd: number, orderBy: string): Promise<Row[]> {
      console.log(`reading rows ${rowStart}-${rowEnd}`, orderBy)
      return parquetQueryWorker({ from, metadata, rowStart, rowEnd, orderBy })
    },
    sortable: true,
  }
}
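
// Illustrative sketch of the DataFrame contract above (not called in this file):
// HighTable requests a row range and, when sorting, a column name. 'name' below is a
// hypothetical column used only for the example.
//
//   const df = rowCache(parquetDataFrame(from, metadata))
//   df.rows(0, 100, 'name').then(rows => console.log(df.header, rows.length))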

/**
 * Returns the file size in human readable format.
 *
 * @param {number} bytes file size in bytes
 * @returns {string} formatted file size string
 */
function formatFileSize(bytes: number): string {
  const sizes = ['b', 'kb', 'mb', 'gb', 'tb']
  if (bytes === 0) return '0 b'
  const i = Math.floor(Math.log2(bytes) / 10)
  if (i === 0) return `${bytes} b`
  const base = bytes / Math.pow(1024, i)
  return `${base < 10 ? base.toFixed(1) : Math.round(base)} ${sizes[i]}`
}
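
// Example outputs, derived from the base-1024 logic above:
//   formatFileSize(0)                  // '0 b'
//   formatFileSize(500)                // '500 b'
//   formatFileSize(1536)               // '1.5 kb'
//   formatFileSize(10 * 1024 * 1024)   // '10 mb'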