Skip to content

Commit 9336931

Browse files
committed
demo for hyparquet
1 parent 2253659 commit 9336931

32 files changed

+1558
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ It contains the following package:
88
It also contains the following applications:
99
- [`hyperparam`](./apps/cli): a cli tool for viewing arbitrarily large datasets in the browser.
1010
- [`hightable-demo`](./apps/hightable-demo): an example project showing how to use [hightable](https://github.com/hyparam/hightable).
11+
- [`hyparquet-demo`](./apps/hyparquet-demo): an example project showing how to use [hyparquet](https://github.com/hyparam/hyparquet).

apps/hyparquet-demo/.gitignore

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Logs
2+
logs
3+
*.log
4+
npm-debug.log*
5+
yarn-debug.log*
6+
yarn-error.log*
7+
pnpm-debug.log*
8+
lerna-debug.log*
9+
10+
node_modules
11+
dist
12+
dist-ssr
13+
*.local
14+
15+
# Editor directories and files
16+
.vscode/*
17+
!.vscode/extensions.json
18+
.idea
19+
.DS_Store
20+
*.suo
21+
*.ntvs*
22+
*.njsproj
23+
*.sln
24+
*.sw?

apps/hyparquet-demo/README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Hyparquet demo
2+
3+
This is an example project showing how to use [hyparquet](https://github.com/hyparam/hyparquet).
4+
5+
## Build
6+
7+
```bash
8+
cd apps/hyparquet-demo
9+
npm i
10+
npm run build
11+
```
12+
13+
The build artifacts will be stored in the `dist/` directory and can be served using any static server, eg. `http-server`:
14+
15+
```bash
16+
npm i -g http-server
17+
http-server dist/
18+
```

apps/hyparquet-demo/TODO.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
- the rows are not resolved from URL or File
2+
- move the index.html to a component?
3+
- move the common code from this demo and from components to a new package?
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import js from '@eslint/js'
2+
import react from 'eslint-plugin-react'
3+
import reactHooks from 'eslint-plugin-react-hooks'
4+
import reactRefresh from 'eslint-plugin-react-refresh'
5+
import globals from 'globals'
6+
import tseslint from 'typescript-eslint'
7+
import { sharedJsRules, sharedTsRules } from '../../shared.eslint.config.js'
8+
9+
export default tseslint.config(
10+
{ ignores: ['dist'] },
11+
{
12+
extends: [js.configs.recommended, ...tseslint.configs.strictTypeChecked, ...tseslint.configs.stylisticTypeChecked],
13+
// Set the react version
14+
settings: { react: { version: '18.3' } },
15+
files: ['src/**/*.{ts,tsx}'],
16+
languageOptions: {
17+
ecmaVersion: 2020,
18+
globals: globals.browser,
19+
parserOptions: {
20+
project: './tsconfig.json',
21+
tsconfigRootDir: import.meta.dirname,
22+
},
23+
},
24+
plugins: {
25+
react,
26+
'react-hooks': reactHooks,
27+
'react-refresh': reactRefresh,
28+
},
29+
rules: {
30+
...react.configs.recommended.rules,
31+
...react.configs['jsx-runtime'].rules,
32+
...reactHooks.configs.recommended.rules,
33+
'react-refresh/only-export-components': [
34+
'warn',
35+
{ allowConstantExport: true },
36+
],
37+
...js.configs.recommended.rules,
38+
...tseslint.configs.recommended.rules,
39+
...sharedJsRules,
40+
...sharedTsRules,
41+
},
42+
},
43+
)

apps/hyparquet-demo/index.html

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>hyparquet parquet file parser demo</title>
6+
<link rel="icon" href="favicon.png" />
7+
<!-- <link rel="stylesheet" href="demo/demo.css"> -->
8+
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Mulish:wght@400;600&display=swap"/>
9+
<meta name="description" content="Online demo of hyparquet: a parser for apache parquet files. Drag and drop parquet files to view parquet data.">
10+
<meta name="author" content="Hyperparam">
11+
<meta name="keywords" content="hyparquet, parquet, parquet file, parquet parser, parquet reader, parquet viewer, parquet data, apache parquet, hightable">
12+
<meta name="viewport" content="width=device-width, initial-scale=1" />
13+
</head>
14+
<body>
15+
<nav>
16+
<a class="brand" href='https://hyparam.github.io/hyparquet/'>
17+
hyparquet
18+
</a>
19+
</nav>
20+
<main id="content">
21+
<div id="app"></div>
22+
<div id="welcome">
23+
<h1>hyparquet</h1>
24+
<sub>
25+
/haɪ pɑːrˈkeɪ/
26+
<img src="demo/assets/audio.svg" alt="play hyparquet pronunciation" height="18" width="18" onclick="audio.play()">
27+
</sub>
28+
<audio id="audio" src="demo/assets/hyparquet.mp3"></audio>
29+
<h2>in-browser parquet file reader</h2>
30+
<p>
31+
<a href="https://www.npmjs.com/package/hyparquet"><img src="https://img.shields.io/npm/v/hyparquet" alt="npm hyparquet"></a>
32+
<a href="https://github.com/hyparam/hyparquet"><img src="https://img.shields.io/github/stars/hyparam/hyparquet?style=social" alt="star hyparquet"></a>
33+
</p>
34+
<p>
35+
Online demo of <a href="https://github.com/hyparam/hyparquet">hyparquet</a>: a parser for apache parquet files.
36+
Uses <a href="https://github.com/hyparam/hightable">hightable</a> for high performance windowed table viewing.
37+
</p>
38+
<p>
39+
Drag and drop a parquet file (or url) to see your parquet data. 👀
40+
</p>
41+
<p>
42+
Example files:
43+
<ul class="quick-links">
44+
<li>
45+
<a
46+
class="aws"
47+
href="?key=https://hyperparam-public.s3.amazonaws.com/wiki-en-00000-of-00041.parquet">
48+
s3://wiki-en-00000-of-00041.parquet
49+
</a>
50+
</li>
51+
<li>
52+
<a
53+
class="azure"
54+
href="?key=https://hyperparam.blob.core.windows.net/hyperparam/starcoderdata-js-00000-of-00065.parquet">
55+
azure://starcoderdata-js-00000-of-00065.parquet
56+
</a>
57+
</li>
58+
<li>
59+
<a
60+
class="huggingface"
61+
href="?key=https://huggingface.co/datasets/codeparrot/github-code/resolve/main/data/train-00000-of-01126.parquet?download=true">
62+
huggingface://github-code-00000-of-01126.parquet
63+
</a>
64+
</li>
65+
<li>
66+
<a
67+
class="github"
68+
href="?key=https://raw.githubusercontent.com/hyparam/hyparquet/master/test/files/rowgroups.parquet">
69+
github://rowgroups.parquet
70+
</a>
71+
</li>
72+
</ul>
73+
</p>
74+
</div>
75+
</main>
76+
<input id="file-input" type="file">
77+
78+
<script type="module" src="/src/main.tsx"></script>
79+
</body>
80+
</html>

apps/hyparquet-demo/package.json

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
{
2+
"name": "hyparquet-demo",
3+
"private": true,
4+
"version": "0.0.0",
5+
"type": "module",
6+
"scripts": {
7+
"dev": "vite",
8+
"build": "tsc -b && vite build",
9+
"lint": "eslint .",
10+
"preview": "vite preview"
11+
},
12+
"dependencies": {
13+
"hyparquet": "1.5.0",
14+
"hightable": "0.7.0",
15+
"react": "^18.3.1",
16+
"react-dom": "^18.3.1"
17+
},
18+
"devDependencies": {
19+
"@eslint/js": "^9.13.0",
20+
"@types/react": "^18.3.12",
21+
"@types/react-dom": "^18.3.1",
22+
"@vitejs/plugin-react": "^4.3.3",
23+
"eslint": "^9.13.0",
24+
"eslint-plugin-react": "^7.37.2",
25+
"eslint-plugin-react-hooks": "^5.0.0",
26+
"eslint-plugin-react-refresh": "^0.4.14",
27+
"globals": "^15.11.0",
28+
"typescript": "~5.6.2",
29+
"typescript-eslint": "^8.11.0",
30+
"vite": "^5.4.10"
31+
}
32+
}
1.04 KB
Loading

apps/hyparquet-demo/src/App.tsx

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
import HighTable, { DataFrame, rowCache } from 'hightable'
2+
import { FileMetaData, byteLengthFromUrl, parquetMetadataAsync, parquetSchema } from 'hyparquet'
3+
import { ReactNode, useCallback, useEffect, useState } from 'react'
4+
import Dropdown from './Dropdown.js'
5+
import Dropzone from './Dropzone.js'
6+
import Layout from './Layout.js'
7+
import ParquetLayout from './ParquetLayout.js'
8+
import ParquetMetadata from './ParquetMetadata.js'
9+
import { asyncBufferFrom } from './utils.js'
10+
import { parquetQueryWorker } from './workers/parquetWorkerClient.js'
11+
import { AsyncBufferFrom, Row } from './workers/types.js'
12+
13+
type Lens = 'table' | 'metadata' | 'layout'
14+
15+
/**
 * Hyparquet demo viewer page
 *
 * Loads a parquet file from a dropped url, a dropped file, or the optional
 * `url` prop, then shows it through one of three lenses: table, metadata or
 * layout.
 *
 * @param {Object} props
 * @param {string} [props.url] parquet url to load while no dataframe is loaded
 * @returns {ReactNode}
 */
export default function App({ url }: { url?: string }): ReactNode {
  const [error, setError] = useState<Error>()
  const [df, setDf] = useState<DataFrame>()
  const [name, setName] = useState<string>()
  const [lens, setLens] = useState<Lens>('table')
  const [metadata, setMetadata] = useState<FileMetaData>()
  const [byteLength, setByteLength] = useState<number>()

  // Promise rejections are `unknown`; wrap non-Error values so the error state stays an Error
  const setUnknownError = useCallback((e: unknown) => {
    setError(e instanceof Error ? e : new Error(String(e)))
  }, [])

  const onUrlDrop = useCallback(
    (url: string) => {
      // Add key=url to query string.
      // pushState always appends a history entry, so skip it when the query
      // string already holds this url; otherwise Back would land on the same address.
      const params = new URLSearchParams(location.search)
      if (params.get('key') !== url) {
        params.set('key', url)
        history.pushState({}, '', `${location.pathname}?${params}`)
      }
      byteLengthFromUrl(url).then(byteLength => setAsyncBuffer(url, { url, byteLength })).catch(setUnknownError)
    },
    [setUnknownError],
  )

  // Load the url passed as a prop, as long as nothing has been loaded yet
  useEffect(() => {
    if (!df && url) {
      onUrlDrop(url)
    }
  }, [url, df, onUrlDrop])

  function onFileDrop(file: File) {
    // Clear query string
    history.pushState({}, '', location.pathname)
    setAsyncBuffer(file.name, { file, byteLength: file.size }).catch(setUnknownError)
  }

  /**
   * Read the parquet metadata of the given source and publish the metadata,
   * display name, byte length and dataframe to the component state.
   */
  async function setAsyncBuffer(name: string, from: AsyncBufferFrom) {
    // TODO: Replace welcome with spinner
    const asyncBuffer = await asyncBufferFrom(from)
    const metadata = await parquetMetadataAsync(asyncBuffer)
    setMetadata(metadata)
    setName(name)
    setByteLength(from.byteLength)
    let df = parquetDataFrame(from, metadata)
    df = rowCache(df)
    setDf(df)
    // The welcome section is static markup in index.html, outside of React's tree
    document.getElementById('welcome')?.remove()
  }

  return <Layout error={error}>
    <Dropzone
      onError={(e) => { setError(e) }}
      onFileDrop={onFileDrop}
      onUrlDrop={onUrlDrop}>
      {metadata && df && <>
        <div className='top-header'>{name}</div>
        <div className='view-header'>
          {byteLength !== undefined && <span title={byteLength.toLocaleString() + ' bytes'}>{formatFileSize(byteLength)}</span>}
          <span>{df.numRows.toLocaleString()} rows</span>
          <Dropdown label={lens}>
            <button onClick={() => { setLens('table') }}>Table</button>
            <button onClick={() => { setLens('metadata') }}>Metadata</button>
            {byteLength && <button onClick={() => { setLens('layout') }}>Layout</button>}
          </Dropdown>
        </div>
        {lens === 'table' && <HighTable cacheKey={name} data={df} onError={setError} />}
        {lens === 'metadata' && <ParquetMetadata metadata={metadata} />}
        {lens === 'layout' && byteLength && <ParquetLayout byteLength={byteLength} metadata={metadata} />}
      </>}
    </Dropzone>
  </Layout>
}
92+
93+
/**
 * Build a dataframe description for a parquet file.
 *
 * The header is taken from the names of the top-level schema children and the
 * row count from the file metadata. No row data is read here: every `rows`
 * request is forwarded to the parquet query worker together with the source
 * descriptor and the metadata.
 *
 * @param {AsyncBufferFrom} from source descriptor passed along with each query
 * @param {FileMetaData} metadata parquet file metadata
 * @returns {DataFrame}
 */
function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData): DataFrame {
  const schemaTree = parquetSchema(metadata)
  const columnNames = schemaTree.children.map(({ element }) => element.name)
  const rowCount = Number(metadata.num_rows)

  const dataFrame: DataFrame = {
    header: columnNames,
    numRows: rowCount,
    /**
     * Request a range of rows from the worker.
     *
     * @param {number} rowStart
     * @param {number} rowEnd
     * @param {string} orderBy
     * @returns {Promise<any[][]>}
     */
    rows(rowStart: number, rowEnd: number, orderBy: string): Promise<Row[]> {
      console.log(`reading rows ${rowStart}-${rowEnd}`, orderBy)
      return parquetQueryWorker({ from, metadata, rowStart, rowEnd, orderBy })
    },
    sortable: true,
  }
  return dataFrame
}
114+
115+
/**
 * Returns the file size in human readable format.
 *
 * Units are binary (1 kb = 1024 b). Anything below 1 kb is printed as a plain
 * byte count. Larger values get one decimal when the scaled value is below 10
 * and are rounded to an integer otherwise. Sizes beyond the largest unit are
 * still expressed in that unit.
 *
 * @param {number} bytes file size in bytes
 * @returns {string} formatted file size string
 */
function formatFileSize(bytes: number): string {
  const sizes = ['b', 'kb', 'mb', 'gb', 'tb']
  // Covers 0, sub-kilobyte sizes and invalid (negative / NaN) input, none of
  // which yield a usable log2 exponent
  if (!(bytes >= 1024)) return `${bytes} b`
  // Each unit spans 10 bits; clamp so huge sizes never index past the unit table
  const i = Math.min(Math.floor(Math.log2(bytes) / 10), sizes.length - 1)
  const base = bytes / Math.pow(1024, i)
  return `${base < 10 ? base.toFixed(1) : Math.round(base)} ${sizes[i]}`
}

0 commit comments

Comments
 (0)