From 410c50de84388a76a457f4ee7a8663585b975ea6 Mon Sep 17 00:00:00 2001 From: Shoubhit Dash Date: Sat, 20 Dec 2025 22:59:31 +0530 Subject: [PATCH] add docs for code-chunk/wasm --- packages/code-chunk/README.md | 111 ++++++++++++++++++++++++++ packages/code-chunk/src/wasm.ts | 37 ++++----- packages/code-chunk/test/wasm.test.ts | 6 +- 3 files changed, 131 insertions(+), 23 deletions(-) diff --git a/packages/code-chunk/README.md b/packages/code-chunk/README.md index 2919d23..c66fa14 100644 --- a/packages/code-chunk/README.md +++ b/packages/code-chunk/README.md @@ -10,6 +10,7 @@ Uses tree-sitter to split source code at semantic boundaries (functions, classes - [How It Works](#how-it-works) - [Installation](#installation) - [Quickstart](#quickstart) +- [Edge Runtimes (WASM)](#edge-runtimes-wasm) - [API Reference](#api-reference) - [License](#license) @@ -21,6 +22,7 @@ Uses tree-sitter to split source code at semantic boundaries (functions, classes - **Multi-language**: TypeScript, JavaScript, Python, Rust, Go, Java - **Streaming**: Process large files incrementally - **Effect support**: First-class Effect integration +- **Edge-ready**: Works in Cloudflare Workers and other edge runtimes via WASM ## How It Works @@ -159,6 +161,81 @@ const program = Stream.runForEach( await Effect.runPromise(program) ``` +## Edge Runtimes (WASM) + +The default entry point uses Node.js APIs to load tree-sitter WASM files from the filesystem. For edge runtimes, use the `code-chunk/wasm` entry point which accepts pre-loaded WASM binaries. 
+ +### Cloudflare Workers + +```typescript +import { createChunker } from 'code-chunk/wasm' + +import treeSitterWasm from 'web-tree-sitter/tree-sitter.wasm' +import typescriptWasm from 'tree-sitter-typescript/tree-sitter-typescript.wasm' +import javascriptWasm from 'tree-sitter-javascript/tree-sitter-javascript.wasm' + +export default { + async fetch(request: Request): Promise<Response> { + const chunker = await createChunker({ + treeSitter: treeSitterWasm, + languages: { + typescript: typescriptWasm, + javascript: javascriptWasm, + }, + }) + + const code = await request.text() + const chunks = await chunker.chunk('input.ts', code) + + return Response.json(chunks) + }, +} +``` + +### WasmConfig + +The `createChunker` function from `code-chunk/wasm` accepts a `WasmConfig` object: + +```typescript +interface WasmConfig { + treeSitter: WasmBinary + languages: Partial<Record<Language, WasmBinary>> +} + +type WasmBinary = Uint8Array | ArrayBuffer | Response | string +``` + +- `treeSitter`: The `web-tree-sitter` runtime WASM binary +- `languages`: Map of language names to their grammar WASM binaries + +Only include the languages you need to minimize bundle size. + +### WASM Errors + +The WASM entry point throws specific errors: + +- **`WasmParserError`**: Parser initialization or parsing failed +- **`WasmGrammarError`**: No WASM binary provided for requested language +- **`WasmChunkingError`**: Chunking process failed +- **`UnsupportedLanguageError`**: File extension not recognized + +```typescript +import { + WasmParserError, + WasmGrammarError, + WasmChunkingError, + UnsupportedLanguageError +} from 'code-chunk/wasm' + +try { + const chunks = await chunker.chunk('input.ts', code) +} catch (error) { + if (error instanceof WasmGrammarError) { + console.error(`Language not loaded: ${error.language}`) + } +} +``` + ## API Reference ### `chunk(filepath, code, options?)` @@ -202,6 +279,40 @@ Create a reusable chunker instance with default options. 
--- +### `createChunker(config, options?)` (WASM) + +Create a chunker for edge runtimes with pre-loaded WASM binaries. + +```typescript +import { createChunker } from 'code-chunk/wasm' +``` + +**Parameters:** +- `config`: `WasmConfig` with `treeSitter` and `languages` WASM binaries +- `options`: Optional `ChunkOptions` + +**Returns:** `Promise<Chunker>` + +**Throws:** `WasmParserError`, `WasmGrammarError`, `WasmChunkingError`, `UnsupportedLanguageError` + +--- + +### `WasmParser` + +Low-level parser class for edge runtimes. Use this when you need direct access to parsing without chunking. + +```typescript +import { WasmParser } from 'code-chunk/wasm' + +const parser = new WasmParser(config) +await parser.init() + +const result = await parser.parse(code, 'typescript') +console.log(result.tree.rootNode) +``` + +--- + ### `formatChunkWithContext(text, context, overlapText?)` Format chunk text with semantic context prepended. Useful for custom embedding pipelines. diff --git a/packages/code-chunk/src/wasm.ts b/packages/code-chunk/src/wasm.ts index fa2865a..07ff882 100644 --- a/packages/code-chunk/src/wasm.ts +++ b/packages/code-chunk/src/wasm.ts @@ -1,29 +1,35 @@ import { Effect } from 'effect' - -import type { - Chunk, - Chunker, - ChunkOptions, - Language, - WasmConfig, -} from './types' - import { chunk as chunkInternal, DEFAULT_CHUNK_OPTIONS, streamChunks as streamChunksInternal, } from './chunking' import { extractEntities } from './extract' -import { WasmParser } from './parser/wasm' import { detectLanguage } from './parser/languages' +import { WasmParser } from './parser/wasm' import { buildScopeTree } from './scope' +import type { + Chunk, + Chunker, + ChunkOptions, + Language, + WasmConfig, +} from './types' +export { formatChunkWithContext } from './context/format' +export { detectLanguage, LANGUAGE_EXTENSIONS } from './parser/languages' +export { + createWasmParser, + WasmGrammarError, + WasmParser, + WasmParserError, +} from './parser/wasm' export type { Chunk, 
ChunkContext, ChunkEntityInfo, - ChunkOptions, Chunker, + ChunkOptions, EntityInfo, EntityType, ImportInfo, @@ -34,15 +40,6 @@ export type { WasmConfig, } from './types' -export { formatChunkWithContext } from './context/format' -export { - WasmGrammarError, - WasmParser, - WasmParserError, - createWasmParser, -} from './parser/wasm' -export { detectLanguage, LANGUAGE_EXTENSIONS } from './parser/languages' - export class WasmChunkingError extends Error { readonly _tag = 'WasmChunkingError' override readonly cause?: unknown diff --git a/packages/code-chunk/test/wasm.test.ts b/packages/code-chunk/test/wasm.test.ts index 53a5123..8ae8f9f 100644 --- a/packages/code-chunk/test/wasm.test.ts +++ b/packages/code-chunk/test/wasm.test.ts @@ -1,16 +1,16 @@ +import { describe, expect, test } from 'bun:test' import { readFile } from 'node:fs/promises' import { resolve } from 'node:path' -import { describe, expect, test } from 'bun:test' import type { WasmConfig } from '../src/types' import { + createChunker, + UnsupportedLanguageError, WasmChunkingError, WasmGrammarError, WasmParser, WasmParserError, - createChunker, - UnsupportedLanguageError, } from '../src/wasm' async function loadWasmBinary(packagePath: string): Promise {