From cc6d5ec647f5aec7ab4e7898b4a440dfd1a69275 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 16:23:31 +0000 Subject: [PATCH 1/3] Move CLAUDE.md from parser/ to repository root This makes the development guide more discoverable at the top level of the repository. --- parser/CLAUDE.md => CLAUDE.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename parser/CLAUDE.md => CLAUDE.md (100%) diff --git a/parser/CLAUDE.md b/CLAUDE.md similarity index 100% rename from parser/CLAUDE.md rename to CLAUDE.md From 9b895276f390bfdca4e342f08bb4b156be1bcb92 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 16:33:27 +0000 Subject: [PATCH 2/3] Expand CLAUDE.md with comprehensive development guide Add detailed documentation including: - Next test workflow using cmd/next-test - Step-by-step implementation workflow - Test structure and flags documentation - TsqlAstParser setup and usage instructions - Important rules about not modifying ast.json files --- CLAUDE.md | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 99 insertions(+), 5 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 7ae455024..c2a7e4237 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,4 +1,26 @@ -# Parser Development Notes +# Claude Development Guide + +## Next Steps + +To find the next test to work on, run: + +```bash +go run ./cmd/next-test +``` + +This tool finds all tests with `todo: true` in their metadata and returns the one with the shortest `query.sql` file. + +## Workflow + +1. Run `go run ./cmd/next-test` to find the next test to implement +2. Check the test's `query.sql` to understand what SQL needs parsing +3. Check the test's `ast.json` to understand the expected output format +4. Implement the necessary AST types in `ast/` +5. Add parser logic in `parser/parser.go` +6. Add JSON marshaling functions in `parser/parser.go` +7. Enable the test by removing `todo: true` from its `metadata.json` (set it to `{}`) +8. Run `go test ./parser/... 
-timeout 5s` to verify +9. Check if other todo tests now pass (see below) ## Running Tests @@ -10,12 +32,84 @@ go test ./parser/... -timeout 5s The tests are very fast. If a test is timing out, it indicates a bug (likely an infinite loop in the parser). -## Checking Skipped Tests +## Checking for Newly Passing Todo Tests -After fixing parser issues, check if any skipped tests now pass: +After implementing parser changes, run: ```bash -go test ./parser -check-skipped -v 2>&1 | grep "PASSES NOW" +go test ./parser/... -only-todo -v 2>&1 | grep "PASS:" ``` -Tests that output `PASSES NOW` can have their `todo` flag removed from `metadata.json`. This helps identify when parser improvements fix multiple tests at once. +This shows any todo tests that now pass. Enable those tests by removing `todo: true` from their `metadata.json`. + +Available test flags: + +- `-only-todo` - Run only todo/invalid_syntax tests (find newly passing tests) +- `-run-todo` - Run todo/invalid_syntax tests along with normal tests + +## Test Structure + +Each test in `parser/testdata/` contains: + +- `metadata.json` - `{}` for enabled tests, `{"todo": true}` for pending tests, or `{"invalid_syntax": true}` for tests with invalid SQL +- `query.sql` - T-SQL to parse +- `ast.json` - Expected AST output + +## Important Rules + +**NEVER modify `ast.json` files** - These are golden files containing the expected output. If tests fail due to JSON mismatches, fix the Go code to match the expected output, not the other way around. + +## Generating ast.json with TsqlAstParser + +The `TsqlAstParser/` directory contains a C# tool that generates `ast.json` files using Microsoft's official T-SQL parser (ScriptDom). + +### Prerequisites + +1. Install .NET 8.0 SDK: + +```bash +curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin --channel 8.0 --install-dir ~/.dotnet +``` + +2. 
Download the NuGet package (if `packages/` directory is empty): + +```bash +mkdir -p packages +curl -L -o packages/microsoft.sqlserver.transactsql.scriptdom.170.128.0.nupkg \ + "https://api.nuget.org/v3-flatcontainer/microsoft.sqlserver.transactsql.scriptdom/170.128.0/microsoft.sqlserver.transactsql.scriptdom.170.128.0.nupkg" +``` + +3. Build the tool: + +```bash +~/.dotnet/dotnet build TsqlAstParser -c Release +``` + +### Usage + +Generate `ast.json` for a single test: + +```bash +~/.dotnet/dotnet run --project TsqlAstParser -c Release -- parser/testdata/TestName/query.sql parser/testdata/TestName/ast.json +``` + +Generate `ast.json` for all tests missing it: + +```bash +for dir in parser/testdata/*/; do + if [ -f "$dir/query.sql" ] && [ ! -f "$dir/ast.json" ]; then + ~/.dotnet/dotnet run --project TsqlAstParser -c Release -- "$dir/query.sql" "$dir/ast.json" + fi +done +``` + +### Limitations + +TsqlAstParser uses TSql160Parser (SQL Server 2022) and cannot parse: + +- SQL Server 170+ features (VECTOR indexes, AI functions, JSON enhancements) +- Fabric DW-specific syntax (CLONE TABLE, CLUSTER BY) +- Deprecated syntax removed in newer versions +- Intentionally invalid SQL (error test cases) + +Tests for unsupported syntax will not have `ast.json` files generated. 
From 1fa36f5f8a212a6e77adb97f08ff74542e1302c9 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Dec 2025 16:37:43 +0000 Subject: [PATCH 3/3] Add next-test command and adapt CLAUDE.md for this repo - Create cmd/next-test to find the next todo test (shortest query first) - Rewrite CLAUDE.md to match ClickHouse parser workflow: - Tests use explain.txt (not ast.json) - Remove TsqlAstParser section (not applicable) - Document metadata options --- CLAUDE.md | 83 ++++++---------------------------- cmd/next-test/main.go | 103 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 68 deletions(-) create mode 100644 cmd/next-test/main.go diff --git a/CLAUDE.md b/CLAUDE.md index c2a7e4237..eb678d6df 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -13,11 +13,11 @@ This tool finds all tests with `todo: true` in their metadata and returns the on ## Workflow 1. Run `go run ./cmd/next-test` to find the next test to implement -2. Check the test's `query.sql` to understand what SQL needs parsing -3. Check the test's `ast.json` to understand the expected output format +2. Check the test's `query.sql` to understand what ClickHouse SQL needs parsing +3. Check the test's `explain.txt` to understand the expected EXPLAIN output 4. Implement the necessary AST types in `ast/` 5. Add parser logic in `parser/parser.go` -6. Add JSON marshaling functions in `parser/parser.go` +6. Update the `Explain()` function if needed to match ClickHouse's output format 7. Enable the test by removing `todo: true` from its `metadata.json` (set it to `{}`) 8. Run `go test ./parser/... -timeout 5s` to verify 9. Check if other todo tests now pass (see below) @@ -37,79 +37,26 @@ The tests are very fast. If a test is timing out, it indicates a bug (likely an After implementing parser changes, run: ```bash -go test ./parser/... -only-todo -v 2>&1 | grep "PASS:" +go test ./parser/... -check-skipped -v 2>&1 | grep "PASSES NOW" ``` -This shows any todo tests that now pass. 
Enable those tests by removing `todo: true` from their `metadata.json`. - -Available test flags: - -- `-only-todo` - Run only todo/invalid_syntax tests (find newly passing tests) -- `-run-todo` - Run todo/invalid_syntax tests along with normal tests +Tests that output `PASSES NOW` can have their `todo` flag removed from `metadata.json`. This helps identify when parser improvements fix multiple tests at once. ## Test Structure Each test in `parser/testdata/` contains: -- `metadata.json` - `{}` for enabled tests, `{"todo": true}` for pending tests, or `{"invalid_syntax": true}` for tests with invalid SQL -- `query.sql` - T-SQL to parse -- `ast.json` - Expected AST output - -## Important Rules - -**NEVER modify `ast.json` files** - These are golden files containing the expected output. If tests fail due to JSON mismatches, fix the Go code to match the expected output, not the other way around. - -## Generating ast.json with TsqlAstParser - -The `TsqlAstParser/` directory contains a C# tool that generates `ast.json` files using Microsoft's official T-SQL parser (ScriptDom). - -### Prerequisites - -1. Install .NET 8.0 SDK: - -```bash -curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin --channel 8.0 --install-dir ~/.dotnet -``` - -2. Download the NuGet package (if `packages/` directory is empty): - -```bash -mkdir -p packages -curl -L -o packages/microsoft.sqlserver.transactsql.scriptdom.170.128.0.nupkg \ - "https://api.nuget.org/v3-flatcontainer/microsoft.sqlserver.transactsql.scriptdom/170.128.0/microsoft.sqlserver.transactsql.scriptdom.170.128.0.nupkg" -``` - -3. 
Build the tool: - -```bash -~/.dotnet/dotnet build TsqlAstParser -c Release -``` - -### Usage +- `metadata.json` - `{}` for enabled tests, `{"todo": true}` for pending tests +- `query.sql` - ClickHouse SQL to parse +- `explain.txt` - Expected EXPLAIN AST output (matches ClickHouse's format) -Generate `ast.json` for a single test: +### Metadata Options -```bash -~/.dotnet/dotnet run --project TsqlAstParser -c Release -- parser/testdata/TestName/query.sql parser/testdata/TestName/ast.json -``` +- `todo: true` - Test is pending implementation +- `skip: true` - Skip test entirely (e.g., causes infinite loop) +- `explain: false` - Skip test (e.g., ClickHouse couldn't parse it) +- `parse_error: true` - Query is intentionally invalid SQL -Generate `ast.json` for all tests missing it: - -```bash -for dir in parser/testdata/*/; do - if [ -f "$dir/query.sql" ] && [ ! -f "$dir/ast.json" ]; then - ~/.dotnet/dotnet run --project TsqlAstParser -c Release -- "$dir/query.sql" "$dir/ast.json" - fi -done -``` - -### Limitations - -TsqlAstParser uses TSql160Parser (SQL Server 2022) and cannot parse: - -- SQL Server 170+ features (VECTOR indexes, AI functions, JSON enhancements) -- Fabric DW-specific syntax (CLONE TABLE, CLUSTER BY) -- Deprecated syntax removed in newer versions -- Intentionally invalid SQL (error test cases) +## Important Rules -Tests for unsupported syntax will not have `ast.json` files generated. +**NEVER modify `explain.txt` files** - These are golden files containing the expected output from ClickHouse. If tests fail due to output mismatches, fix the Go code to match the expected output, not the other way around. 
// Command next-test (cmd/next-test/main.go) reports the next pending parser
// test to work on.
//
// It scans parser/testdata, relative to the current working directory, for
// test directories whose metadata.json has "todo": true and that are not also
// marked skip, explain=false or parse_error. The candidate with the smallest
// query.sql is printed together with its query, its expected EXPLAIN output
// (when explain.txt exists) and the number of remaining todo tests.
package main

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"sort"
)

// testMetadata holds the metadata.json fields this tool looks at.
type testMetadata struct {
	Todo       bool  `json:"todo,omitempty"`
	Explain    *bool `json:"explain,omitempty"` // nil when the key is absent
	Skip       bool  `json:"skip,omitempty"`
	ParseError bool  `json:"parse_error,omitempty"`
}

// isCandidate reports whether a test with this metadata should be offered as
// upcoming work: it must be marked todo and must not be marked skip,
// explain=false or parse_error.
func (m testMetadata) isCandidate() bool {
	if !m.Todo || m.Skip || m.ParseError {
		return false
	}
	// An absent "explain" key counts as enabled; only an explicit false excludes.
	return m.Explain == nil || *m.Explain
}

// todoTest is one candidate test directory.
type todoTest struct {
	name      string // directory name under the testdata directory
	querySize int    // size of query.sql in bytes
	query     []byte // contents of query.sql, kept so the file is read only once
}

// sortTodoTests orders tests in place by ascending query size. Ties are broken
// by name because sort.Slice is not stable; without the tie-break the test
// chosen among equal-sized queries would be unspecified.
func sortTodoTests(tests []todoTest) {
	sort.Slice(tests, func(i, j int) bool {
		if tests[i].querySize != tests[j].querySize {
			return tests[i].querySize < tests[j].querySize
		}
		return tests[i].name < tests[j].name
	})
}

// collectTodoTests returns the candidate tests found directly under
// testdataDir, sorted with sortTodoTests.
//
// An error is returned only when testdataDir itself cannot be listed. Entries
// that are not directories, or that lack a readable metadata.json or
// query.sql, are skipped. A metadata.json that fails to decode is skipped with
// a warning on stderr so the broken test does not vanish unnoticed.
func collectTodoTests(testdataDir string) ([]todoTest, error) {
	entries, err := os.ReadDir(testdataDir)
	if err != nil {
		return nil, err
	}

	var tests []todoTest
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		testDir := filepath.Join(testdataDir, entry.Name())

		metadataBytes, err := os.ReadFile(filepath.Join(testDir, "metadata.json"))
		if err != nil {
			continue
		}

		var metadata testMetadata
		if err := json.Unmarshal(metadataBytes, &metadata); err != nil {
			fmt.Fprintf(os.Stderr, "Warning: skipping %s: invalid metadata.json: %v\n", entry.Name(), err)
			continue
		}
		if !metadata.isCandidate() {
			continue
		}

		query, err := os.ReadFile(filepath.Join(testDir, "query.sql"))
		if err != nil {
			continue
		}

		tests = append(tests, todoTest{
			name:      entry.Name(),
			querySize: len(query),
			query:     query,
		})
	}

	sortTodoTests(tests)
	return tests, nil
}

// main prints the smallest pending test, or a notice when none remain. It
// exits with status 1 when the testdata directory cannot be read.
func main() {
	const testdataDir = "parser/testdata"

	todoTests, err := collectTodoTests(testdataDir)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error reading testdata: %v\n", err)
		os.Exit(1)
	}

	if len(todoTests) == 0 {
		fmt.Println("No todo tests found!")
		return
	}

	next := todoTests[0]
	fmt.Printf("Next test: %s\n\n", next.name)

	// The query bytes come from the scan, so the printed size and contents
	// always describe the same read of query.sql.
	fmt.Printf("Query (%d bytes):\n%s\n", next.querySize, next.query)

	// explain.txt is optional; the section is omitted when it cannot be read.
	explainPath := filepath.Join(testdataDir, next.name, "explain.txt")
	if explainBytes, err := os.ReadFile(explainPath); err == nil {
		fmt.Printf("\nExpected EXPLAIN output:\n%s\n", explainBytes)
	}

	fmt.Printf("\nRemaining todo tests: %d\n", len(todoTests))
}