9 changes: 9 additions & 0 deletions .gitignore
@@ -43,3 +43,12 @@ uv.lock
# AI rules
WARP.md
CLAUDE.md

# perf
*.svg
**/.benchmarks/**
*.html

# cython
*.so
*.c
18 changes: 18 additions & 0 deletions pyproject.toml
@@ -5,6 +5,7 @@ requires = [
"wheel",
"gitpython",
"setuptools_scm[toml]>=6.2",
"Cython>=3.0.0",
]
build-backend = "setuptools.build_meta"

@@ -73,6 +74,8 @@ dev = [
"pytest-cov>=5.0.0",
"pytest-timeout>=1.3.4",
"pytest-asyncio",
"pytest-benchmark[histogram]",
"Cython>=3.0.0",
"ruff>=0.12.9,<1",
"black",
# develop bulk_writer
@@ -215,3 +218,18 @@ builtins-ignorelist = [
"filter",
]
builtins-allowed-modules = ["types"]

[tool.cibuildwheel]
build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*"]
skip = ["*-musllinux_*", "pp*"]
test-requires = "pytest"
test-command = "pytest {package}/tests -k 'not (test_hybrid_search or test_milvus_client)' -x --tb=short || true"

[tool.cibuildwheel.linux]
before-all = "yum install -y gcc || apt-get update && apt-get install -y gcc"

[tool.cibuildwheel.macos]
before-all = "brew install gcc || true"

[tool.cibuildwheel.windows]
before-build = 'pip install "Cython>=3.0.0"'
159 changes: 159 additions & 0 deletions tests/benchmark/README.md
@@ -0,0 +1,159 @@
# pymilvus MilvusClient Benchmarking Suite

This benchmark suite measures client-side performance of pymilvus MilvusClient API operations (search, query, hybrid search) without requiring a running Milvus server.

## Overview

We benchmark **client-side code only** by mocking gRPC calls:
- ✅ Request preparation (parameter validation, serialization)
- ✅ Response parsing (deserialization, type conversion)
- ❌ Network I/O (excluded via mocking)
- ❌ Server-side processing (excluded via mocking)
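
A minimal sketch of the idea (names here are illustrative; the real suite builds canned protobuf responses in `mock_responses.py` and installs them via the helpers in `conftest.py`):

```python
import time
from unittest.mock import MagicMock


def time_client_side(client, canned_response, vector):
    """Time one search() call with the gRPC Search RPC mocked out.

    `client` is a connected MilvusClient and `canned_response` stands in
    for a prebuilt SearchResults protobuf message.
    """
    # Replace the network call with a canned reply: the timed region now
    # covers only request preparation and response parsing.
    client._get_connection()._stub.Search = MagicMock(return_value=canned_response)
    start = time.perf_counter()
    client.search(collection_name="bench", data=[vector], limit=10)  # no I/O
    return time.perf_counter() - start
```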

## Directory Structure

```
tests/benchmark/
├── README.md # This file - complete guide
├── conftest.py # Mock gRPC stubs & shared fixtures
├── mock_responses.py # Fake protobuf response builders
├── test_search_bench.py # Search timing benchmarks
└── scripts/
├── profile_cpu.sh # CPU profiling wrapper
└── profile_memory.sh # Memory profiling wrapper
```

## Installation

```bash
pip install -r requirements.txt
```

---

## 1. Timing Benchmarks (pytest-benchmark)
### Usage

```bash
# Run all benchmarks
pytest tests/benchmark/ --benchmark-only

# Run specific benchmark
pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32_varying_output_fields --benchmark-only

# Save baseline for comparison
pytest tests/benchmark/ --benchmark-only --benchmark-save=baseline

# Compare against baseline
pytest tests/benchmark/ --benchmark-only --benchmark-compare=baseline

# Generate histogram
pytest tests/benchmark/ --benchmark-only --benchmark-histogram
```
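
A test in `test_search_bench.py` follows the standard pytest-benchmark pattern. A hedged sketch (the `mocked_milvus_client` fixture and `setup_search_mock` helper come from `conftest.py`; `build_search_response` is a hypothetical stand-in for the builders in `mock_responses.py`):

```python
import random

from . import mock_responses
from .conftest import setup_search_mock


def test_search_float32_sketch(benchmark, mocked_milvus_client):
    # Hypothetical builder name; the real helpers live in mock_responses.py.
    response = mock_responses.build_search_response(nq=1, topk=10, dim=128)
    setup_search_mock(mocked_milvus_client, lambda *args, **kwargs: response)

    vector = [random.random() for _ in range(128)]
    # benchmark() invokes the callable repeatedly and records timing stats;
    # with the RPC mocked, only client-side code is measured.
    benchmark(
        mocked_milvus_client.search,
        collection_name="bench",  # illustrative collection name
        data=[vector],
        limit=10,
    )
```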

## 2. CPU Profiling (py-spy)
### Usage

#### Option A: Profile entire benchmark run

```bash
# Generate flamegraph (SVG)
py-spy record -o cpu_profile.svg --native -- pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32 -v

# Generate speedscope format (interactive viewer)
py-spy record -o cpu_profile.speedscope.json -f speedscope -- pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32 -v

# View speedscope: Upload to https://www.speedscope.app/
```

#### Option B: Use helper script

```bash
./tests/benchmark/scripts/profile_cpu.sh test_search_bench.py::test_search_float32
```

#### Option C: Profile specific function

```bash
# Top functions by CPU time
py-spy top -- python -m pytest tests/benchmark/test_search_bench.py::test_search_float32 -v
```

## 3. Memory Profiling (memray)

### What it Measures
- Memory allocation over time
- Peak memory usage
- Allocation flamegraphs
- Memory leaks
- Allocation call stacks

### Usage

#### Option A: Profile and generate reports

```bash
# Run with memray
memray run -o search_bench.bin -m pytest tests/benchmark/test_search_bench.py::test_search_float32 -v

# Generate flamegraph (HTML)
memray flamegraph search_bench.bin

# Generate table view (top allocators)
memray table search_bench.bin

# Generate tree view (call stack)
memray tree search_bench.bin

# Generate summary stats
memray summary search_bench.bin
```

#### Option B: Live monitoring

```bash
# Real-time memory usage in terminal
memray run --live -m pytest tests/benchmark/test_search_bench.py::test_search_float32 -v
```

#### Option C: Use helper script

```bash
./tests/benchmark/scripts/profile_memory.sh test_search_bench.py::test_search_float32
```

## 4. Complete Workflow

```bash
# Step 1: Install dependencies
pip install -e ".[dev]"

# Step 2: Run timing benchmarks and save a baseline (fast, ~minutes)
pytest tests/benchmark/ --benchmark-only --benchmark-save=baseline

# Step 3: Identify slow tests from benchmark results

# Step 4: CPU profile specific slow tests
py-spy record -o cpu_slow_test.svg -- pytest tests/benchmark/test_search_bench.py::test_slow_one -v

# Step 5: Memory profile tests with large results
memray run -o mem_large.bin pytest tests/benchmark/test_search_bench.py::test_large_results -v
memray flamegraph mem_large.bin

# Step 6: Analyze results and fix bottlenecks

# Step 7: Re-run benchmarks and compare with baseline
pytest tests/benchmark/ --benchmark-only --benchmark-compare=baseline
```

## Expected Bottlenecks

Based on code analysis, we expect to find:

1. **Protobuf deserialization** - Large responses with many fields
2. **Vector data conversion** - Bytes → numpy arrays
3. **Type conversions** - Protobuf types → Python types
4. **Field iteration** - Processing many output fields
5. **Memory copies** - Unnecessary data duplication

These benchmarks will help us validate and quantify these hypotheses.
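
As an example of bottleneck 2, converting raw response bytes into numpy arrays can either copy the payload or wrap it in place; a sketch (not pymilvus's actual internals) of the difference these benchmarks would surface:

```python
import numpy as np

# Fake wire payload: 10,000 float32 vectors of dim 128, as raw bytes.
raw = np.random.rand(10_000, 128).astype(np.float32).tobytes()

# Copying path: materializes a second buffer before reinterpreting it.
copied = np.array(bytearray(raw), dtype=np.uint8).view(np.float32).reshape(-1, 128)

# Zero-copy path: frombuffer wraps the existing bytes as a read-only view.
zero_copy = np.frombuffer(raw, dtype=np.float32).reshape(-1, 128)

assert np.array_equal(copied, zero_copy)
```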
Empty file added tests/benchmark/__init__.py
85 changes: 85 additions & 0 deletions tests/benchmark/conftest.py
@@ -0,0 +1,85 @@
from unittest.mock import MagicMock, patch

import pytest
from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient, StructFieldSchema
from pymilvus.grpc_gen import common_pb2, milvus_pb2, schema_pb2

from . import mock_responses


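# Helpers that swap a response-producing callable into the mocked gRPC stub of
# an already-connected client, one helper per RPC covered by the benchmarks.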
def setup_search_mock(client, mock_fn):
client._get_connection()._stub.Search = MagicMock(side_effect=mock_fn)


def setup_query_mock(client, mock_fn):
client._get_connection()._stub.Query = MagicMock(side_effect=mock_fn)


def setup_hybrid_search_mock(client, mock_fn):
client._get_connection()._stub.HybridSearch = MagicMock(side_effect=mock_fn)


def get_default_test_schema() -> CollectionSchema:
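    """Build a schema spanning scalar, vector, JSON, array, and struct field
    types, so response parsing exercises the full range of type conversions."""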
schema = MilvusClient.create_schema()
schema.add_field(field_name='id', datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name='embedding', datatype=DataType.FLOAT_VECTOR, dim=128)
schema.add_field(field_name='name', datatype=DataType.VARCHAR, max_length=100)
schema.add_field(field_name='bool_field', datatype=DataType.BOOL)
schema.add_field(field_name='int8_field', datatype=DataType.INT8)
schema.add_field(field_name='int16_field', datatype=DataType.INT16)
schema.add_field(field_name='int32_field', datatype=DataType.INT32)
schema.add_field(field_name='age', datatype=DataType.INT32)
schema.add_field(field_name='float_field', datatype=DataType.FLOAT)
schema.add_field(field_name='score', datatype=DataType.FLOAT)
schema.add_field(field_name='double_field', datatype=DataType.DOUBLE)
schema.add_field(field_name='varchar_field', datatype=DataType.VARCHAR, max_length=100)
schema.add_field(field_name='json_field', datatype=DataType.JSON)
schema.add_field(field_name='array_field', datatype=DataType.ARRAY, element_type=DataType.INT64, max_capacity=10)
schema.add_field(field_name='geometry_field', datatype=DataType.GEOMETRY)
schema.add_field(field_name='timestamptz_field', datatype=DataType.TIMESTAMPTZ)
schema.add_field(field_name='binary_vector', datatype=DataType.BINARY_VECTOR, dim=128)
schema.add_field(field_name='float16_vector', datatype=DataType.FLOAT16_VECTOR, dim=128)
schema.add_field(field_name='bfloat16_vector', datatype=DataType.BFLOAT16_VECTOR, dim=128)
schema.add_field(field_name='sparse_vector', datatype=DataType.SPARSE_FLOAT_VECTOR)
schema.add_field(field_name='int8_vector', datatype=DataType.INT8_VECTOR, dim=128)

struct_schema = StructFieldSchema()
struct_schema.add_field('struct_int', DataType.INT32)
struct_schema.add_field('struct_str', DataType.VARCHAR, max_length=100)
schema.add_field(field_name='struct_array_field', datatype=DataType.ARRAY, element_type=DataType.STRUCT, struct_schema=struct_schema, max_capacity=10)
return schema


@pytest.fixture
def mocked_milvus_client():
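    """Yield a MilvusClient whose gRPC channel and service stub are mocked, so
    it "connects" without a running Milvus server; per-test RPC behavior is
    installed with the setup_*_mock helpers above."""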
with patch('grpc.insecure_channel') as mock_channel_func, \
patch('grpc.secure_channel') as mock_secure_channel_func, \
patch('grpc.channel_ready_future') as mock_ready_future, \
patch('pymilvus.grpc_gen.milvus_pb2_grpc.MilvusServiceStub') as mock_stub_class:

mock_channel = MagicMock()
mock_channel_func.return_value = mock_channel
mock_secure_channel_func.return_value = mock_channel

mock_future = MagicMock()
mock_future.result = MagicMock(return_value=None)
mock_ready_future.return_value = mock_future

        mock_stub = MagicMock()

mock_connect_response = milvus_pb2.ConnectResponse()
mock_connect_response.status.error_code = common_pb2.ErrorCode.Success
mock_connect_response.status.code = 0
mock_connect_response.identifier = 12345
mock_stub.Connect = MagicMock(return_value=mock_connect_response)

mock_stub.Search = MagicMock()
mock_stub.Query = MagicMock()
mock_stub.HybridSearch = MagicMock()

mock_stub_class.return_value = mock_stub

client = MilvusClient()

yield client