9 changes: 9 additions & 0 deletions .gitignore
@@ -43,3 +43,12 @@ uv.lock
# AI rules
WARP.md
CLAUDE.md

# perf
*.svg
**/.benchmarks/**
*.html

# cython
*.so
*.c
18 changes: 18 additions & 0 deletions pyproject.toml
@@ -5,6 +5,7 @@ requires = [
"wheel",
"gitpython",
"setuptools_scm[toml]>=6.2",
"Cython>=3.0.0",
]
build-backend = "setuptools.build_meta"

@@ -73,6 +74,8 @@ dev = [
"pytest-cov>=5.0.0",
"pytest-timeout>=1.3.4",
"pytest-asyncio",
"pytest-benchmark[histogram]",
"Cython>=3.0.0",
"ruff>=0.12.9,<1",
"black",
# develop bulk_writer
@@ -215,3 +218,18 @@ builtins-ignorelist = [
"filter",
]
builtins-allowed-modules = ["types"]

[tool.cibuildwheel]
build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*"]
skip = ["*-musllinux_*", "pp*"]
test-requires = "pytest"
test-command = "pytest {package}/tests -k 'not (test_hybrid_search or test_milvus_client)' -x --tb=short || true"

[tool.cibuildwheel.linux]
before-all = "yum install -y gcc || apt-get update && apt-get install -y gcc"

[tool.cibuildwheel.macos]
before-all = "brew install gcc || true"

[tool.cibuildwheel.windows]
before-build = 'pip install "Cython>=3.0.0"'
159 changes: 159 additions & 0 deletions tests/benchmark/README.md
@@ -0,0 +1,159 @@
# pymilvus MilvusClient Benchmarking Suite

This benchmark suite measures client-side performance of pymilvus MilvusClient API operations (search, query, hybrid search) without requiring a running Milvus server.

## Overview

We benchmark **client-side code only** by mocking gRPC calls:
- ✅ Request preparation (parameter validation, serialization)
- ✅ Response parsing (deserialization, type conversion)
- ❌ Network I/O (excluded via mocking)
- ❌ Server-side processing (excluded via mocking)
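
A minimal sketch of the idea (names here are illustrative; the real suite builds canned protobuf responses in `mock_responses.py` and installs them via the helpers in `conftest.py`):

```python
import time
from unittest.mock import MagicMock


def time_client_side(client, canned_response, vector):
    """Time one search() call with the gRPC Search RPC mocked out.

    `client` is a connected MilvusClient and `canned_response` stands in
    for a prebuilt SearchResults protobuf message.
    """
    # Replace the network call with a canned reply: the timed region now
    # covers only request preparation and response parsing.
    client._get_connection()._stub.Search = MagicMock(return_value=canned_response)
    start = time.perf_counter()
    client.search(collection_name="bench", data=[vector], limit=10)  # no I/O
    return time.perf_counter() - start
```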

## Directory Structure

```
tests/benchmark/
├── README.md # This file - complete guide
├── conftest.py # Mock gRPC stubs & shared fixtures
├── mock_responses.py # Fake protobuf response builders
├── test_search_bench.py # Search timing benchmarks
└── scripts/
├── profile_cpu.sh # CPU profiling wrapper
└── profile_memory.sh # Memory profiling wrapper
```

## Installation

```bash
pip install -r requirements.txt
```

---

## 1. Timing Benchmarks (pytest-benchmark)
### Usage

```bash
# Run all benchmarks
pytest tests/benchmark/ --benchmark-only

# Run specific benchmark
pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32_varying_output_fields --benchmark-only

# Save baseline for comparison
pytest tests/benchmark/ --benchmark-only --benchmark-save=baseline

# Compare against baseline
pytest tests/benchmark/ --benchmark-only --benchmark-compare=baseline

# Generate histogram
pytest tests/benchmark/ --benchmark-only --benchmark-histogram
```
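
A test in `test_search_bench.py` follows the standard pytest-benchmark pattern. A hedged sketch (the `mocked_milvus_client` fixture and `setup_search_mock` helper come from `conftest.py`; `build_search_response` is a hypothetical stand-in for the builders in `mock_responses.py`):

```python
import random

from . import mock_responses
from .conftest import setup_search_mock


def test_search_float32_sketch(benchmark, mocked_milvus_client):
    # Hypothetical builder name; the real helpers live in mock_responses.py.
    response = mock_responses.build_search_response(nq=1, topk=10, dim=128)
    setup_search_mock(mocked_milvus_client, lambda *args, **kwargs: response)

    vector = [random.random() for _ in range(128)]
    # benchmark() invokes the callable repeatedly and records timing stats;
    # with the RPC mocked, only client-side code is measured.
    benchmark(
        mocked_milvus_client.search,
        collection_name="bench",  # illustrative collection name
        data=[vector],
        limit=10,
    )
```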

## 2. CPU Profiling (py-spy)
### Usage

#### Option A: Profile entire benchmark run

```bash
# Generate flamegraph (SVG)
py-spy record -o cpu_profile.svg --native -- pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32 -v

# Generate speedscope format (interactive viewer)
py-spy record -o cpu_profile.speedscope.json -f speedscope -- pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32 -v

# View speedscope: Upload to https://www.speedscope.app/
```

#### Option B: Use helper script

```bash
./tests/benchmark/scripts/profile_cpu.sh test_search_bench.py::test_search_float32
```

#### Option C: Profile specific function

```bash
# Top functions by CPU time
py-spy top -- python -m pytest tests/benchmark/test_search_bench.py::test_search_float32 -v
```

## 3. Memory Profiling (memray)

### What it Measures
- Memory allocation over time
- Peak memory usage
- Allocation flamegraphs
- Memory leaks
- Allocation call stacks

### Usage

#### Option A: Profile and generate reports

```bash
# Run with memray
memray run -o search_bench.bin -m pytest tests/benchmark/test_search_bench.py::test_search_float32 -v

# Generate flamegraph (HTML)
memray flamegraph search_bench.bin

# Generate table view (top allocators)
memray table search_bench.bin

# Generate tree view (call stack)
memray tree search_bench.bin

# Generate summary stats
memray summary search_bench.bin
```

#### Option B: Live monitoring

```bash
# Real-time memory usage in terminal
memray run --live -m pytest tests/benchmark/test_search_bench.py::test_search_float32 -v
```

#### Option C: Use helper script

```bash
./tests/benchmark/scripts/profile_memory.sh test_search_bench.py::test_search_float32
```

## 4. Complete Workflow

```bash
# Step 1: Install dependencies
pip install -e ".[dev]"

# Step 2: Run timing benchmarks and save a baseline (fast, ~minutes)
pytest tests/benchmark/ --benchmark-only --benchmark-save=baseline

# Step 3: Identify slow tests from benchmark results

# Step 4: CPU profile specific slow tests
py-spy record -o cpu_slow_test.svg -- pytest tests/benchmark/test_search_bench.py::test_slow_one -v

# Step 5: Memory profile tests with large results
memray run -o mem_large.bin pytest tests/benchmark/test_search_bench.py::test_large_results -v
memray flamegraph mem_large.bin

# Step 6: Analyze results and fix bottlenecks

# Step 7: Re-run benchmarks and compare with baseline
pytest tests/benchmark/ --benchmark-only --benchmark-compare=baseline
```

## Expected Bottlenecks

Based on code analysis, we expect to find:

1. **Protobuf deserialization** - Large responses with many fields
2. **Vector data conversion** - Bytes → numpy arrays
3. **Type conversions** - Protobuf types → Python types
4. **Field iteration** - Processing many output fields
5. **Memory copies** - Unnecessary data duplication

These benchmarks will help us validate and quantify these hypotheses.
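
As an example of bottleneck 2, converting raw response bytes into numpy arrays can either copy the payload or wrap it in place; a sketch (not pymilvus's actual internals) of the difference these benchmarks would surface:

```python
import numpy as np

# Fake wire payload: 10,000 float32 vectors of dim 128, as raw bytes.
raw = np.random.rand(10_000, 128).astype(np.float32).tobytes()

# Copying path: materializes a second buffer before reinterpreting it.
copied = np.array(bytearray(raw), dtype=np.uint8).view(np.float32).reshape(-1, 128)

# Zero-copy path: frombuffer wraps the existing bytes as a read-only view.
zero_copy = np.frombuffer(raw, dtype=np.float32).reshape(-1, 128)

assert np.array_equal(copied, zero_copy)
```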
Empty file added tests/benchmark/__init__.py
85 changes: 85 additions & 0 deletions tests/benchmark/conftest.py
@@ -0,0 +1,85 @@
from unittest.mock import MagicMock, patch

import pytest
from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient, StructFieldSchema
from pymilvus.grpc_gen import common_pb2, milvus_pb2, schema_pb2

from . import mock_responses


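# Helpers that swap a response-producing callable into the mocked gRPC stub of
# an already-connected client, one helper per RPC covered by the benchmarks.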
def setup_search_mock(client, mock_fn):
client._get_connection()._stub.Search = MagicMock(side_effect=mock_fn)


def setup_query_mock(client, mock_fn):
client._get_connection()._stub.Query = MagicMock(side_effect=mock_fn)


def setup_hybrid_search_mock(client, mock_fn):
client._get_connection()._stub.HybridSearch = MagicMock(side_effect=mock_fn)


def get_default_test_schema() -> CollectionSchema:
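    """Build a schema spanning scalar, vector, JSON, array, and struct field
    types, so response parsing exercises the full range of type conversions."""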
schema = MilvusClient.create_schema()
schema.add_field(field_name='id', datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name='embedding', datatype=DataType.FLOAT_VECTOR, dim=128)
schema.add_field(field_name='name', datatype=DataType.VARCHAR, max_length=100)
schema.add_field(field_name='bool_field', datatype=DataType.BOOL)
schema.add_field(field_name='int8_field', datatype=DataType.INT8)
schema.add_field(field_name='int16_field', datatype=DataType.INT16)
schema.add_field(field_name='int32_field', datatype=DataType.INT32)
schema.add_field(field_name='age', datatype=DataType.INT32)
schema.add_field(field_name='float_field', datatype=DataType.FLOAT)
schema.add_field(field_name='score', datatype=DataType.FLOAT)
schema.add_field(field_name='double_field', datatype=DataType.DOUBLE)
schema.add_field(field_name='varchar_field', datatype=DataType.VARCHAR, max_length=100)
schema.add_field(field_name='json_field', datatype=DataType.JSON)
schema.add_field(field_name='array_field', datatype=DataType.ARRAY, element_type=DataType.INT64, max_capacity=10)
schema.add_field(field_name='geometry_field', datatype=DataType.GEOMETRY)
schema.add_field(field_name='timestamptz_field', datatype=DataType.TIMESTAMPTZ)
schema.add_field(field_name='binary_vector', datatype=DataType.BINARY_VECTOR, dim=128)
schema.add_field(field_name='float16_vector', datatype=DataType.FLOAT16_VECTOR, dim=128)
schema.add_field(field_name='bfloat16_vector', datatype=DataType.BFLOAT16_VECTOR, dim=128)
schema.add_field(field_name='sparse_vector', datatype=DataType.SPARSE_FLOAT_VECTOR)
schema.add_field(field_name='int8_vector', datatype=DataType.INT8_VECTOR, dim=128)

struct_schema = StructFieldSchema()
struct_schema.add_field('struct_int', DataType.INT32)
struct_schema.add_field('struct_str', DataType.VARCHAR, max_length=100)
schema.add_field(field_name='struct_array_field', datatype=DataType.ARRAY, element_type=DataType.STRUCT, struct_schema=struct_schema, max_capacity=10)
return schema


@pytest.fixture
def mocked_milvus_client():
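    """Yield a MilvusClient whose gRPC channel and service stub are mocked, so
    it "connects" without a running Milvus server; per-test RPC behavior is
    installed with the setup_*_mock helpers above."""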
with patch('grpc.insecure_channel') as mock_channel_func, \
patch('grpc.secure_channel') as mock_secure_channel_func, \
patch('grpc.channel_ready_future') as mock_ready_future, \
patch('pymilvus.grpc_gen.milvus_pb2_grpc.MilvusServiceStub') as mock_stub_class:

mock_channel = MagicMock()
mock_channel_func.return_value = mock_channel
mock_secure_channel_func.return_value = mock_channel

mock_future = MagicMock()
mock_future.result = MagicMock(return_value=None)
mock_ready_future.return_value = mock_future

        mock_stub = MagicMock()

mock_connect_response = milvus_pb2.ConnectResponse()
mock_connect_response.status.error_code = common_pb2.ErrorCode.Success
mock_connect_response.status.code = 0
mock_connect_response.identifier = 12345
mock_stub.Connect = MagicMock(return_value=mock_connect_response)

mock_stub.Search = MagicMock()
mock_stub.Query = MagicMock()
mock_stub.HybridSearch = MagicMock()

mock_stub_class.return_value = mock_stub

client = MilvusClient()

yield client