Skip to content

Commit 920df7b

Browse files
authored
Support insert serialized json (#3024)
Signed-off-by: sunby <sunbingyi1992@gmail.com>
1 parent 977b47c commit 920df7b

File tree

2 files changed

+100
-2
lines changed

2 files changed

+100
-2
lines changed

pymilvus/client/entity_helper.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,27 @@ def preprocess_numpy_types(obj: Any):
197197
return bool(obj)
198198
return obj
199199

200+
# Handle JSON string input
201+
if isinstance(obj, str):
202+
try:
203+
# Validate JSON string by parsing it
204+
parsed_obj = orjson.loads(obj)
205+
# If it's a valid JSON string, validate dict keys if it's a dict
206+
if isinstance(parsed_obj, dict):
207+
for k in parsed_obj:
208+
if not isinstance(k, str):
209+
raise DataNotMatchException(message=ExceptionsMessage.JSONKeyMustBeStr)
210+
# Return the original string encoded as bytes (since it's already valid JSON)
211+
return obj.encode(Config.EncodeProtocol)
212+
except Exception as e:
213+
# Truncate the string if it's too long for better readability
214+
max_len = 200
215+
json_str_display = obj if len(obj) <= max_len else obj[:max_len] + "..."
216+
raise DataNotMatchException(
217+
message=f"Invalid JSON string: {e!s}. Input string: {json_str_display!r}"
218+
) from e
219+
220+
# Handle dict input
200221
if isinstance(obj, dict):
201222
for k in obj:
202223
if not isinstance(k, str):

tests/test_client_entity_helper.py

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@
77
import pytest
88
from pymilvus.client import entity_helper
99
from pymilvus.client.entity_helper import (
10-
convert_to_str_array,
11-
entity_to_array_arr,
10+
convert_to_str_array, entity_to_array_arr,
1211
entity_to_field_data,
1312
entity_to_str_arr,
1413
entity_type_to_dtype,
@@ -28,6 +27,7 @@
2827
from pymilvus.grpc_gen import schema_pb2 as schema_types
2928
from pymilvus.settings import Config
3029
from scipy.sparse import csr_matrix
30+
from pymilvus.exceptions import DataNotMatchException
3131

3232

3333
class TestEntityHelperSparse:
@@ -168,6 +168,83 @@ def test_convert_to_json_dict(self, data: dict):
168168
assert isinstance(result, bytes)
169169
assert json.loads(result.decode()) == data
170170

171+
@pytest.mark.parametrize("json_string,expected", [
    ('{"key": "value", "number": 42}', {"key": "value", "number": 42}),
    ('{"nested": {"inner": "value"}}', {"nested": {"inner": "value"}}),
    ('[1, 2, 3, "four"]', [1, 2, 3, "four"]),
    ('{"name": "Alice", "age": 30}', {"name": "Alice", "age": 30}),
    ('null', None),
    ('true', True),
    ('false', False),
    ('123', 123),
    ('"simple string"', "simple string"),
])
def test_convert_to_json_string_valid(self, json_string: str, expected):
    """Valid JSON text is accepted and comes back as bytes.

    Covers objects, arrays and every scalar JSON value; the returned
    bytes must decode and parse to ``expected``.
    """
    encoded = entity_helper.convert_to_json(json_string)
    assert isinstance(encoded, bytes)
    # Round-trip through the stdlib parser to confirm the payload is intact.
    assert json.loads(encoded.decode()) == expected
189+
190+
def test_convert_to_json_from_json_dumps(self):
    """A string produced by ``json.dumps()`` round-trips through convert_to_json."""
    source = {"key": "value", "count": 100, "nested": {"inner": "data"}}

    encoded = entity_helper.convert_to_json(json.dumps(source))

    assert isinstance(encoded, bytes)
    assert json.loads(encoded.decode()) == source
199+
200+
@pytest.mark.parametrize("invalid_json_string", [
    "not a json string",
    '{"invalid": }',
    '{"key": "value"',  # missing closing brace
    "{'key': 'value'}",  # single quotes not valid in JSON
    "{key: value}",  # unquoted keys
    "undefined",
    "{,}",
])
def test_convert_to_json_string_invalid(self, invalid_json_string: str):
    """Malformed JSON text is rejected with a DataNotMatchException.

    The error message must name the failure ("Invalid JSON string") and
    echo the offending input so the caller can locate it.
    """
    with pytest.raises(DataNotMatchException) as exc_info:
        entity_helper.convert_to_json(invalid_json_string)

    error_message = str(exc_info.value)
    assert "Invalid JSON string" in error_message
    # The message embeds the input via ``!r``; every case here is well under
    # the truncation limit, so its full repr must appear verbatim.
    assert repr(invalid_json_string) in error_message
220+
221+
def test_convert_to_json_string_with_non_string_keys(self):
    """Non-string object keys are rejected for both dict and string input.

    A Python dict can carry non-string keys and must fail key validation.
    The textual equivalent is not valid JSON at all (object keys are always
    strings), so it must be rejected as an invalid JSON string.
    """
    # Dict input: key validation rejects integer keys.
    invalid_dict = {1: "value", 2: "another"}
    with pytest.raises(DataNotMatchException) as exc_info:
        entity_helper.convert_to_json(invalid_dict)
    assert "JSON" in str(exc_info.value)

    # String input: an unquoted key cannot be parsed as JSON.
    invalid_json_string = '{1: "value"}'
    with pytest.raises(DataNotMatchException) as exc_info:
        entity_helper.convert_to_json(invalid_json_string)
    assert "Invalid JSON string" in str(exc_info.value)
233+
234+
def test_convert_to_json_long_invalid_string_truncated(self):
    """Long invalid JSON input is truncated in the error message.

    The message should show only the first 200 characters of the input
    followed by "...", never the whole string.
    """
    # Mirrors the display limit used when the error message is built.
    max_len = 200
    long_invalid_json = "invalid json " * 50  # 650 characters, well over the limit
    assert len(long_invalid_json) > max_len

    with pytest.raises(DataNotMatchException) as exc_info:
        entity_helper.convert_to_json(long_invalid_json)

    error_message = str(exc_info.value)
    assert "Invalid JSON string" in error_message
    # A bare "..." check could pass without any truncation happening, so pin
    # the exact truncated display and confirm the full input is absent.
    assert repr(long_invalid_json[:max_len] + "...") in error_message
    assert long_invalid_json not in error_message
247+
171248
def test_pack_field_value_to_field_data(self):
172249
"""Test packing field values into field data protobuf"""
173250
# Test with scalar field

0 commit comments

Comments
 (0)