Skip to content

Commit d2fc1d9

Browse files
authored
fix(core): allow parsing more partial JSON (#9511)
2 parents 3424293 + 9d7d500 commit d2fc1d9

File tree

7 files changed

+772
-55
lines changed

7 files changed

+772
-55
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@langchain/core": patch
3+
---
4+
5+
allow parsing more partial JSON

libs/langchain-core/src/messages/ai.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,10 @@ export class AIMessageChunk<
318318
for (const chunks of groupedToolCallChunks) {
319319
let parsedArgs: Record<string, unknown> | null = null;
320320
const name = chunks[0]?.name ?? "";
321-
const joinedArgs = chunks.map((c) => c.args || "").join("");
321+
const joinedArgs = chunks
322+
.map((c) => c.args || "")
323+
.join("")
324+
.trim();
322325
const argsStr = joinedArgs.length ? joinedArgs : "{}";
323326
const id = chunks[0]?.id;
324327
try {

libs/langchain-core/src/output_parsers/json.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@ export class JsonOutputParser<
4444
async parsePartialResult(
4545
generations: ChatGeneration[] | Generation[]
4646
): Promise<T | undefined> {
47-
return parseJsonMarkdown(generations[0].text);
47+
return parseJsonMarkdown(generations[0].text) as T | undefined;
4848
}
4949

5050
async parse(text: string): Promise<T> {
51-
return parseJsonMarkdown(text, JSON.parse);
51+
return parseJsonMarkdown(text, JSON.parse) as T;
5252
}
5353

5454
getFormatInstructions(): string {

libs/langchain-core/src/output_parsers/tests/json.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,12 +267,12 @@ const MARKDOWN_STREAM_TEST_CASES = [
267267
{
268268
name: "Markdown with split code block",
269269
input: ['```json\n{"', 'countries": [{"n', 'ame": "China"}]}', "\n```"],
270-
expected: [{ countries: [{ name: "China" }] }],
270+
expected: [{}, { countries: [{}] }, { countries: [{ name: "China" }] }],
271271
},
272272
{
273273
name: "Markdown without json identifier, split",
274274
input: ['```\n{"', 'key": "val', '"}\n```'],
275-
expected: [{ key: "val" }],
275+
expected: [{}, { key: "val" }],
276276
},
277277
{
278278
name: "Ignores text after closing markdown backticks",

libs/langchain-core/src/utils/json.ts

Lines changed: 277 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -26,75 +26,302 @@ export function parseJsonMarkdown(s: string, parser = parsePartialJson) {
2626
return parser(finalContent.trim());
2727
}
2828

29-
// Adapted from https://github.com/KillianLucas/open-interpreter/blob/main/interpreter/core/llm/utils/parse_partial_json.py
30-
// MIT License
31-
export function parsePartialJson(s: string) {
32-
// If the input is undefined, return null to indicate failure.
33-
if (typeof s === "undefined") {
34-
return null;
35-
}
36-
37-
// Attempt to parse the string as-is.
29+
/**
 * Recursive descent parser for complete or truncated ("partial") JSON text.
 *
 * The input is first handed to `JSON.parse`; only when that fails is the
 * trimmed input re-parsed leniently. In lenient mode, running out of input
 * is never an error on its own: unterminated strings, arrays and objects are
 * returned with whatever was read so far, and a truncated prefix of
 * `null` / `true` / `false` resolves to the full literal. An object key that
 * has no value yet is dropped.
 *
 * Object members are always created as own data properties (the same way
 * `JSON.parse` does), so a `"__proto__"` key in the input cannot replace the
 * prototype of the returned object.
 *
 * @param s - The string to parse.
 * @returns The parsed value.
 * @throws Error if the input is empty/whitespace-only or is malformed in a
 * way that truncation alone cannot explain (unexpected character, missing
 * `:` or `,`, invalid escape sequence, invalid number, trailing content).
 */
export function strictParsePartialJson(s: string): unknown {
  try {
    return JSON.parse(s);
  } catch {
    // Continue to partial parsing
  }

  const buffer = s.trim();
  if (buffer.length === 0) throw new Error("Unexpected end of JSON input");

  // Cursor into `buffer`, shared by all nested parse helpers below.
  let pos = 0;

  /** Advances the cursor past any run of whitespace. */
  function skipWhitespace(): void {
    while (pos < buffer.length && /\s/.test(buffer[pos])) {
      pos += 1;
    }
  }

  /**
   * Stores `value` under `key` as an own, enumerable data property.
   * Plain assignment (`obj[key] = value`) would hit the inherited
   * `__proto__` setter for that key and swap the object's prototype.
   */
  function setOwnProperty(
    obj: Record<string, unknown>,
    key: string,
    value: unknown
  ): void {
    Object.defineProperty(obj, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }

  /**
   * Parses a string literal starting at the opening quote. If the input ends
   * before the closing quote, the text decoded so far is returned.
   */
  function parseString(): string {
    if (buffer[pos] !== '"') {
      throw new Error(`Expected '"' at position ${pos}, got '${buffer[pos]}'`);
    }

    pos += 1;
    let result = "";
    let escaped = false;

    while (pos < buffer.length) {
      const char = buffer[pos];

      if (escaped) {
        if (char === "n") {
          result += "\n";
        } else if (char === "t") {
          result += "\t";
        } else if (char === "r") {
          result += "\r";
        } else if (char === "\\") {
          result += "\\";
        } else if (char === '"') {
          result += '"';
        } else if (char === "b") {
          result += "\b";
        } else if (char === "f") {
          result += "\f";
        } else if (char === "/") {
          result += "/";
        } else if (char === "u") {
          // Up to four hex digits may follow; fewer means the input was cut
          // off in the middle of the escape.
          const hex = buffer.substring(pos + 1, pos + 5);
          if (/^[0-9A-Fa-f]{0,4}$/.test(hex)) {
            if (hex.length === 4) {
              result += String.fromCharCode(Number.parseInt(hex, 16));
            } else {
              // Truncated escape: keep the raw characters read so far.
              result += `u${hex}`;
            }

            pos += hex.length;
          } else {
            throw new Error(
              `Invalid unicode escape sequence '\\u${hex}' at position ${pos}`
            );
          }
        } else {
          throw new Error(
            `Invalid escape sequence '\\${char}' at position ${pos}`
          );
        }
        escaped = false;
      } else if (char === "\\") {
        escaped = true;
      } else if (char === '"') {
        pos += 1;
        return result;
      } else {
        result += char;
      }

      pos += 1;
    }

    // Input ended right after a backslash: keep it as a literal character.
    if (escaped) result += "\\";
    return result;
  }

  /**
   * Parses a number. Truncated forms such as `1.`, `1e` or `1e-` yield the
   * numeric prefix; a lone `-` yields `-0`.
   */
  function parseNumber(): number {
    const start = pos;
    let numStr = "";

    if (buffer[pos] === "-") {
      numStr += "-";
      pos += 1;
    }

    if (pos < buffer.length && buffer[pos] === "0") {
      numStr += "0";
      pos += 1;

      // JSON forbids leading zeros ("01").
      if (buffer[pos] >= "0" && buffer[pos] <= "9") {
        throw new Error(`Invalid number at position ${start}`);
      }
    }

    if (pos < buffer.length && buffer[pos] >= "1" && buffer[pos] <= "9") {
      while (pos < buffer.length && buffer[pos] >= "0" && buffer[pos] <= "9") {
        numStr += buffer[pos];
        pos += 1;
      }
    }

    if (pos < buffer.length && buffer[pos] === ".") {
      numStr += ".";
      pos += 1;
      while (pos < buffer.length && buffer[pos] >= "0" && buffer[pos] <= "9") {
        numStr += buffer[pos];
        pos += 1;
      }
    }

    if (pos < buffer.length && (buffer[pos] === "e" || buffer[pos] === "E")) {
      numStr += buffer[pos];
      pos += 1;
      if (pos < buffer.length && (buffer[pos] === "+" || buffer[pos] === "-")) {
        numStr += buffer[pos];
        pos += 1;
      }
      while (pos < buffer.length && buffer[pos] >= "0" && buffer[pos] <= "9") {
        numStr += buffer[pos];
        pos += 1;
      }
    }

    if (numStr === "-") return -0;

    // parseFloat tolerates a dangling "." / "e" / sign left by truncation.
    const num = Number.parseFloat(numStr);

    if (Number.isNaN(num)) {
      pos = start;
      throw new Error(`Invalid number '${numStr}' at position ${start}`);
    }

    return num;
  }

  /** Parses any JSON value at the cursor, dispatching on its first character. */
  function parseValue(): unknown {
    skipWhitespace();

    if (pos >= buffer.length) {
      throw new Error(`Unexpected end of input at position ${pos}`);
    }

    const char = buffer[pos];

    if (char === "{") return parseObject();
    if (char === "[") return parseArray();
    if (char === '"') return parseString();

    // Literals: accept the full keyword, or a prefix of it when the input
    // ends inside the keyword (the slice is then shorter than the keyword).
    if ("null".startsWith(buffer.substring(pos, pos + 4))) {
      pos += Math.min(4, buffer.length - pos);
      return null;
    }

    if ("true".startsWith(buffer.substring(pos, pos + 4))) {
      pos += Math.min(4, buffer.length - pos);
      return true;
    }

    if ("false".startsWith(buffer.substring(pos, pos + 5))) {
      pos += Math.min(5, buffer.length - pos);
      return false;
    }

    if (char === "-" || (char >= "0" && char <= "9")) {
      return parseNumber();
    }

    throw new Error(`Unexpected character '${char}' at position ${pos}`);
  }

  /** Parses an array; returns the elements read so far if input ends early. */
  function parseArray(): unknown[] {
    if (buffer[pos] !== "[") {
      throw new Error(`Expected '[' at position ${pos}, got '${buffer[pos]}'`);
    }

    const arr: unknown[] = [];

    pos += 1;
    skipWhitespace();

    if (pos >= buffer.length) return arr;
    if (buffer[pos] === "]") {
      pos += 1;
      return arr;
    }

    while (pos < buffer.length) {
      skipWhitespace();
      if (pos >= buffer.length) return arr;

      arr.push(parseValue());

      skipWhitespace();
      if (pos >= buffer.length) return arr;

      if (buffer[pos] === "]") {
        pos += 1;
        return arr;
      } else if (buffer[pos] === ",") {
        pos += 1;
        continue;
      }

      throw new Error(
        `Expected ',' or ']' at position ${pos}, got '${buffer[pos]}'`
      );
    }

    return arr;
  }

  /**
   * Parses an object; returns the members read so far if input ends early.
   * A key whose value has not started yet is not added.
   */
  function parseObject(): Record<string, unknown> {
    if (buffer[pos] !== "{") {
      throw new Error(`Expected '{' at position ${pos}, got '${buffer[pos]}'`);
    }

    const obj: Record<string, unknown> = {};
    pos += 1;
    skipWhitespace();

    if (pos >= buffer.length) return obj;
    if (buffer[pos] === "}") {
      pos += 1;
      return obj;
    }

    while (pos < buffer.length) {
      skipWhitespace();
      if (pos >= buffer.length) return obj;

      const key = parseString();

      skipWhitespace();
      if (pos >= buffer.length) return obj;

      if (buffer[pos] !== ":") {
        throw new Error(
          `Expected ':' at position ${pos}, got '${buffer[pos]}'`
        );
      }
      pos += 1;

      skipWhitespace();
      if (pos >= buffer.length) return obj;

      // Own-property definition, not assignment: see setOwnProperty.
      setOwnProperty(obj, key, parseValue());

      skipWhitespace();
      if (pos >= buffer.length) return obj;

      if (buffer[pos] === "}") {
        pos += 1;
        return obj;
      } else if (buffer[pos] === ",") {
        pos += 1;
        continue;
      }

      throw new Error(
        `Expected ',' or '}' at position ${pos}, got '${buffer[pos]}'`
      );
    }

    return obj;
  }

  const value = parseValue();
  skipWhitespace();

  // Anything left after the top-level value is garbage, not truncation.
  if (pos < buffer.length) {
    throw new Error(`Unexpected character '${buffer[pos]}' at position ${pos}`);
  }

  return value;
}
318+
319+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
320+
export function parsePartialJson(s: string): any | null {
95321
// Attempt to parse the modified string as JSON.
96322
try {
97-
return JSON.parse(new_s);
323+
if (typeof s === "undefined") return null;
324+
return strictParsePartialJson(s);
98325
} catch {
99326
// If we still can't parse the string as JSON, return null to indicate failure.
100327
return null;

0 commit comments

Comments
 (0)