@@ -26,75 +26,302 @@ export function parseJsonMarkdown(s: string, parser = parsePartialJson) {
2626 return parser ( finalContent . trim ( ) ) ;
2727}
2828
29- // Adapted from https://github.com/KillianLucas/open-interpreter/blob/main/interpreter/core/llm/utils/parse_partial_json.py
30- // MIT License
31- export function parsePartialJson ( s : string ) {
32- // If the input is undefined, return null to indicate failure.
33- if ( typeof s === "undefined" ) {
34- return null ;
35- }
36-
37- // Attempt to parse the string as-is.
/**
 * Recursive-descent parser for JSON text that may be cut off part-way
 * through (for example, a streamed response that has not finished yet).
 *
 * The input is first handed to `JSON.parse`; only when that fails is the
 * tolerant parser used. The tolerant parser returns whatever value can be
 * built from the text seen so far:
 * - an unterminated string yields the characters read so far;
 * - a prefix of `null`, `true` or `false` yields the full literal;
 * - an unterminated array/object yields the elements/entries completed so
 *   far (an object key that has no value yet is dropped).
 *
 * @param s - The string to parse.
 * @returns The parsed value.
 * @throws Error if the input is empty after trimming, or is malformed in a
 *   way that cannot be explained by truncation (unexpected character,
 *   invalid escape sequence, leading zero, trailing garbage, ...).
 */
export function strictParsePartialJson(s: string): unknown {
  try {
    return JSON.parse(s);
  } catch {
    // Not complete, valid JSON: continue with the tolerant parser below.
  }

  const buffer = s.trim();
  if (buffer.length === 0) throw new Error("Unexpected end of JSON input");

  // Cursor into `buffer`, shared by all of the nested parse functions.
  let pos = 0;

  // Regexes are hoisted so they are not re-created for every character.
  const whitespacePattern = /\s/;
  const partialHexPattern = /^[0-9A-Fa-f]{0,4}$/;

  // Single-character escapes: the character after the backslash -> its value.
  const simpleEscapes = new Map<string, string>([
    ["n", "\n"],
    ["t", "\t"],
    ["r", "\r"],
    ["\\", "\\"],
    ['"', '"'],
    ["b", "\b"],
    ["f", "\f"],
    ["/", "/"],
  ]);

  // Literal keywords, tried in this order; a truncated prefix is accepted.
  const literals: ReadonlyArray<readonly [string, null | boolean]> = [
    ["null", null],
    ["true", true],
    ["false", false],
  ];

  /** True when `ch` is an ASCII digit; false for `undefined` (past the end). */
  function isDigit(ch: string | undefined): boolean {
    return ch !== undefined && ch >= "0" && ch <= "9";
  }

  /** Advances `pos` past any whitespace. */
  function skipWhitespace(): void {
    while (pos < buffer.length && whitespacePattern.test(buffer[pos])) {
      pos += 1;
    }
  }

  /**
   * Parses a string starting at the opening quote. If the input ends before
   * the closing quote, the text accumulated so far is returned.
   */
  function parseString(): string {
    if (buffer[pos] !== '"') {
      throw new Error(`Expected '"' at position ${pos}, got '${buffer[pos]}'`);
    }

    pos += 1;
    let result = "";
    let escaped = false;

    while (pos < buffer.length) {
      const char = buffer[pos];

      if (escaped) {
        const simple = simpleEscapes.get(char);
        if (simple !== undefined) {
          result += simple;
        } else if (char === "u") {
          // Up to four hex digits; fewer than four means the escape was
          // truncated (or is followed by something that fails the pattern).
          const hex = buffer.substring(pos + 1, pos + 5);
          if (!partialHexPattern.test(hex)) {
            throw new Error(
              `Invalid unicode escape sequence '\\u${hex}' at position ${pos}`
            );
          }
          result +=
            hex.length === 4
              ? String.fromCharCode(Number.parseInt(hex, 16))
              : `u${hex}`; // incomplete escape: keep the raw text seen so far
          pos += hex.length;
        } else {
          throw new Error(
            `Invalid escape sequence '\\${char}' at position ${pos}`
          );
        }
        escaped = false;
      } else if (char === "\\") {
        escaped = true;
      } else if (char === '"') {
        pos += 1;
        return result;
      } else {
        result += char;
      }

      pos += 1;
    }

    // Input ended inside the string; a dangling backslash is kept verbatim.
    if (escaped) result += "\\";
    return result;
  }

  /**
   * Parses a number. Truncated forms such as `1.`, `1e` or `1e-` evaluate to
   * the numeric prefix; a lone `-` evaluates to `-0`.
   */
  function parseNumber(): number {
    const start = pos;
    let numStr = "";

    const consumeDigits = (): void => {
      while (pos < buffer.length && isDigit(buffer[pos])) {
        numStr += buffer[pos];
        pos += 1;
      }
    };

    if (buffer[pos] === "-") {
      numStr += "-";
      pos += 1;
    }

    if (pos < buffer.length && buffer[pos] === "0") {
      numStr += "0";
      pos += 1;

      // JSON forbids leading zeros such as `012`.
      if (isDigit(buffer[pos])) {
        throw new Error(`Invalid number at position ${start}`);
      }
    }

    const lead = buffer[pos];
    if (lead !== undefined && lead >= "1" && lead <= "9") {
      consumeDigits();
    }

    if (pos < buffer.length && buffer[pos] === ".") {
      numStr += ".";
      pos += 1;
      consumeDigits();
    }

    if (pos < buffer.length && (buffer[pos] === "e" || buffer[pos] === "E")) {
      numStr += buffer[pos];
      pos += 1;
      if (pos < buffer.length && (buffer[pos] === "+" || buffer[pos] === "-")) {
        numStr += buffer[pos];
        pos += 1;
      }
      consumeDigits();
    }

    if (numStr === "-") return -0;

    const num = Number.parseFloat(numStr);

    if (Number.isNaN(num)) {
      pos = start;
      throw new Error(`Invalid number '${numStr}' at position ${start}`);
    }

    return num;
  }

  /** Parses any JSON value, dispatching on the first non-whitespace character. */
  function parseValue(): unknown {
    skipWhitespace();

    if (pos >= buffer.length) {
      throw new Error(`Unexpected end of input at position ${pos}`);
    }

    const char = buffer[pos];

    if (char === "{") return parseObject();
    if (char === "[") return parseArray();
    if (char === '"') return parseString();

    // Accept a full literal, or a prefix of one when the input ends there.
    for (const [text, value] of literals) {
      if (text.startsWith(buffer.substring(pos, pos + text.length))) {
        pos += Math.min(text.length, buffer.length - pos);
        return value;
      }
    }

    if (char === "-" || isDigit(char)) {
      return parseNumber();
    }

    throw new Error(`Unexpected character '${char}' at position ${pos}`);
  }

  /** Parses an array; returns the elements read so far if the input ends early. */
  function parseArray(): unknown[] {
    if (buffer[pos] !== "[") {
      throw new Error(`Expected '[' at position ${pos}, got '${buffer[pos]}'`);
    }

    const arr: unknown[] = [];

    pos += 1;
    skipWhitespace();

    if (pos >= buffer.length) return arr;
    if (buffer[pos] === "]") {
      pos += 1;
      return arr;
    }

    while (pos < buffer.length) {
      skipWhitespace();
      if (pos >= buffer.length) return arr;

      arr.push(parseValue());

      skipWhitespace();
      if (pos >= buffer.length) return arr;

      if (buffer[pos] === "]") {
        pos += 1;
        return arr;
      }
      if (buffer[pos] === ",") {
        pos += 1;
        continue;
      }

      throw new Error(
        `Expected ',' or ']' at position ${pos}, got '${buffer[pos]}'`
      );
    }

    return arr;
  }

  /**
   * Parses an object; returns the entries completed so far if the input ends
   * early. A key whose value has not started yet is not added.
   */
  function parseObject(): Record<string, unknown> {
    if (buffer[pos] !== "{") {
      throw new Error(`Expected '{' at position ${pos}, got '${buffer[pos]}'`);
    }

    const obj: Record<string, unknown> = {};
    pos += 1;
    skipWhitespace();

    if (pos >= buffer.length) return obj;
    if (buffer[pos] === "}") {
      pos += 1;
      return obj;
    }

    while (pos < buffer.length) {
      skipWhitespace();
      if (pos >= buffer.length) return obj;

      const key = parseString();

      skipWhitespace();
      if (pos >= buffer.length) return obj;

      if (buffer[pos] !== ":") {
        throw new Error(
          `Expected ':' at position ${pos}, got '${buffer[pos]}'`
        );
      }
      pos += 1;

      skipWhitespace();
      if (pos >= buffer.length) return obj;

      // Define an own data property instead of assigning with `obj[key] = …`:
      // plain assignment to the key "__proto__" would invoke the prototype
      // setter and let the input replace the object's prototype, whereas
      // JSON.parse creates an ordinary own property for that key.
      Object.defineProperty(obj, key, {
        value: parseValue(),
        writable: true,
        enumerable: true,
        configurable: true,
      });

      skipWhitespace();
      if (pos >= buffer.length) return obj;

      if (buffer[pos] === "}") {
        pos += 1;
        return obj;
      }
      if (buffer[pos] === ",") {
        pos += 1;
        continue;
      }

      throw new Error(
        `Expected ',' or '}' at position ${pos}, got '${buffer[pos]}'`
      );
    }

    return obj;
  }

  const value = parseValue();
  skipWhitespace();

  // Anything left after a complete top-level value is trailing garbage.
  if (pos < buffer.length) {
    throw new Error(`Unexpected character '${buffer[pos]}' at position ${pos}`);
  }

  return value;
}
318+
319+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
320+ export function parsePartialJson ( s : string ) : any | null {
95321 // Attempt to parse the modified string as JSON.
96322 try {
97- return JSON . parse ( new_s ) ;
323+ if ( typeof s === "undefined" ) return null ;
324+ return strictParsePartialJson ( s ) ;
98325 } catch {
99326 // If we still can't parse the string as JSON, return null to indicate failure.
100327 return null ;
0 commit comments