77#include "pycore_pyhash.h" // _Py_HashSecret
88#include "pycore_traceback.h" // _PyTraceback_Add()
99
10+ #include <float.h> // FLT_MAX
1011#include <stdbool.h>
1112#include <stddef.h> // offsetof()
13+
1214#include "expat.h"
1315#include "pyexpat.h"
1416
@@ -138,31 +140,72 @@ set_error_attr(PyObject *err, const char *name, int value)
138140 return 1 ;
139141}
140142
143+ static PyObject *
144+ format_xml_error (enum XML_Error code , int lineno , int column )
145+ {
146+ const char * errmsg = XML_ErrorString (code );
147+ PyUnicodeWriter * writer = PyUnicodeWriter_Create (strlen (errmsg ) + 1 );
148+ if (writer == NULL ) {
149+ return NULL ;
150+ }
151+ if (PyUnicodeWriter_Format (writer ,
152+ "%s: line %i, column %i" ,
153+ errmsg , lineno , column ) < 0 )
154+ {
155+ PyUnicodeWriter_Discard (writer );
156+ return NULL ;
157+ }
158+ return PyUnicodeWriter_Finish (writer );
159+ }
160+
161+ static PyObject *
162+ set_xml_error (pyexpat_state * state ,
163+ enum XML_Error code , int lineno , int column ,
164+ const char * errmsg )
165+ {
166+ PyObject * arg = errmsg == NULL
167+ ? format_xml_error (code , lineno , column )
168+ : PyUnicode_FromStringAndSize (errmsg , strlen (errmsg ));
169+ if (arg == NULL ) {
170+ return NULL ;
171+ }
172+ PyObject * res = PyObject_CallOneArg (state -> error , arg );
173+ Py_DECREF (arg );
174+ if (
175+ res != NULL
176+ && set_error_attr (res , "code" , code )
177+ && set_error_attr (res , "lineno" , lineno )
178+ && set_error_attr (res , "offset" , column )
179+ ) {
180+ PyErr_SetObject (state -> error , res );
181+ Py_DECREF (res );
182+ }
183+ return NULL ;
184+ }
185+
/* Set an Expat exception for the parser wrapped by SELF.
 *
 * The error position is read from SELF->itself (which must not be NULL)
 * and forwarded, together with CODE and ERRMSG, to set_xml_error().
 * ERRMSG may be NULL, in which case set_xml_error() builds the default
 * message for CODE.
 *
 * The arguments are parenthesized and STATE is used as given, so the macro
 * does not depend on the caller having a variable literally named 'state'.
 */
#define SET_XML_ERROR(STATE, SELF, CODE, ERRMSG)                            \
    do {                                                                    \
        XML_Parser parser = (SELF)->itself;                                 \
        assert(parser != NULL);                                             \
        int lineno = XML_GetErrorLineNumber(parser);                        \
        int column = XML_GetErrorColumnNumber(parser);                      \
        (void)set_xml_error((STATE), (CODE), lineno, column, (ERRMSG));     \
    } while (0)
194+
141195/* Build and set an Expat exception, including positioning
142196 * information. Always returns NULL.
143197 */
144198static PyObject *
145199set_error (pyexpat_state * state , xmlparseobject * self , enum XML_Error code )
146200{
147- PyObject * err ;
148- PyObject * buffer ;
149- XML_Parser parser = self -> itself ;
150- int lineno = XML_GetErrorLineNumber (parser );
151- int column = XML_GetErrorColumnNumber (parser );
201+ SET_XML_ERROR (state , self , code , NULL );
202+ return NULL ;
203+ }
152204
153- buffer = PyUnicode_FromFormat ("%s: line %i, column %i" ,
154- XML_ErrorString (code ), lineno , column );
155- if (buffer == NULL )
156- return NULL ;
157- err = PyObject_CallOneArg (state -> error , buffer );
158- Py_DECREF (buffer );
159- if ( err != NULL
160- && set_error_attr (err , "code" , code )
161- && set_error_attr (err , "offset" , column )
162- && set_error_attr (err , "lineno" , lineno )) {
163- PyErr_SetObject (state -> error , err );
164- }
165- Py_XDECREF (err );
205+ static PyObject *
206+ set_invalid_arg (pyexpat_state * state , xmlparseobject * self , const char * errmsg )
207+ {
208+ SET_XML_ERROR (state , self , XML_ERROR_INVALID_ARGUMENT , errmsg );
166209 return NULL ;
167210}
168211
@@ -1133,6 +1176,89 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
11331176}
11341177#endif
11351178
1179+ #if XML_COMBINED_VERSION >= 20702
1180+ /*[clinic input]
1181+ @permit_long_summary
1182+ @permit_long_docstring_body
1183+ pyexpat.xmlparser.SetAllocTrackerMaximumAmplification
1184+
1185+ cls: defining_class
1186+ max_factor: float
1187+ /
1188+
1189+ Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated.
1190+
1191+ By default, parsers objects have a maximum amplification factor of 100.
1192+
1193+ The amplification factor is calculated as "allocated / direct" while parsing,
1194+ where "direct" is the number of bytes read from the primary document in parsing
1195+ and "allocated" is the number of bytes of dynamic memory allocated in the parser
1196+ hierarchy.
1197+
1198+ The 'max_factor' value must be a non-NaN floating point value greater than
1199+ or equal to 1.0. Amplifications factors greater than 100 can been observed
1200+ near the start of parsing even with benign files in practice. As such, the
1201+ upper bound must be carefully chosen so to avoid false positives.
1202+ [clinic start generated code]*/
1203+
1204+ static PyObject *
1205+ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl (xmlparseobject * self ,
1206+ PyTypeObject * cls ,
1207+ float max_factor )
1208+ /*[clinic end generated code: output=6e44bd48c9b112a0 input=18e8d07329c0efda]*/
1209+ {
1210+ assert (self -> itself != NULL );
1211+ if (XML_SetAllocTrackerMaximumAmplification (self -> itself , max_factor ) == XML_TRUE ) {
1212+ Py_RETURN_NONE ;
1213+ }
1214+ // XML_SetAllocTrackerMaximumAmplification() can fail if self->itself
1215+ // is not a root parser (currently, this is equivalent to be created
1216+ // by ExternalEntityParserCreate()) or if 'max_factor' is NaN or < 1.0.
1217+ //
1218+ // Expat does not provide a way to determine whether a parser is a root
1219+ // or not, nor does it provide a way to distinguish between failures in
1220+ // XML_SetAllocTrackerMaximumAmplification() (see gh-90949), we manually
1221+ // detect the factor out-of-range issue here so that users have a better
1222+ // error message.
1223+ pyexpat_state * state = PyType_GetModuleState (cls );
1224+ const char * message = (isnan (max_factor ) || max_factor < 1.0f )
1225+ ? "'max_factor' must be at least 1.0"
1226+ : "parser must be a root parser" ;
1227+ return set_invalid_arg (state , self , message );
1228+ }
1229+
1230+ /*[clinic input]
1231+ @permit_long_summary
1232+ @permit_long_docstring_body
1233+ pyexpat.xmlparser.SetAllocTrackerActivationThreshold
1234+
1235+ cls: defining_class
1236+ threshold: unsigned_long_long
1237+ /
1238+
1239+ Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM.
1240+
1241+ By default, parsers objects have an allocation activation threshold of 64 MiB.
1242+ [clinic start generated code]*/
1243+
1244+ static PyObject *
1245+ pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl (xmlparseobject * self ,
1246+ PyTypeObject * cls ,
1247+ unsigned long long threshold )
1248+ /*[clinic end generated code: output=bed7e93207ba08c5 input=8453509a137a47c0]*/
1249+ {
1250+ assert (self -> itself != NULL );
1251+ if (XML_SetAllocTrackerActivationThreshold (self -> itself , threshold ) == XML_TRUE ) {
1252+ Py_RETURN_NONE ;
1253+ }
1254+ // XML_SetAllocTrackerActivationThreshold() can only fail if self->itself
1255+ // is not a root parser (currently, this is equivalent to be created
1256+ // by ExternalEntityParserCreate()).
1257+ pyexpat_state * state = PyType_GetModuleState (cls );
1258+ return set_invalid_arg (state , self , "parser must be a root parser" );
1259+ }
1260+ #endif
1261+
11361262static struct PyMethodDef xmlparse_methods [] = {
11371263 PYEXPAT_XMLPARSER_PARSE_METHODDEF
11381264 PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
@@ -1141,9 +1267,9 @@ static struct PyMethodDef xmlparse_methods[] = {
11411267 PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
11421268 PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
11431269 PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
1144- #if XML_COMBINED_VERSION >= 19505
11451270 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
1146- #endif
1271+ PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF
1272+ PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF
11471273 PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF
11481274 PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF
11491275 {NULL , NULL } /* sentinel */
0 commit comments