44#include "static_string.h"
55
66PyTypeObject * StringScalar_Type = NULL ;
7- static PyTypeObject * StringNA_Type = NULL ;
8- PyObject * NA_OBJ = NULL ;
97
108/*
119 * Internal helper to create new instances
@@ -20,7 +18,7 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
2018 return NULL ;
2119 }
2220
23- Py_INCREF (na_object );
21+ Py_XINCREF (na_object );
2422 ((StringDTypeObject * )new )-> na_object = na_object ;
2523 ((StringDTypeObject * )new )-> coerce = coerce ;
2624
@@ -108,7 +106,7 @@ string_discover_descriptor_from_pyobject(PyTypeObject *NPY_UNUSED(cls),
108106 return NULL ;
109107 }
110108
111- PyArray_Descr * ret = (PyArray_Descr * )new_stringdtype_instance (NA_OBJ , 1 );
109+ PyArray_Descr * ret = (PyArray_Descr * )new_stringdtype_instance (NULL , 1 );
112110 if (ret == NULL ) {
113111 return NULL ;
114112 }
@@ -131,7 +129,7 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
131129
132130 // setting NA *must* check pointer equality since NA types might not
133131 // allow equality
134- if (obj == na_object ) {
132+ if (na_object != NULL && obj == na_object ) {
135133 // do nothing, ssfree already NULLed the struct ssdata points to
136134 // so it already contains a NA value
137135 }
@@ -173,11 +171,17 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
173171{
174172 PyObject * val_obj = NULL ;
175173 ss * sdata = (ss * )dataptr ;
174+ int hasnull = descr -> na_object != NULL ;
176175
177176 if (ss_isnull (sdata )) {
178- PyObject * na_object = descr -> na_object ;
179- Py_INCREF (na_object );
180- val_obj = na_object ;
177+ if (hasnull ) {
178+ PyObject * na_object = descr -> na_object ;
179+ Py_INCREF (na_object );
180+ val_obj = na_object ;
181+ }
182+ else {
183+ val_obj = PyUnicode_FromStringAndSize ("" , 0 );
184+ }
181185 }
182186 else {
183187 char * data = sdata -> buf ;
@@ -213,17 +217,37 @@ nonzero(void *data, void *NPY_UNUSED(arr))
213217// Implementation of PyArray_CompareFunc.
214218// Compares unicode strings by their code points.
215219int
216- compare (void * a , void * b , void * NPY_UNUSED (arr ))
220+ compare (void * a , void * b , void * arr )
221+ {
222+ StringDTypeObject * descr = (StringDTypeObject * )PyArray_DESCR (arr );
223+ return _compare (a , b , descr );
224+ }
225+
226+ int
227+ _compare (void * a , void * b , StringDTypeObject * descr )
217228{
218- ss * ss_a = (ss * )a ;
219- ss * ss_b = (ss * )b ;
229+ int hasnull = descr -> na_object != NULL ;
230+ const ss * ss_a = (ss * )a ;
231+ const ss * ss_b = (ss * )b ;
220232 int a_is_null = ss_isnull (ss_a );
221233 int b_is_null = ss_isnull (ss_b );
222- if (a_is_null ) {
223- return 1 ;
224- }
225- else if (b_is_null ) {
226- return -1 ;
234+ if (NPY_UNLIKELY (a_is_null || b_is_null )) {
235+ if (hasnull ) {
236+ if (a_is_null ) {
237+ return 1 ;
238+ }
239+ else if (b_is_null ) {
240+ return -1 ;
241+ }
242+ }
243+ else {
244+ if (a_is_null ) {
245+ ss_a = & EMPTY_STRING ;
246+ }
247+ if (b_is_null ) {
248+ ss_b = & EMPTY_STRING ;
249+ }
250+ }
227251 }
228252 return strcmp (ss_a -> buf , ss_b -> buf );
229253}
@@ -344,22 +368,18 @@ static PyType_Slot StringDType_Slots[] = {
344368static PyObject *
345369stringdtype_new (PyTypeObject * NPY_UNUSED (cls ), PyObject * args , PyObject * kwds )
346370{
347- static char * kwargs_strs [] = {"size" , "na_object " , "coerce " , NULL };
371+ static char * kwargs_strs [] = {"size" , "coerce " , "na_object " , NULL };
348372
349373 long size = 0 ;
350374 PyObject * na_object = NULL ;
351375 int coerce = 1 ;
352376
353- if (!PyArg_ParseTupleAndKeywords (args , kwds , "|lOp :StringDType" ,
354- kwargs_strs , & size , & na_object ,
355- & coerce )) {
377+ if (!PyArg_ParseTupleAndKeywords (args , kwds , "|lpO :StringDType" ,
378+ kwargs_strs , & size , & coerce ,
379+ & na_object )) {
356380 return NULL ;
357381 }
358382
359- if (na_object == NULL ) {
360- na_object = NA_OBJ ;
361- }
362-
363383 PyObject * ret = new_stringdtype_instance (na_object , coerce );
364384
365385 return ret ;
@@ -379,12 +399,11 @@ stringdtype_repr(StringDTypeObject *self)
379399 PyObject * na_object = self -> na_object ;
380400 int coerce = self -> coerce ;
381401
382- // TODO: handle non-default NA
383- if (na_object != NA_OBJ && coerce == 0 ) {
402+ if (na_object != NULL && coerce == 0 ) {
384403 ret = PyUnicode_FromFormat ("StringDType(na_object=%R, coerce=False)" ,
385404 na_object );
386405 }
387- else if (na_object != NA_OBJ ) {
406+ else if (na_object != NULL ) {
388407 ret = PyUnicode_FromFormat ("StringDType(na_object=%R)" , na_object );
389408 }
390409 else if (coerce == 0 ) {
@@ -424,9 +443,16 @@ stringdtype__reduce__(StringDTypeObject *self)
424443
425444 PyTuple_SET_ITEM (ret , 0 , obj );
426445
427- PyTuple_SET_ITEM (ret , 1 ,
428- Py_BuildValue ("(NOi)" , PyLong_FromLong (0 ),
429- self -> na_object , self -> coerce ));
446+ if (self -> na_object != NULL ) {
447+ PyTuple_SET_ITEM (ret , 1 ,
448+ Py_BuildValue ("(NiO)" , PyLong_FromLong (0 ),
449+ self -> coerce , self -> na_object ));
450+ }
451+ else {
452+ PyTuple_SET_ITEM (
453+ ret , 1 ,
454+ Py_BuildValue ("(Ni)" , PyLong_FromLong (0 ), self -> coerce ));
455+ }
430456
431457 PyTuple_SET_ITEM (ret , 2 , Py_BuildValue ("(l)" , PICKLE_VERSION ));
432458
@@ -571,11 +597,11 @@ init_string_dtype(void)
571597 return 0 ;
572598}
573599
574- int
575- init_string_na_object (PyObject * mod )
600+ void
601+ gil_error (PyObject * type , const char * msg )
576602{
577- NA_OBJ = PyObject_GetAttrString ( mod , "NA" ) ;
578- StringNA_Type = Py_TYPE ( NA_OBJ );
579- Py_INCREF ( StringNA_Type );
580- return 0 ;
603+ PyGILState_STATE gstate ;
604+ gstate = PyGILState_Ensure ( );
605+ PyErr_SetString ( type , msg );
606+ PyGILState_Release ( gstate ) ;
581607}
0 commit comments