Skip to content

Commit 048df43

Browse files
committed
WIP: add casts between stringdtype and pandasstringdtype
1 parent d2d34f8 commit 048df43

File tree

3 files changed

+90
-25
lines changed

3 files changed

+90
-25
lines changed

stringdtype/stringdtype/src/casts.c

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ static PyType_Slot s2s_slots[] = {
7575
{0, NULL}};
7676

7777
static char *s2s_name = "cast_StringDType_to_StringDType";
78+
static char *s2p_name = "cast_StringDType_to_PandasStringDType";
79+
static char *p2s_name = "cast_PandasStringDType_to_StringDType";
7880

7981
// unicode to string
8082

@@ -476,38 +478,83 @@ get_dtypes(PyArray_DTypeMeta *dt1, PyArray_DTypeMeta *dt2)
476478
}
477479

478480
PyArrayMethod_Spec **
479-
get_casts(void)
481+
get_casts(PyArray_DTypeMeta *this, PyArray_DTypeMeta *other,
482+
int pandas_available)
480483
{
481-
PyArray_DTypeMeta **s2s_dtypes = get_dtypes(NULL, NULL);
484+
PyArray_DTypeMeta **s2s_dtypes = get_dtypes(this, this);
482485

483486
PyArrayMethod_Spec *StringToStringCastSpec =
484487
get_cast_spec(s2s_name, NPY_NO_CASTING,
485488
NPY_METH_SUPPORTS_UNALIGNED, s2s_dtypes, s2s_slots);
486489

487-
PyArray_DTypeMeta **u2s_dtypes = get_dtypes(&PyArray_UnicodeDType, NULL);
490+
PyArrayMethod_Spec *ThisToOtherCastSpec = NULL;
491+
PyArrayMethod_Spec *OtherToThisCastSpec = NULL;
492+
493+
if (pandas_available) {
494+
char *t2o_name = NULL;
495+
char *o2t_name = NULL;
496+
497+
if (this == (PyArray_DTypeMeta *)&StringDType) {
498+
t2o_name = s2p_name;
499+
o2t_name = p2s_name;
500+
}
501+
else {
502+
t2o_name = p2s_name;
503+
o2t_name = s2p_name;
504+
}
505+
506+
PyArray_DTypeMeta **t2o_dtypes = get_dtypes(this, other);
507+
508+
ThisToOtherCastSpec = get_cast_spec(t2o_name, NPY_NO_CASTING,
509+
NPY_METH_SUPPORTS_UNALIGNED,
510+
t2o_dtypes, s2s_slots);
511+
512+
PyArray_DTypeMeta **o2t_dtypes = get_dtypes(other, this);
513+
514+
OtherToThisCastSpec = get_cast_spec(o2t_name, NPY_NO_CASTING,
515+
NPY_METH_SUPPORTS_UNALIGNED,
516+
o2t_dtypes, s2s_slots);
517+
}
518+
519+
PyArray_DTypeMeta **u2s_dtypes = get_dtypes(&PyArray_UnicodeDType, this);
488520

489521
PyArrayMethod_Spec *UnicodeToStringCastSpec = get_cast_spec(
490522
u2s_name, NPY_SAFE_CASTING, NPY_METH_NO_FLOATINGPOINT_ERRORS,
491523
u2s_dtypes, u2s_slots);
492524

493-
PyArray_DTypeMeta **s2u_dtypes = get_dtypes(NULL, &PyArray_UnicodeDType);
525+
PyArray_DTypeMeta **s2u_dtypes = get_dtypes(this, &PyArray_UnicodeDType);
494526

495527
PyArrayMethod_Spec *StringToUnicodeCastSpec = get_cast_spec(
496528
s2u_name, NPY_SAFE_CASTING, NPY_METH_NO_FLOATINGPOINT_ERRORS,
497529
s2u_dtypes, s2u_slots);
498530

499-
PyArray_DTypeMeta **s2b_dtypes = get_dtypes(NULL, &PyArray_BoolDType);
531+
PyArray_DTypeMeta **s2b_dtypes = get_dtypes(this, &PyArray_BoolDType);
500532

501533
PyArrayMethod_Spec *StringToBoolCastSpec = get_cast_spec(
502534
s2b_name, NPY_UNSAFE_CASTING, NPY_METH_NO_FLOATINGPOINT_ERRORS,
503535
s2b_dtypes, s2b_slots);
504536

505-
PyArrayMethod_Spec **casts = malloc(5 * sizeof(PyArrayMethod_Spec *));
537+
PyArrayMethod_Spec **casts = NULL;
538+
539+
if (pandas_available) {
540+
casts = malloc(7 * sizeof(PyArrayMethod_Spec *));
541+
}
542+
else {
543+
casts = malloc(5 * sizeof(PyArrayMethod_Spec *));
544+
}
545+
506546
casts[0] = StringToStringCastSpec;
507547
casts[1] = UnicodeToStringCastSpec;
508548
casts[2] = StringToUnicodeCastSpec;
509549
casts[3] = StringToBoolCastSpec;
510-
casts[4] = NULL;
550+
if (pandas_available) {
551+
casts[4] = ThisToOtherCastSpec;
552+
casts[5] = OtherToThisCastSpec;
553+
casts[6] = NULL;
554+
}
555+
else {
556+
casts[4] = NULL;
557+
}
511558

512559
return casts;
513560
}

stringdtype/stringdtype/src/casts.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
#include "numpy/ndarraytypes.h"
1212

1313
PyArrayMethod_Spec **
14-
get_casts(void);
14+
get_casts(PyArray_DTypeMeta *this_dtype, PyArray_DTypeMeta *other_dtype,
15+
int pandas_available);
1516

1617
size_t
1718
utf8_char_to_ucs4_code(unsigned char *, Py_UCS4 *);

stringdtype/stringdtype/src/dtype.c

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -523,12 +523,24 @@ StringDType_type PandasStringDType = {
523523
int
524524
init_string_dtype(void)
525525
{
526-
PyArrayMethod_Spec **casts = get_casts();
526+
PyObject *pandas_mod = PyImport_ImportModule("pandas");
527+
528+
if (pandas_mod == NULL) {
529+
// clear ImportError
530+
PyErr_Clear();
531+
}
532+
else {
533+
PANDAS_AVAILABLE = 1;
534+
}
535+
536+
PyArrayMethod_Spec **StringDType_casts = get_casts(
537+
(PyArray_DTypeMeta *)&StringDType,
538+
(PyArray_DTypeMeta *)&PandasStringDType, PANDAS_AVAILABLE);
527539

528540
PyArrayDTypeMeta_Spec StringDType_DTypeSpec = {
529541
.typeobj = StringScalar_Type,
530542
.slots = StringDType_Slots,
531-
.casts = casts,
543+
.casts = StringDType_casts,
532544
};
533545

534546
/* Loaded dynamically, so may need to be set here: */
@@ -545,6 +557,10 @@ init_string_dtype(void)
545557
return -1;
546558
}
547559

560+
// Partially initialize PandasStringDType so cast setup succeeds
561+
((PyObject *)&PandasStringDType)->ob_type = &PyArrayDTypeMeta_Type;
562+
((PyTypeObject *)&PandasStringDType)->tp_base = &PyArrayDescr_Type;
563+
548564
if (PyArrayInitDTypeMeta_FromSpec((PyArray_DTypeMeta *)&StringDType,
549565
&StringDType_DTypeSpec) < 0) {
550566
return -1;
@@ -559,27 +575,32 @@ init_string_dtype(void)
559575

560576
StringDType.base.singleton = singleton;
561577

578+
for (int i = 0; StringDType_casts[i] != NULL; i++) {
579+
free(StringDType_casts[i]->dtypes);
580+
free(StringDType_casts[i]);
581+
}
582+
562583
/* and once again for PandasStringDType */
563584

564-
PyObject *mod = PyImport_ImportModule("pandas");
585+
if (PANDAS_AVAILABLE) {
586+
PyArrayMethod_Spec **PandasStringDType_casts =
587+
get_casts((PyArray_DTypeMeta *)&PandasStringDType,
588+
(PyArray_DTypeMeta *)&StringDType, PANDAS_AVAILABLE);
565589

566-
if (mod != NULL) {
567590
PyArrayDTypeMeta_Spec PandasStringDType_DTypeSpec = {
568591
.typeobj = PandasStringScalar_Type,
569592
.slots = StringDType_Slots,
570-
.casts = casts,
593+
.casts = PandasStringDType_casts,
571594
};
572595

573-
PyObject *pandas_na_obj = PyObject_GetAttrString(mod, "NA");
596+
PyObject *pandas_na_obj = PyObject_GetAttrString(pandas_mod, "NA");
574597

575-
Py_DECREF(mod);
598+
Py_DECREF(pandas_mod);
576599

577600
if (pandas_na_obj == NULL) {
578601
return -1;
579602
}
580603

581-
((PyObject *)&PandasStringDType)->ob_type = &PyArrayDTypeMeta_Type;
582-
((PyTypeObject *)&PandasStringDType)->tp_base = &PyArrayDescr_Type;
583604
((PyTypeObject *)&PandasStringDType)->tp_dict = PyDict_New();
584605
// C attribute for fast access
585606
Py_INCREF(pandas_na_obj);
@@ -605,15 +626,11 @@ init_string_dtype(void)
605626
}
606627

607628
PandasStringDType.base.singleton = singleton;
608-
PANDAS_AVAILABLE = 1;
609-
}
610-
else {
611-
PyErr_Clear();
612-
}
613629

614-
for (int i = 0; casts[i] != NULL; i++) {
615-
free(casts[i]->dtypes);
616-
free(casts[i]);
630+
for (int i = 0; PandasStringDType_casts[i] != NULL; i++) {
631+
free(PandasStringDType_casts[i]->dtypes);
632+
free(PandasStringDType_casts[i]);
633+
}
617634
}
618635

619636
return 0;

0 commit comments

Comments
 (0)