Skip to content

Commit 968b4bb

Browse files
authored
Merge branch 'main' into arrow-to-csv
2 parents 3d95a92 + 7012d6a commit 968b4bb

File tree

155 files changed

+1881
-1108
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

155 files changed

+1881
-1108
lines changed

.github/workflows/unit-tests.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,14 @@ jobs:
7373
env_file: actions-311.yaml
7474
pattern: "not slow and not network and not single_cpu"
7575
pandas_copy_on_write: "warn"
76+
- name: "Copy-on-Write 3.10 (warnings)"
77+
env_file: actions-310.yaml
78+
pattern: "not slow and not network and not single_cpu"
79+
pandas_copy_on_write: "warn"
80+
- name: "Copy-on-Write 3.9 (warnings)"
81+
env_file: actions-39.yaml
82+
pattern: "not slow and not network and not single_cpu"
83+
pandas_copy_on_write: "warn"
7684
- name: "Pypy"
7785
env_file: actions-pypy-39.yaml
7886
pattern: "not slow and not network and not single_cpu"

.pre-commit-config.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,9 @@ repos:
240240
# pytest raises without context
241241
|\s\ pytest.raises
242242
243+
# TODO
243244
# pytest.warns (use tm.assert_produces_warning instead)
244-
|pytest\.warns
245+
# |pytest\.warns
245246
246247
# os.remove
247248
|os\.remove

asv_bench/benchmarks/algorithms.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
import pandas as pd
66

7-
from .pandas_vb_common import tm
8-
97
for imp in ["pandas.util", "pandas.tools.hashing"]:
108
try:
119
hashing = import_module(imp)
@@ -47,9 +45,12 @@ def setup(self, unique, sort, dtype):
4745
elif dtype == "datetime64[ns, tz]":
4846
data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo")
4947
elif dtype == "object_str":
50-
data = tm.makeStringIndex(N)
48+
data = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
5149
elif dtype == "string[pyarrow]":
52-
data = pd.array(tm.makeStringIndex(N), dtype="string[pyarrow]")
50+
data = pd.array(
51+
pd.Index([f"i-{i}" for i in range(N)], dtype=object),
52+
dtype="string[pyarrow]",
53+
)
5354
else:
5455
raise NotImplementedError
5556

@@ -88,7 +89,7 @@ def setup(self, unique, keep, dtype):
8889
elif dtype == "float64":
8990
data = pd.Index(np.random.randn(N), dtype="float64")
9091
elif dtype == "string":
91-
data = tm.makeStringIndex(N)
92+
data = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
9293
elif dtype == "datetime64[ns]":
9394
data = pd.date_range("2011-01-01", freq="h", periods=N)
9495
elif dtype == "datetime64[ns, tz]":
@@ -136,7 +137,9 @@ def setup_cache(self):
136137
df = pd.DataFrame(
137138
{
138139
"strings": pd.Series(
139-
tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N))
140+
pd.Index([f"i-{i}" for i in range(10000)], dtype=object).take(
141+
np.random.randint(0, 10000, size=N)
142+
)
140143
),
141144
"floats": np.random.randn(N),
142145
"ints": np.arange(N),

asv_bench/benchmarks/algos/isin.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
date_range,
99
)
1010

11-
from ..pandas_vb_common import tm
12-
1311

1412
class IsIn:
1513
params = [
@@ -60,7 +58,9 @@ def setup(self, dtype):
6058

6159
elif dtype in ["str", "string[python]", "string[pyarrow]"]:
6260
try:
63-
self.series = Series(tm.makeStringIndex(N), dtype=dtype)
61+
self.series = Series(
62+
Index([f"i-{i}" for i in range(N)], dtype=object), dtype=dtype
63+
)
6464
except ImportError:
6565
raise NotImplementedError
6666
self.values = list(self.series[:2])

asv_bench/benchmarks/categoricals.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66

77
import pandas as pd
88

9-
from .pandas_vb_common import tm
10-
119
try:
1210
from pandas.api.types import union_categoricals
1311
except ImportError:
@@ -189,7 +187,7 @@ def setup(self):
189187
N = 10**5
190188
ncats = 15
191189

192-
self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
190+
self.s_str = pd.Series(np.random.randint(0, ncats, size=N).astype(str))
193191
self.s_str_cat = pd.Series(self.s_str, dtype="category")
194192
with warnings.catch_warnings(record=True):
195193
str_cat_type = pd.CategoricalDtype(set(self.s_str), ordered=True)
@@ -242,7 +240,7 @@ def time_categorical_series_is_monotonic_decreasing(self):
242240
class Contains:
243241
def setup(self):
244242
N = 10**5
245-
self.ci = tm.makeCategoricalIndex(N)
243+
self.ci = pd.CategoricalIndex(np.arange(N))
246244
self.c = self.ci.values
247245
self.key = self.ci.categories[0]
248246

@@ -325,7 +323,7 @@ def time_sort_values(self):
325323
class SearchSorted:
326324
def setup(self):
327325
N = 10**5
328-
self.ci = tm.makeCategoricalIndex(N).sort_values()
326+
self.ci = pd.CategoricalIndex(np.arange(N)).sort_values()
329327
self.c = self.ci.values
330328
self.key = self.ci.categories[1]
331329

asv_bench/benchmarks/ctors.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
date_range,
1010
)
1111

12-
from .pandas_vb_common import tm
13-
1412

1513
def no_change(arr):
1614
return arr
@@ -115,7 +113,7 @@ def time_dtindex_from_index_with_series(self):
115113
class MultiIndexConstructor:
116114
def setup(self):
117115
N = 10**4
118-
self.iterables = [tm.makeStringIndex(N), range(20)]
116+
self.iterables = [Index([f"i-{i}" for i in range(N)], dtype=object), range(20)]
119117

120118
def time_multiindex_from_iterables(self):
121119
MultiIndex.from_product(self.iterables)

asv_bench/benchmarks/dtypes.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
import numpy as np
44

55
import pandas as pd
6-
from pandas import DataFrame
6+
from pandas import (
7+
DataFrame,
8+
Index,
9+
)
710
import pandas._testing as tm
811
from pandas.api.types import (
912
is_extension_array_dtype,
@@ -73,8 +76,8 @@ class SelectDtypes:
7376

7477
def setup(self, dtype):
7578
N, K = 5000, 50
76-
self.index = tm.makeStringIndex(N)
77-
self.columns = tm.makeStringIndex(K)
79+
self.index = Index([f"i-{i}" for i in range(N)], dtype=object)
80+
self.columns = Index([f"i-{i}" for i in range(K)], dtype=object)
7881

7982
def create_df(data):
8083
return DataFrame(data, index=self.index, columns=self.columns)

asv_bench/benchmarks/frame_ctor.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
date_range,
1313
)
1414

15-
from .pandas_vb_common import tm
16-
1715
try:
1816
from pandas.tseries.offsets import (
1917
Hour,
@@ -30,8 +28,8 @@
3028
class FromDicts:
3129
def setup(self):
3230
N, K = 5000, 50
33-
self.index = tm.makeStringIndex(N)
34-
self.columns = tm.makeStringIndex(K)
31+
self.index = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
32+
self.columns = pd.Index([f"i-{i}" for i in range(K)], dtype=object)
3533
frame = DataFrame(np.random.randn(N, K), index=self.index, columns=self.columns)
3634
self.data = frame.to_dict()
3735
self.dict_list = frame.to_dict(orient="records")

asv_bench/benchmarks/frame_methods.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from pandas import (
77
DataFrame,
8+
Index,
89
MultiIndex,
910
NaT,
1011
Series,
@@ -14,8 +15,6 @@
1415
timedelta_range,
1516
)
1617

17-
from .pandas_vb_common import tm
18-
1918

2019
class AsType:
2120
params = [
@@ -703,8 +702,12 @@ def setup(self, monotonic):
703702
K = 10
704703
df = DataFrame(
705704
{
706-
"key1": tm.makeStringIndex(N).values.repeat(K),
707-
"key2": tm.makeStringIndex(N).values.repeat(K),
705+
"key1": Index([f"i-{i}" for i in range(N)], dtype=object).values.repeat(
706+
K
707+
),
708+
"key2": Index([f"i-{i}" for i in range(N)], dtype=object).values.repeat(
709+
K
710+
),
708711
"value": np.random.randn(N * K),
709712
}
710713
)

asv_bench/benchmarks/gil.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@
55

66
from pandas import (
77
DataFrame,
8+
Index,
89
Series,
910
date_range,
1011
factorize,
1112
read_csv,
1213
)
1314
from pandas.core.algorithms import take_nd
1415

15-
from .pandas_vb_common import tm
16-
1716
try:
1817
from pandas import (
1918
rolling_kurt,
@@ -34,7 +33,6 @@
3433
except ImportError:
3534
from pandas import algos
3635

37-
3836
from .pandas_vb_common import BaseIO # isort:skip
3937

4038

@@ -305,7 +303,7 @@ class ParallelFactorize:
305303
param_names = ["threads"]
306304

307305
def setup(self, threads):
308-
strings = tm.makeStringIndex(100000)
306+
strings = Index([f"i-{i}" for i in range(100000)], dtype=object)
309307

310308
@test_parallel(num_threads=threads)
311309
def parallel():

0 commit comments

Comments
 (0)