Skip to content

Commit 774a20f

Browse files
committed
Merge remote-tracking branch 'upstream/main' into ci/nightlies/sp_action
2 parents d7f3411 + e450f0c commit 774a20f

File tree

9 files changed

+131
-26
lines changed

9 files changed

+131
-26
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,6 +1346,7 @@ Reshaping
13461346
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
13471347
- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
13481348
- Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`)
1349+
- Bug in :meth:`DataFrame.merge` where specifying both ``right_on`` and ``right_index`` did not raise a :class:`MergeError` if ``left_on`` was also specified. Now raises a :class:`MergeError` in such cases. (:issue:`63242`)
13491350
- Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
13501351
- Bug in :meth:`DataFrame.merge` with :class:`CategoricalDtype` columns incorrectly raising ``RecursionError`` (:issue:`56376`)
13511352
- Bug in :meth:`DataFrame.merge` with a ``float32`` index incorrectly casting the index to ``float64`` (:issue:`41626`)

pandas/core/arrays/interval.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2128,9 +2128,8 @@ def _combined(self) -> IntervalSide:
21282128
)
21292129
comb = comb.view("complex128")[:, 0]
21302130
else:
2131-
comb = (np.array(left.ravel(), dtype="complex128")) + (
2132-
1j * np.array(right.ravel(), dtype="complex128")
2133-
)
2131+
comb = np.asarray(left.ravel(), dtype="complex128")
2132+
comb.imag = right.ravel()
21342133
return comb
21352134

21362135
def _from_combined(self, combined: np.ndarray) -> IntervalArray:

pandas/core/groupby/groupby.py

Lines changed: 62 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ class providing the base-class of operations.
6464
Pandas4Warning,
6565
)
6666
from pandas.util._decorators import (
67-
Appender,
6867
Substitution,
6968
cache_readonly,
7069
doc,
@@ -738,11 +737,65 @@ def pipe(
738737
**kwargs: Any,
739738
) -> T: ...
740739

741-
@Substitution(
742-
klass="GroupBy",
743-
examples=dedent(
744-
"""\
745-
>>> df = pd.DataFrame({'A': 'a b a b'.split(), 'B': [1, 2, 3, 4]})
740+
def pipe(
741+
self,
742+
func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
743+
*args: Any,
744+
**kwargs: Any,
745+
) -> T:
746+
"""
747+
Apply a ``func`` with arguments to this GroupBy object and return its result.
748+
749+
Use `.pipe` when you want to improve readability by chaining together
750+
functions that expect Series, DataFrames, GroupBy or Resampler objects.
751+
Instead of writing
752+
753+
>>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3
754+
>>> g = lambda x, arg1: x * 5 / arg1
755+
>>> f = lambda x: x**4
756+
>>> df = pd.DataFrame([["a", 4], ["b", 5]], columns=["group", "value"])
757+
>>> h(g(f(df.groupby("group")), arg1=1), arg2=2, arg3=3) # doctest: +SKIP
758+
759+
You can write
760+
761+
>>> (
762+
... df.groupby("group").pipe(f).pipe(g, arg1=1).pipe(h, arg2=2, arg3=3)
763+
... ) # doctest: +SKIP
764+
765+
which is much more readable.
766+
767+
Parameters
768+
----------
769+
func : callable or tuple of (callable, str)
770+
Function to apply to this GroupBy object or, alternatively,
771+
a `(callable, data_keyword)` tuple where `data_keyword` is a
772+
string indicating the keyword of `callable` that expects the
773+
GroupBy object.
774+
*args : iterable, optional
775+
Positional arguments passed into `func`.
776+
**kwargs : dict, optional
777+
A dictionary of keyword arguments passed into `func`.
778+
779+
Returns
780+
-------
781+
GroupBy
782+
The return type of `func`.
783+
784+
See Also
785+
--------
786+
Series.pipe : Apply a function with arguments to a series.
787+
DataFrame.pipe : Apply a function with arguments to a dataframe.
788+
apply : Apply function to each group instead of to the
789+
full GroupBy object.
790+
791+
Notes
792+
-----
793+
See more `here
794+
<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls>`_
795+
796+
Examples
797+
--------
798+
>>> df = pd.DataFrame({"A": "a b a b".split(), "B": [1, 2, 3, 4]})
746799
>>> df
747800
A B
748801
0 a 1
@@ -753,20 +806,12 @@ def pipe(
753806
To get the difference between each group's maximum and minimum value in one
754807
pass, you can do
755808
756-
>>> df.groupby('A').pipe(lambda x: x.max() - x.min())
809+
>>> df.groupby("A").pipe(lambda x: x.max() - x.min())
757810
B
758811
A
759812
a 2
760-
b 2"""
761-
),
762-
)
763-
@Appender(_pipe_template)
764-
def pipe(
765-
self,
766-
func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
767-
*args: Any,
768-
**kwargs: Any,
769-
) -> T:
813+
b 2
814+
"""
770815
return com.pipe(self, func, *args, **kwargs)
771816

772817
@final

pandas/core/reshape/merge.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1928,6 +1928,10 @@ def _validate_left_right_on(self, left_on, right_on):
19281928
)
19291929
if not self.right_index and right_on is None:
19301930
raise MergeError('Must pass "right_on" OR "right_index".')
1931+
if self.right_index and right_on is not None:
1932+
raise MergeError(
1933+
'Can only pass argument "right_on" OR "right_index" not both.'
1934+
)
19311935
n = len(left_on)
19321936
if self.right_index:
19331937
if len(left_on) != self.right.index.nlevels:

pandas/tests/arithmetic/test_interval.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,9 @@ def elementwise_comparison(self, op, interval_array, other):
107107
Helper that performs elementwise comparisons between `array` and `other`
108108
"""
109109
other = other if is_list_like(other) else [other] * len(interval_array)
110-
expected = np.array([op(x, y) for x, y in zip(interval_array, other)])
110+
expected = np.array(
111+
[op(x, y) for x, y in zip(interval_array, other, strict=True)]
112+
)
111113
if isinstance(other, Series):
112114
return Series(expected, index=other.index)
113115
return expected

pandas/tests/arithmetic/test_period.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -962,11 +962,11 @@ def test_pi_add_sub_int_array_freqn_gt1(self):
962962
pi = period_range("2016-01-01", periods=10, freq="2D")
963963
arr = np.arange(10)
964964
result = pi + arr
965-
expected = pd.Index([x + y for x, y in zip(pi, arr)])
965+
expected = pd.Index([x + y for x, y in zip(pi, arr, strict=True)])
966966
tm.assert_index_equal(result, expected)
967967

968968
result = pi - arr
969-
expected = pd.Index([x - y for x, y in zip(pi, arr)])
969+
expected = pd.Index([x - y for x, y in zip(pi, arr, strict=True)])
970970
tm.assert_index_equal(result, expected)
971971

972972
def test_pi_sub_isub_offset(self):

pandas/tests/arrays/interval/test_interval.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,20 @@ def test_unique_with_negatives(self):
136136
)
137137
tm.assert_index_equal(result, expected)
138138

139+
@pytest.mark.parametrize(
140+
"data",
141+
[
142+
[Interval(-np.inf, 0), Interval(-np.inf, 1)],
143+
[Interval(0, np.inf), Interval(1, np.inf)],
144+
],
145+
)
146+
def test_unique_with_infinty(self, data):
147+
# https://github.com/pandas-dev/pandas/issues/63218
148+
s = pd.Series(data)
149+
tm.assert_interval_array_equal(s.unique(), s.array)
150+
assert s.nunique() == 2
151+
tm.assert_series_equal(s.drop_duplicates(), s)
152+
139153

140154
class TestSetitem:
141155
def test_set_na(self, left_right_dtypes):

pandas/tests/reshape/merge/test_merge.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2936,6 +2936,41 @@ def test_merge_multiindex_single_level():
29362936
tm.assert_frame_equal(result, expected)
29372937

29382938

2939+
def test_merge_multiindex_reset_index_mixed():
2940+
# GH#62150
2941+
df = DataFrame(
2942+
{("column_1", ""): [1, 1], ("column_2", ""): [2, 2]},
2943+
index=MultiIndex.from_arrays(
2944+
[[1, 1], ["metadata_1", "metadata_2"]], names=["index", "metadata"]
2945+
),
2946+
)
2947+
2948+
df2 = DataFrame(
2949+
data=[1, 1],
2950+
index=Index([1, 1], name="index"),
2951+
columns=MultiIndex.from_product([["new_data"], [""]]),
2952+
)
2953+
2954+
with tm.assert_produces_warning(pd.errors.PerformanceWarning):
2955+
result = df.reset_index().merge(df2.reset_index(), on="index")
2956+
2957+
expected = DataFrame(
2958+
{
2959+
("index", ""): [1, 1, 1, 1],
2960+
("metadata", ""): ["metadata_1", "metadata_1", "metadata_2", "metadata_2"],
2961+
("column_1", ""): [1, 1, 1, 1],
2962+
("column_2", ""): [2, 2, 2, 2],
2963+
("new_data", ""): [1, 1, 1, 1],
2964+
}
2965+
)
2966+
expected.columns = MultiIndex.from_tuples(expected.columns)
2967+
2968+
tm.assert_frame_equal(result, expected)
2969+
2970+
result2 = df.reset_index().merge(df2.reset_index(), on=[("index", "")])
2971+
tm.assert_frame_equal(result2, expected)
2972+
2973+
29392974
@pytest.mark.parametrize("on_index", [True, False])
29402975
@pytest.mark.parametrize("left_unique", [True, False])
29412976
@pytest.mark.parametrize("left_monotonic", [True, False])
@@ -3149,3 +3184,11 @@ def test_merge_pyarrow_datetime_duplicates():
31493184
)
31503185
expected = expected.convert_dtypes(dtype_backend="pyarrow")
31513186
tm.assert_frame_equal(result, expected)
3187+
3188+
3189+
def test_merge_right_on_and_right_index():
3190+
df1 = DataFrame({"col": [1, 2, 3]})
3191+
df2 = DataFrame({"col": [2, 3, 4]})
3192+
3193+
with pytest.raises(pd.errors.MergeError):
3194+
df1.merge(df2, left_on="col", right_on="col", right_index=True)

pyproject.toml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -463,9 +463,6 @@ exclude = [
463463
"pandas/_testing/asserters.py" = ["B905"]
464464
"pandas/_testing/_warnings.py" = ["B905"]
465465
"pandas/tests/apply/test_series_apply.py" = ["B905"]
466-
"pandas/tests/arithmetic/test_interval.py" = ["B905"]
467-
"pandas/tests/arithmetic/test_numeric.py" = ["B905"]
468-
"pandas/tests/arithmetic/test_period.py" = ["B905"]
469466
"pandas/tests/arrays/categorical/test_map.py" = ["B905"]
470467
"pandas/tests/arrays/integer/test_construction.py" = ["B905"]
471468
"pandas/tests/arrays/integer/test_function.py" = ["B905"]

0 commit comments

Comments
 (0)