Merge remote-tracking branch 'upstream/main' into ci/nightlies/sp_action

mroeschke · mroeschke · commit 774a20f4ebf2 · 2025-12-05T09:46:08.000-08:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -1346,6 +1346,7 @@ Reshaping
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`)
 - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
 - Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`)
+- Bug in :meth:`DataFrame.merge` where specifying both ``right_on`` and ``right_index`` did not raise a :class:`MergeError` if ``left_on`` was also specified. Now raises a :class:`MergeError` in such cases. (:issue:`63242`)
 - Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
 - Bug in :meth:`DataFrame.merge` with :class:`CategoricalDtype` columns incorrectly raising ``RecursionError`` (:issue:`56376`)
 - Bug in :meth:`DataFrame.merge` with a ``float32`` index incorrectly casting the index to ``float64`` (:issue:`41626`)
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -2128,9 +2128,8 @@ def _combined(self) -> IntervalSide:
             )
             comb = comb.view("complex128")[:, 0]
         else:
-            comb = (np.array(left.ravel(), dtype="complex128")) + (
-                1j * np.array(right.ravel(), dtype="complex128")
-            )
+            comb = np.asarray(left.ravel(), dtype="complex128")
+            comb.imag = right.ravel()
         return comb
 
     def _from_combined(self, combined: np.ndarray) -> IntervalArray:
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -64,7 +64,6 @@ class providing the base-class of operations.
     Pandas4Warning,
 )
 from pandas.util._decorators import (
-    Appender,
     Substitution,
     cache_readonly,
     doc,
@@ -738,11 +737,65 @@ def pipe(
         **kwargs: Any,
     ) -> T: ...
 
-    @Substitution(
-        klass="GroupBy",
-        examples=dedent(
-            """\
-        >>> df = pd.DataFrame({'A': 'a b a b'.split(), 'B': [1, 2, 3, 4]})
+    def pipe(
+        self,
+        func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
+        *args: Any,
+        **kwargs: Any,
+    ) -> T:
+        """
+        Apply a ``func`` with arguments to this GroupBy object and return its result.
+
+        Use `.pipe` when you want to improve readability by chaining together
+        functions that expect Series, DataFrames, GroupBy or Resampler objects.
+        Instead of writing
+
+        >>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3
+        >>> g = lambda x, arg1: x * 5 / arg1
+        >>> f = lambda x: x**4
+        >>> df = pd.DataFrame([["a", 4], ["b", 5]], columns=["group", "value"])
+        >>> h(g(f(df.groupby("group")), arg1=1), arg2=2, arg3=3)  # doctest: +SKIP
+
+        You can write
+
+        >>> (
+        ...     df.groupby("group").pipe(f).pipe(g, arg1=1).pipe(h, arg2=2, arg3=3)
+        ... )  # doctest: +SKIP
+
+        which is much more readable.
+
+        Parameters
+        ----------
+        func : callable or tuple of (callable, str)
+            Function to apply to this GroupBy object or, alternatively,
+            a `(callable, data_keyword)` tuple where `data_keyword` is a
+            string indicating the keyword of `callable` that expects the
+            GroupBy object.
+        *args : iterable, optional
+            Positional arguments passed into `func`.
+        **kwargs : dict, optional
+            A dictionary of keyword arguments passed into `func`.
+
+        Returns
+        -------
+        GroupBy
+            The return type of `func`.
+
+        See Also
+        --------
+        Series.pipe : Apply a function with arguments to a series.
+        DataFrame.pipe : Apply a function with arguments to a dataframe.
+        apply : Apply function to each group instead of to the
+            full GroupBy object.
+
+        Notes
+        -----
+        See more `here
+        <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls>`_
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({"A": "a b a b".split(), "B": [1, 2, 3, 4]})
         >>> df
            A  B
         0  a  1
@@ -753,20 +806,12 @@ def pipe(
         To get the difference between each groups maximum and minimum value in one
         pass, you can do
 
-        >>> df.groupby('A').pipe(lambda x: x.max() - x.min())
+        >>> df.groupby("A").pipe(lambda x: x.max() - x.min())
            B
         A
         a  2
-        b  2"""
-        ),
-    )
-    @Appender(_pipe_template)
-    def pipe(
-        self,
-        func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
-        *args: Any,
-        **kwargs: Any,
-    ) -> T:
+        b  2
+        """
         return com.pipe(self, func, *args, **kwargs)
 
     @final
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -1928,6 +1928,10 @@ def _validate_left_right_on(self, left_on, right_on):
                 )
             if not self.right_index and right_on is None:
                 raise MergeError('Must pass "right_on" OR "right_index".')
+            if self.right_index and right_on is not None:
+                raise MergeError(
+                    'Can only pass argument "right_on" OR "right_index" not both.'
+                )
             n = len(left_on)
             if self.right_index:
                 if len(left_on) != self.right.index.nlevels:
diff --git a/pandas/tests/arithmetic/test_interval.py b/pandas/tests/arithmetic/test_interval.py
@@ -107,7 +107,9 @@ def elementwise_comparison(self, op, interval_array, other):
         Helper that performs elementwise comparisons between `array` and `other`
         """
         other = other if is_list_like(other) else [other] * len(interval_array)
-        expected = np.array([op(x, y) for x, y in zip(interval_array, other)])
+        expected = np.array(
+            [op(x, y) for x, y in zip(interval_array, other, strict=True)]
+        )
         if isinstance(other, Series):
             return Series(expected, index=other.index)
         return expected
diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py
@@ -962,11 +962,11 @@ def test_pi_add_sub_int_array_freqn_gt1(self):
         pi = period_range("2016-01-01", periods=10, freq="2D")
         arr = np.arange(10)
         result = pi + arr
-        expected = pd.Index([x + y for x, y in zip(pi, arr)])
+        expected = pd.Index([x + y for x, y in zip(pi, arr, strict=True)])
         tm.assert_index_equal(result, expected)
 
         result = pi - arr
-        expected = pd.Index([x - y for x, y in zip(pi, arr)])
+        expected = pd.Index([x - y for x, y in zip(pi, arr, strict=True)])
         tm.assert_index_equal(result, expected)
 
     def test_pi_sub_isub_offset(self):
diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py
@@ -136,6 +136,20 @@ def test_unique_with_negatives(self):
         )
         tm.assert_index_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "data",
+        [
+            [Interval(-np.inf, 0), Interval(-np.inf, 1)],
+            [Interval(0, np.inf), Interval(1, np.inf)],
+        ],
+    )
+    def test_unique_with_infinty(self, data):
+        # https://github.com/pandas-dev/pandas/issues/63218
+        s = pd.Series(data)
+        tm.assert_interval_array_equal(s.unique(), s.array)
+        assert s.nunique() == 2
+        tm.assert_series_equal(s.drop_duplicates(), s)
+
 
 class TestSetitem:
     def test_set_na(self, left_right_dtypes):
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -2936,6 +2936,41 @@ def test_merge_multiindex_single_level():
     tm.assert_frame_equal(result, expected)
 
 
+def test_merge_multiindex_reset_index_mixed():
+    # GH#62150
+    df = DataFrame(
+        {("column_1", ""): [1, 1], ("column_2", ""): [2, 2]},
+        index=MultiIndex.from_arrays(
+            [[1, 1], ["metadata_1", "metadata_2"]], names=["index", "metadata"]
+        ),
+    )
+
+    df2 = DataFrame(
+        data=[1, 1],
+        index=Index([1, 1], name="index"),
+        columns=MultiIndex.from_product([["new_data"], [""]]),
+    )
+
+    with tm.assert_produces_warning(pd.errors.PerformanceWarning):
+        result = df.reset_index().merge(df2.reset_index(), on="index")
+
+    expected = DataFrame(
+        {
+            ("index", ""): [1, 1, 1, 1],
+            ("metadata", ""): ["metadata_1", "metadata_1", "metadata_2", "metadata_2"],
+            ("column_1", ""): [1, 1, 1, 1],
+            ("column_2", ""): [2, 2, 2, 2],
+            ("new_data", ""): [1, 1, 1, 1],
+        }
+    )
+    expected.columns = MultiIndex.from_tuples(expected.columns)
+
+    tm.assert_frame_equal(result, expected)
+
+    result2 = df.reset_index().merge(df2.reset_index(), on=[("index", "")])
+    tm.assert_frame_equal(result2, expected)
+
+
 @pytest.mark.parametrize("on_index", [True, False])
 @pytest.mark.parametrize("left_unique", [True, False])
 @pytest.mark.parametrize("left_monotonic", [True, False])
@@ -3149,3 +3184,11 @@ def test_merge_pyarrow_datetime_duplicates():
     )
     expected = expected.convert_dtypes(dtype_backend="pyarrow")
     tm.assert_frame_equal(result, expected)
+
+
+def test_merge_right_on_and_right_index():
+    df1 = DataFrame({"col": [1, 2, 3]})
+    df2 = DataFrame({"col": [2, 3, 4]})
+
+    with pytest.raises(pd.errors.MergeError):
+        df1.merge(df2, left_on="col", right_on="col", right_index=True)
diff --git a/pyproject.toml b/pyproject.toml
@@ -463,9 +463,6 @@ exclude = [
 "pandas/_testing/asserters.py" = ["B905"]
 "pandas/_testing/_warnings.py" = ["B905"]
 "pandas/tests/apply/test_series_apply.py" = ["B905"]
-"pandas/tests/arithmetic/test_interval.py" = ["B905"]
-"pandas/tests/arithmetic/test_numeric.py" = ["B905"]
-"pandas/tests/arithmetic/test_period.py" = ["B905"]
 "pandas/tests/arrays/categorical/test_map.py" = ["B905"]
 "pandas/tests/arrays/integer/test_construction.py" = ["B905"]
 "pandas/tests/arrays/integer/test_function.py" = ["B905"]