From ec6e096062bd6469dc334f8e5a7a843ad7773da5 Mon Sep 17 00:00:00 2001
From: zachyattack23
Date: Wed, 3 Dec 2025 13:50:00 -0500
Subject: [PATCH 1/2] TST: Add regression test for MultiIndex merge after reset_index

- Add test for GH#62150
- Bug was present in 2.3.1, fixed in 3.0.0
- Test ensures merge works when one df has single-level index and other has multi-level index after reset_index()

Closes #62150
---
 pandas/tests/reshape/merge/test_merge.py | 38 ++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index f6796694621c3..658957770c4d1 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2936,6 +2936,44 @@ def test_merge_multiindex_single_level():
     tm.assert_frame_equal(result, expected)
 
 
+def test_merge_multiindex_reset_index_mixed():
+    # GH#62150 - merging reset MultiIndex when one df has single-level index
+    # Bug existed in 2.3.1, fixed in 3.0.0, this test prevents regression
+
+    # Create first dataframe with MultiIndex on both index and columns
+    df = DataFrame(data={("column_1", ""): [1, 1], ("column_2", ""): [2, 2]})
+    df.index = MultiIndex.from_arrays(
+        [[1, 1], ["metadata_1", "metadata_2"]], names=["index", "metadata"]
+    )
+
+    # Create second dataframe with single index and MultiIndex columns
+    df2 = DataFrame(data=[1, 1], index=[1, 1]).rename_axis("index", axis=0)
+    df2.columns = MultiIndex.from_product([["new_data"], [""]])
+
+    # Test merge with on='index' - should work
+    # Note: PerformanceWarning is expected due to non-lexsorted MultiIndex
+    with tm.assert_produces_warning(pd.errors.PerformanceWarning):
+        result = df.reset_index().merge(df2.reset_index(), on="index")
+
+    expected = DataFrame(
+        {
+            ("index", ""): [1, 1, 1, 1],
+            ("metadata", ""): ["metadata_1", "metadata_1", "metadata_2", "metadata_2"],
+            ("column_1", ""): [1, 1, 1, 1],
+            ("column_2", ""): [2, 2, 2, 2],
+            ("new_data", ""): [1, 1, 1, 1],
+        }
+    )
+    expected.columns = MultiIndex.from_tuples(expected.columns)
+
+    tm.assert_frame_equal(result, expected)
+
+    # Test merge with on=[('index', '')] - should also work
+    # Warning may or may not be raised depending on internal state
+    result2 = df.reset_index().merge(df2.reset_index(), on=[("index", "")])
+    tm.assert_frame_equal(result2, expected)
+
+
 @pytest.mark.parametrize("on_index", [True, False])
 @pytest.mark.parametrize("left_unique", [True, False])
 @pytest.mark.parametrize("left_monotonic", [True, False])

From 42f2406a2bff9744d1e216e109b012ad60337771 Mon Sep 17 00:00:00 2001
From: zachyattack23
Date: Thu, 4 Dec 2025 13:35:15 -0500
Subject: [PATCH 2/2] Address review feedback: simplify test construction and comments

---
 pandas/tests/reshape/merge/test_merge.py | 25 +++++++++++-------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 658957770c4d1..b293b9d73d678 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2937,21 +2937,20 @@ def test_merge_multiindex_single_level():
 
 
 def test_merge_multiindex_reset_index_mixed():
-    # GH#62150 - merging reset MultiIndex when one df has single-level index
-    # Bug existed in 2.3.1, fixed in 3.0.0, this test prevents regression
-
-    # Create first dataframe with MultiIndex on both index and columns
-    df = DataFrame(data={("column_1", ""): [1, 1], ("column_2", ""): [2, 2]})
-    df.index = MultiIndex.from_arrays(
-        [[1, 1], ["metadata_1", "metadata_2"]], names=["index", "metadata"]
+    # GH#62150
+    df = DataFrame(
+        {("column_1", ""): [1, 1], ("column_2", ""): [2, 2]},
+        index=MultiIndex.from_arrays(
+            [[1, 1], ["metadata_1", "metadata_2"]], names=["index", "metadata"]
+        ),
     )
 
-    # Create second dataframe with single index and MultiIndex columns
-    df2 = DataFrame(data=[1, 1], index=[1, 1]).rename_axis("index", axis=0)
-    df2.columns = MultiIndex.from_product([["new_data"], [""]])
+    df2 = DataFrame(
+        data=[1, 1],
+        index=Index([1, 1], name="index"),
+        columns=MultiIndex.from_product([["new_data"], [""]]),
+    )
 
-    # Test merge with on='index' - should work
-    # Note: PerformanceWarning is expected due to non-lexsorted MultiIndex
     with tm.assert_produces_warning(pd.errors.PerformanceWarning):
         result = df.reset_index().merge(df2.reset_index(), on="index")
 
@@ -2968,8 +2967,6 @@ def test_merge_multiindex_reset_index_mixed():
 
     tm.assert_frame_equal(result, expected)
 
-    # Test merge with on=[('index', '')] - should also work
-    # Warning may or may not be raised depending on internal state
     result2 = df.reset_index().merge(df2.reset_index(), on=[("index", "")])
     tm.assert_frame_equal(result2, expected)
 