Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2936,6 +2936,44 @@ def test_merge_multiindex_single_level():
tm.assert_frame_equal(result, expected)


def test_merge_multiindex_reset_index_mixed():
# GH#62150 - merging reset MultiIndex when one df has single-level index
# Bug existed in 2.3.1, fixed in 3.0.0, this test prevents regression
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remove all the comments (and the comments below) except GH#62150?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure


# Left frame: two-level row index ("index", "metadata") and two-level columns.
# Index and columns are passed to the constructor instead of being assigned
# afterwards, so each fixture is fully described by a single expression.
df = DataFrame(
    {("column_1", ""): [1, 1], ("column_2", ""): [2, 2]},
    index=MultiIndex.from_arrays(
        [[1, 1], ["metadata_1", "metadata_2"]], names=["index", "metadata"]
    ),
)

# Right frame: single-level row index named "index" and two-level columns.
df2 = DataFrame(
    [1, 1],
    index=pd.Index([1, 1], name="index"),
    columns=MultiIndex.from_product([["new_data"], [""]]),
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned in #62150 (comment), it would be good to construct these DataFrames directly

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha, I'll do that


# Merge on the flat label "index" after resetting both indexes.
# A PerformanceWarning is expected here because the MultiIndex involved
# is not lexsorted.
with tm.assert_produces_warning(pd.errors.PerformanceWarning):
    result = df.reset_index().merge(df2.reset_index(), on="index")

# Each of the two left rows matches both right rows on index == 1,
# giving four rows in the merged frame.
expected = DataFrame(
    {
        ("index", ""): [1, 1, 1, 1],
        ("metadata", ""): ["metadata_1", "metadata_1", "metadata_2", "metadata_2"],
        ("column_1", ""): [1, 1, 1, 1],
        ("column_2", ""): [2, 2, 2, 2],
        ("new_data", ""): [1, 1, 1, 1],
    }
)
expected.columns = MultiIndex.from_tuples(expected.columns)

tm.assert_frame_equal(result, expected)

# Merging on the fully qualified tuple key must give the same frame.
# NOTE(review): the original author observed that a PerformanceWarning
# "may or may not be raised" on this call, so it is left unguarded;
# confirm whether the warning is deterministic before asserting on it.
result2 = df.reset_index().merge(df2.reset_index(), on=[("index", "")])
tm.assert_frame_equal(result2, expected)


@pytest.mark.parametrize("on_index", [True, False])
@pytest.mark.parametrize("left_unique", [True, False])
@pytest.mark.parametrize("left_monotonic", [True, False])
Expand Down
Loading