Skip to content

Commit c3e13ba

Browse files
authored
ENH: use stable sort in value_counts (#63158)
1 parent 1a3230d commit c3e13ba

File tree

5 files changed

+29
-6
lines changed

5 files changed

+29
-6
lines changed

pandas/core/algorithms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,7 @@ def value_counts_internal(
948948
result = Series(counts, index=idx, name=name, copy=False)
949949

950950
if sort:
951-
result = result.sort_values(ascending=ascending)
951+
result = result.sort_values(ascending=ascending, kind="stable")
952952

953953
if normalize:
954954
result = result / counts.sum()

pandas/core/base.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -993,7 +993,12 @@ def value_counts(
993993
If True then the object returned will contain the relative
994994
frequencies of the unique values.
995995
sort : bool, default True
996-
Sort by frequencies when True. Preserve the order of the data when False.
996+
Stable sort by frequencies when True. Preserve the order of the data
997+
when False.
998+
999+
.. versionchanged:: 3.0.0
1000+
1001+
Prior to 3.0.0, the sort was unstable.
9971002
ascending : bool, default False
9981003
Sort in ascending order.
9991004
bins : int, optional

pandas/core/frame.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7761,11 +7761,16 @@ def value_counts(
77617761
normalize : bool, default False
77627762
Return proportions rather than frequencies.
77637763
sort : bool, default True
7764-
Sort by frequencies when True. Preserve the order of the data when False.
7764+
Stable sort by frequencies when True. Preserve the order of the data
7765+
when False.
77657766
77667767
.. versionchanged:: 3.0.0
77677768
77687769
Prior to 3.0.0, ``sort=False`` would sort by the column values.
7770+
7771+
.. versionchanged:: 3.0.0
7772+
7773+
Prior to 3.0.0, the sort was unstable.
77697774
ascending : bool, default False
77707775
Sort in ascending order.
77717776
dropna : bool, default True
@@ -7875,7 +7880,7 @@ def value_counts(
78757880
counts.name = name
78767881

78777882
if sort:
7878-
counts = counts.sort_values(ascending=ascending)
7883+
counts = counts.sort_values(ascending=ascending, kind="stable")
78797884
if normalize:
78807885
counts /= counts.sum()
78817886

pandas/core/groupby/generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2884,8 +2884,8 @@ def value_counts(
28842884
normalize : bool, default False
28852885
Return proportions rather than frequencies.
28862886
sort : bool, default True
2887-
Sort by frequencies when True. When False, non-grouping columns will appear
2888-
in the order they occur in within groups.
2887+
Stable sort by frequencies when True. When False, non-grouping
2888+
columns will appear in the order they occur within groups.
28892889
28902890
.. versionchanged:: 3.0.0
28912891

pandas/tests/test_algos.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1446,6 +1446,19 @@ def test_value_counts_series(self):
14461446
)
14471447
tm.assert_series_equal(result, expected)
14481448

1449+
def test_value_counts_stability(self):
1450+
# GH 63155
1451+
arr = np.random.default_rng(2).integers(0, 32, 64)
1452+
result = algos.value_counts_internal(arr, sort=True)
1453+
1454+
value_counts = Series(arr).value_counts(sort=False)
1455+
expected = value_counts.sort_values(ascending=False, kind="stable")
1456+
tm.assert_series_equal(result, expected)
1457+
1458+
unstable_sorted = value_counts.sort_values(ascending=False, kind="quicksort")
1459+
with pytest.raises(AssertionError):
1460+
tm.assert_series_equal(result, unstable_sorted)
1461+
14491462

14501463
class TestDuplicated:
14511464
def test_duplicated_with_nas(self):

0 commit comments

Comments
 (0)