Skip to content

Commit ad91b9c

Browse files
committed
Merge remote-tracking branch 'upstream/main' into sum-string-dtype
2 parents d2a062b + fa1360d commit ad91b9c

File tree

12 files changed

+91
-21
lines changed

12 files changed

+91
-21
lines changed

doc/source/user_guide/copy_on_write.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,9 +249,9 @@ two subsequent indexing operations, e.g.
249249
In [3]: df
250250
Out[3]:
251251
foo bar
252-
0 100 4
252+
0 1 4
253253
1 2 5
254-
2 3 6
254+
2 100 6
255255
256256
The column ``foo`` was updated where the column ``bar`` is greater than 5.
257257
This violated the CoW principles though, because it would have to modify the

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,6 +1321,7 @@ Groupby/resample/rolling
13211321
- Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)
13221322
- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
13231323
- Bug in :meth:`DataFrame.groupby` methods when operating on NumPy-nullable data failing when the NA mask was not C-contiguous (:issue:`61031`)
1324+
- Bug in :meth:`DataFrame.groupby` where modifying a Series used as the grouper after calling :meth:`DataFrame.groupby`, but prior to the groupby operation, would change the result of the operation (:issue:`63219`)
13241325

13251326
Reshaping
13261327
^^^^^^^^^

pandas/core/algorithms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,7 @@ def value_counts_internal(
948948
result = Series(counts, index=idx, name=name, copy=False)
949949

950950
if sort:
951-
result = result.sort_values(ascending=ascending)
951+
result = result.sort_values(ascending=ascending, kind="stable")
952952

953953
if normalize:
954954
result = result / counts.sum()

pandas/core/base.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -993,7 +993,12 @@ def value_counts(
993993
If True then the object returned will contain the relative
994994
frequencies of the unique values.
995995
sort : bool, default True
996-
Sort by frequencies when True. Preserve the order of the data when False.
996+
Stable sort by frequencies when True. Preserve the order of the data
997+
when False.
998+
999+
.. versionchanged:: 3.0.0
1000+
1001+
Prior to 3.0.0, the sort was unstable.
9971002
ascending : bool, default False
9981003
Sort in ascending order.
9991004
bins : int, optional

pandas/core/frame.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7761,11 +7761,16 @@ def value_counts(
77617761
normalize : bool, default False
77627762
Return proportions rather than frequencies.
77637763
sort : bool, default True
7764-
Sort by frequencies when True. Preserve the order of the data when False.
7764+
Stable sort by frequencies when True. Preserve the order of the data
7765+
when False.
77657766
77667767
.. versionchanged:: 3.0.0
77677768
77687769
Prior to 3.0.0, ``sort=False`` would sort by the columns values.
7770+
7771+
.. versionchanged:: 3.0.0
7772+
7773+
Prior to 3.0.0, the sort was unstable.
77697774
ascending : bool, default False
77707775
Sort in ascending order.
77717776
dropna : bool, default True
@@ -7875,7 +7880,7 @@ def value_counts(
78757880
counts.name = name
78767881

78777882
if sort:
7878-
counts = counts.sort_values(ascending=ascending)
7883+
counts = counts.sort_values(ascending=ascending, kind="stable")
78797884
if normalize:
78807885
counts /= counts.sum()
78817886

pandas/core/groupby/generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2884,8 +2884,8 @@ def value_counts(
28842884
normalize : bool, default False
28852885
Return proportions rather than frequencies.
28862886
sort : bool, default True
2887-
Sort by frequencies when True. When False, non-grouping columns will appear
2888-
in the order they occur in within groups.
2887+
Stable sort by frequencies when True. When False, non-grouping
2888+
columns will appear in the order they occur within groups.
28892889
28902890
.. versionchanged:: 3.0.0
28912891

pandas/core/groupby/grouper.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,8 @@ def __init__(
460460
dropna: bool = True,
461461
uniques: ArrayLike | None = None,
462462
) -> None:
463+
if isinstance(grouper, Series):
464+
grouper = grouper.copy(deep=False)
463465
self.level = level
464466
self._orig_grouper = grouper
465467
grouping_vector = _convert_grouper(index, grouper)

pandas/core/indexes/multi.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2706,14 +2706,18 @@ def swaplevel(self, i=-2, j=-1) -> MultiIndex:
27062706
27072707
Calling this method does not change the ordering of the values.
27082708
2709+
Default is to swap the last two levels of the MultiIndex.
2710+
27092711
Parameters
27102712
----------
27112713
i : int, str, default -2
27122714
First level of index to be swapped. Can pass level name as string.
2713-
Type of parameters can be mixed.
2715+
Type of parameters can be mixed. If i is a negative int, the first
2716+
level is indexed relative to the end of the MultiIndex.
27142717
j : int, str, default -1
27152718
Second level of index to be swapped. Can pass level name as string.
2716-
Type of parameters can be mixed.
2719+
Type of parameters can be mixed. If j is a negative int, the second
2720+
level is indexed relative to the end of the MultiIndex.
27172721
27182722
Returns
27192723
-------
@@ -2729,20 +2733,33 @@ def swaplevel(self, i=-2, j=-1) -> MultiIndex:
27292733
Examples
27302734
--------
27312735
>>> mi = pd.MultiIndex(
2732-
... levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]]
2736+
... levels=[["a", "b"], ["bb", "aa"], ["aaa", "bbb"]],
2737+
... codes=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]],
27332738
... )
27342739
>>> mi
2735-
MultiIndex([('a', 'bb'),
2736-
('a', 'aa'),
2737-
('b', 'bb'),
2738-
('b', 'aa')],
2740+
MultiIndex([('a', 'bb', 'bbb'),
2741+
('a', 'aa', 'aaa'),
2742+
('b', 'bb', 'bbb'),
2743+
('b', 'aa', 'aaa')],
27392744
)
2740-
>>> mi.swaplevel(0, 1)
2741-
MultiIndex([('bb', 'a'),
2742-
('aa', 'a'),
2743-
('bb', 'b'),
2744-
('aa', 'b')],
2745+
>>> mi.swaplevel()
2746+
MultiIndex([('a', 'bbb', 'bb'),
2747+
('a', 'aaa', 'aa'),
2748+
('b', 'bbb', 'bb'),
2749+
('b', 'aaa', 'aa')],
2750+
)
2751+
>>> mi.swaplevel(0)
2752+
MultiIndex([('bbb', 'bb', 'a'),
2753+
('aaa', 'aa', 'a'),
2754+
('bbb', 'bb', 'b'),
2755+
('aaa', 'aa', 'b')],
27452756
)
2757+
>>> mi.swaplevel(0, 1)
2758+
MultiIndex([('bb', 'a', 'bbb'),
2759+
('aa', 'a', 'aaa'),
2760+
('bb', 'b', 'bbb'),
2761+
('aa', 'b', 'aaa')],
2762+
)
27462763
"""
27472764
new_levels = list(self.levels)
27482765
new_codes = list(self.codes)

pandas/io/feather_format.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def read_feather(
9696
path : str, path object, or file-like object
9797
String, path object (implementing ``os.PathLike[str]``), or file-like
9898
object implementing a binary ``read()`` function. The string could be a URL.
99-
Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
99+
Valid URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
100100
expected. A local file could be: ``file://localhost/path/to/table.feather``.
101101
columns : sequence, default None
102102
If not provided, all columns are read.

pandas/tests/copy_view/test_methods.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,19 @@ def test_groupby_column_index_in_references():
226226
tm.assert_frame_equal(result, expected)
227227

228228

229+
def test_groupby_modify_series():
230+
# https://github.com/pandas-dev/pandas/issues/63219
231+
# Modifying a Series after using it to groupby should not impact
232+
# the groupby operation.
233+
ser = Series([1, 2, 1])
234+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
235+
gb = df.groupby(ser)
236+
ser.iloc[0] = 100
237+
result = gb.sum()
238+
expected = DataFrame({"a": [4, 2], "b": [10, 5]}, index=[1, 2])
239+
tm.assert_frame_equal(result, expected)
240+
241+
229242
def test_rename_columns():
230243
# Case: renaming columns returns a new dataframe
231244
# + afterwards modifying the result

0 commit comments

Comments
 (0)