diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py
index 73548ad3eb283..842eed3ac6aa3 100644
--- a/pandas/tests/io/pytables/test_select.py
+++ b/pandas/tests/io/pytables/test_select.py
@@ -20,7 +20,6 @@
 )
 from pandas.tests.io.pytables.common import (
     _maybe_remove,
-    ensure_clean_store,
 )
 
 from pandas.io.pytables import Term
@@ -28,7 +27,7 @@
 pytestmark = [pytest.mark.single_cpu]
 
 
-def test_select_columns_in_where(setup_path):
+def test_select_columns_in_where(tmp_path):
     # GH 6169
     # recreate multi-indexes when columns is passed
     # in the `where` argument
@@ -44,8 +43,8 @@ def test_select_columns_in_where(setup_path):
         index=index,
         columns=["A", "B", "C"],
     )
-
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h5"
+    with HDFStore(path) as store:
         store.put("df", df, format="table")
 
         expected = df[["A"]]
@@ -55,19 +54,21 @@ def test_select_columns_in_where(setup_path):
 
     # With a Series
     s = Series(np.random.default_rng(2).standard_normal(10), index=index, name="A")
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h6"
+    with HDFStore(path) as store:
         store.put("s", s, format="table")
         tm.assert_series_equal(store.select("s", where="columns=['A']"), s)
 
 
-def test_select_with_dups(setup_path):
+def test_select_with_dups(tmp_path):
     # single dtypes
     df = DataFrame(
         np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "A", "B", "B"]
     )
-    df.index = date_range("20130101 9:30", periods=10, freq="min", unit="ns")
+    df.index = date_range("20130101 9:30", periods=10, freq="min")
 
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h7"
+    with HDFStore(path) as store:
         store.append("df", df)
 
         result = store.select("df")
@@ -96,9 +97,9 @@ def test_select_with_dups(setup_path):
         ],
         axis=1,
     )
-    df.index = date_range("20130101 9:30", periods=10, freq="min", unit="ns")
-
-    with ensure_clean_store(setup_path) as store:
+    df.index = date_range("20130101 9:30", periods=10, freq="min")
+    path = tmp_path / "file.h8"
+    with HDFStore(path) as store:
         store.append("df", df)
 
         result = store.select("df")
@@ -118,7 +119,8 @@ def test_select_with_dups(setup_path):
         tm.assert_frame_equal(result, expected, by_blocks=True)
 
     # duplicates on both index and columns
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h9"
+    with HDFStore(path) as store:
         store.append("df", df)
         store.append("df", df)
 
@@ -128,8 +130,9 @@ def test_select_with_dups(setup_path):
         tm.assert_frame_equal(result, expected, by_blocks=True)
 
 
-def test_select(setup_path):
-    with ensure_clean_store(setup_path) as store:
+def test_select(tmp_path):
+    path = tmp_path / "file.h10"
+    with HDFStore(path) as store:
         # select with columns=
         df = DataFrame(
             np.random.default_rng(2).standard_normal((10, 4)),
@@ -169,12 +172,13 @@ def test_select(setup_path):
         tm.assert_frame_equal(expected, result)
 
 
-def test_select_dtypes(setup_path, request):
-    with ensure_clean_store(setup_path) as store:
+def test_select_dtypes(tmp_path, request):
+    path = tmp_path / "file.h11"
+    with HDFStore(path) as store:
         # with a Timestamp data column (GH #2637)
         df = DataFrame(
             {
-                "ts": bdate_range("2012-01-01", periods=300, unit="ns"),
+                "ts": bdate_range("2012-01-01", periods=300),
                 "A": np.random.default_rng(2).standard_normal(300),
             }
         )
@@ -232,7 +236,8 @@ def test_select_dtypes(setup_path, request):
         expected = df.reindex(index=list(df.index)[0:10], columns=["A"])
         tm.assert_frame_equal(expected, result)
 
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h12"
+    with HDFStore(path) as store:
         # floats w/o NaN
         df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64")
         df["cols"] = (df["cols"] + 10).apply(str)
@@ -251,7 +256,7 @@ def test_select_dtypes(setup_path, request):
         tm.assert_frame_equal(expected, result)
 
         # https://github.com/PyTables/PyTables/issues/282
-        # bug in selection when 0th row has an np.nan and an index
+        # bug in selection when 0th row has a np.nan and an index
         # store.append('df3',df,data_columns=True)
         # result = store.select(
         #     'df3', where='values>2.0')
@@ -270,7 +275,8 @@ def test_select_dtypes(setup_path, request):
 
     # test selection with comparison against numpy scalar
    # GH 11283
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h13"
+    with HDFStore(path) as store:
         df = DataFrame(
             1.1 * np.arange(120).reshape((30, 4)),
             columns=Index(list("ABCD")),
@@ -292,11 +298,12 @@ def test_select_dtypes(setup_path, request):
         tm.assert_frame_equal(expected, result)
 
 
-def test_select_with_many_inputs(setup_path):
-    with ensure_clean_store(setup_path) as store:
+def test_select_with_many_inputs(tmp_path):
+    path = tmp_path / "file.h14"
+    with HDFStore(path) as store:
         df = DataFrame(
             {
-                "ts": bdate_range("2012-01-01", periods=300, unit="ns"),
+                "ts": bdate_range("2012-01-01", periods=300),
                 "A": np.random.default_rng(2).standard_normal(300),
                 "B": range(300),
                 "users": ["a"] * 50
@@ -340,9 +347,10 @@ def test_select_with_many_inputs(setup_path):
         assert len(result) == 100
 
 
-def test_select_iterator(tmp_path, setup_path):
+def test_select_iterator(tmp_path):
     # single table
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h15"
+    with HDFStore(path) as store:
         df = DataFrame(
             np.random.default_rng(2).standard_normal((10, 4)),
             columns=Index(list("ABCD")),
@@ -366,7 +374,7 @@ def test_select_iterator(tmp_path, setup_path):
         result = concat(results)
         tm.assert_frame_equal(result, expected)
 
-    path = tmp_path / setup_path
+    path = tmp_path / "file.h15"
 
     df = DataFrame(
         np.random.default_rng(2).standard_normal((10, 4)),
@@ -382,7 +390,7 @@ def test_select_iterator(tmp_path, setup_path):
     with pytest.raises(TypeError, match=msg):
         read_hdf(path, "df_non_table", iterator=True)
 
-    path = tmp_path / setup_path
+    path = tmp_path / "file.h15"
 
     df = DataFrame(
         np.random.default_rng(2).standard_normal((10, 4)),
@@ -400,7 +408,8 @@ def test_select_iterator(tmp_path, setup_path):
 
     # multiple
 
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h16"
+    with HDFStore(path) as store:
         df1 = DataFrame(
             np.random.default_rng(2).standard_normal((10, 4)),
             columns=Index(list("ABCD")),
@@ -422,17 +431,18 @@ def test_select_iterator(tmp_path, setup_path):
         tm.assert_frame_equal(expected, result)
 
 
-def test_select_iterator_complete_8014(setup_path):
+def test_select_iterator_complete_8014(tmp_path):
     # GH 8014
     # using iterator and where clause
     chunksize = 1e4
 
     # no iterator
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h17"
+    with HDFStore(path) as store:
         expected = DataFrame(
             np.random.default_rng(2).standard_normal((100064, 4)),
             columns=Index(list("ABCD")),
-            index=date_range("2000-01-01", periods=100064, freq="s", unit="ns"),
+            index=date_range("2000-01-01", periods=100064, freq="s"),
         )
         _maybe_remove(store, "df")
         store.append("df", expected)
@@ -463,11 +473,12 @@ def test_select_iterator_complete_8014(setup_path):
         tm.assert_frame_equal(expected, result)
 
     # with iterator, full range
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h18"
+    with HDFStore(path) as store:
         expected = DataFrame(
             np.random.default_rng(2).standard_normal((100064, 4)),
             columns=Index(list("ABCD")),
-            index=date_range("2000-01-01", periods=100064, freq="s", unit="ns"),
+            index=date_range("2000-01-01", periods=100064, freq="s"),
         )
         _maybe_remove(store, "df")
         store.append("df", expected)
@@ -499,17 +510,18 @@ def test_select_iterator_complete_8014(setup_path):
         tm.assert_frame_equal(expected, result)
 
 
-def test_select_iterator_non_complete_8014(setup_path):
+def test_select_iterator_non_complete_8014(tmp_path):
     # GH 8014
     # using iterator and where clause
     chunksize = 1e4
 
     # with iterator, non complete range
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h19"
+    with HDFStore(path) as store:
         expected = DataFrame(
             np.random.default_rng(2).standard_normal((100064, 4)),
             columns=Index(list("ABCD")),
-            index=date_range("2000-01-01", periods=100064, freq="s", unit="ns"),
+            index=date_range("2000-01-01", periods=100064, freq="s"),
        )
         _maybe_remove(store, "df")
         store.append("df", expected)
@@ -539,11 +551,12 @@ def test_select_iterator_non_complete_8014(setup_path):
         tm.assert_frame_equal(rexpected, result)
 
     # with iterator, empty where
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h20"
+    with HDFStore(path) as store:
         expected = DataFrame(
             np.random.default_rng(2).standard_normal((100064, 4)),
             columns=Index(list("ABCD")),
-            index=date_range("2000-01-01", periods=100064, freq="s", unit="ns"),
+            index=date_range("2000-01-01", periods=100064, freq="s"),
         )
         _maybe_remove(store, "df")
         store.append("df", expected)
@@ -556,18 +569,19 @@ def test_select_iterator_non_complete_8014(setup_path):
         assert 0 == len(results)
 
 
-def test_select_iterator_many_empty_frames(setup_path):
+def test_select_iterator_many_empty_frames(tmp_path):
     # GH 8014
     # using iterator and where clause can return many empty
     # frames.
     chunksize = 10_000
 
     # with iterator, range limited to the first chunk
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h21"
+    with HDFStore(path) as store:
         expected = DataFrame(
             np.random.default_rng(2).standard_normal((100064, 4)),
             columns=Index(list("ABCD")),
-            index=date_range("2000-01-01", periods=100064, freq="s", unit="ns"),
+            index=date_range("2000-01-01", periods=100064, freq="s"),
         )
         _maybe_remove(store, "df")
         store.append("df", expected)
@@ -615,14 +629,15 @@ def test_select_iterator_many_empty_frames(setup_path):
         assert len(results) == 0
 
 
-def test_frame_select(setup_path, request):
+def test_frame_select(tmp_path, request):
     df = DataFrame(
         np.random.default_rng(2).standard_normal((10, 4)),
         columns=Index(list("ABCD")),
         index=date_range("2000-01-01", periods=10, freq="B", unit="ns"),
     )
 
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h22"
+    with HDFStore(path) as store:
         store.put("frame", df, format="table")
 
         date = df.index[len(df) // 2]
@@ -664,7 +679,7 @@ def test_frame_select(setup_path, request):
     # store.select('frame', [crit1, crit2])
 
 
-def test_frame_select_complex(setup_path):
+def test_frame_select_complex(tmp_path):
     # select via complex criteria
 
     df = DataFrame(
@@ -675,7 +690,8 @@ def test_frame_select_complex(setup_path):
     df["string"] = "foo"
     df.loc[df.index[0:4], "string"] = "bar"
 
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h23"
+    with HDFStore(path) as store:
         store.put("df", df, format="table", data_columns=["string"])
 
         # empty
@@ -781,7 +797,7 @@ def test_frame_select_complex2(tmp_path):
     tm.assert_frame_equal(result, expected)
 
 
-def test_invalid_filtering(setup_path):
+def test_invalid_filtering(tmp_path):
     # can't use more than one filter (atm)
 
     df = DataFrame(
@@ -790,7 +806,8 @@ def test_invalid_filtering(setup_path):
         index=date_range("2000-01-01", periods=10, freq="B", unit="ns"),
     )
 
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h24"
+    with HDFStore(path) as store:
         store.put("df", df, format="table")
 
         msg = "unable to collapse Joint Filters"
@@ -803,9 +820,10 @@ def test_invalid_filtering(setup_path):
             store.select("df", "columns=['A','B'] & columns=['C']")
 
 
-def test_string_select(setup_path):
+def test_string_select(tmp_path):
     # GH 2973
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h25"
+    with HDFStore(path) as store:
         df = DataFrame(
             np.random.default_rng(2).standard_normal((10, 4)),
             columns=Index(list("ABCD")),
@@ -849,7 +867,7 @@ def test_string_select(setup_path):
         tm.assert_frame_equal(result, expected)
 
 
-def test_select_as_multiple(setup_path):
+def test_select_as_multiple(tmp_path):
     df1 = DataFrame(
         np.random.default_rng(2).standard_normal((10, 4)),
         columns=Index(list("ABCD")),
@@ -858,7 +876,8 @@ def test_select_as_multiple(setup_path):
     df2 = df1.copy().rename(columns="{}_2".format)
     df2["foo"] = "bar"
 
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h26"
+    with HDFStore(path) as store:
         msg = "keys must be a list/tuple"
         # no tables stored
         with pytest.raises(TypeError, match=msg):
@@ -924,8 +943,9 @@ def test_select_as_multiple(setup_path):
             )
 
 
-def test_nan_selection_bug_4858(setup_path):
-    with ensure_clean_store(setup_path) as store:
+def test_nan_selection_bug_4858(tmp_path):
+    path = tmp_path / "file.h27"
+    with HDFStore(path) as store:
         df = DataFrame({"cols": range(6), "values": range(6)}, dtype="float64")
         df["cols"] = (df["cols"] + 10).apply(str)
         df.iloc[0] = np.nan
@@ -941,7 +961,7 @@ def test_nan_selection_bug_4858(setup_path):
         tm.assert_frame_equal(result, expected)
 
 
-def test_query_with_nested_special_character(setup_path):
+def test_query_with_nested_special_character(tmp_path):
     df = DataFrame(
         {
             "a": ["a", "a", "c", "b", "test & test", "c", "b", "e"],
@@ -949,17 +969,19 @@ def test_query_with_nested_special_character(setup_path):
         }
     )
     expected = df[df.a == "test & test"]
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h28"
+    with HDFStore(path) as store:
         store.append("test", df, format="table", data_columns=True)
         result = store.select("test", 'a = "test & test"')
     tm.assert_frame_equal(expected, result)
 
 
-def test_query_long_float_literal(setup_path):
+def test_query_long_float_literal(tmp_path):
     # GH 14241
     df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]})
 
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h29"
+    with HDFStore(path) as store:
         store.append("test", df, format="table", data_columns=True)
 
         cutoff = 1000000000.0006
@@ -977,19 +999,20 @@ def test_query_long_float_literal(setup_path):
         tm.assert_frame_equal(expected, result)
 
 
-def test_query_compare_column_type(setup_path):
+def test_query_compare_column_type(tmp_path):
     # GH 15492
     df = DataFrame(
         {
             "date": ["2014-01-01", "2014-01-02"],
-            "real_date": date_range("2014-01-01", periods=2, unit="ns"),
+            "real_date": date_range("2014-01-01", periods=2),
             "float": [1.1, 1.2],
             "int": [1, 2],
         },
         columns=["date", "real_date", "float", "int"],
     )
 
-    with ensure_clean_store(setup_path) as store:
+    path = tmp_path / "file.h30"
+    with HDFStore(path) as store:
         store.append("test", df, format="table", data_columns=True)
 
         ts = Timestamp("2014-01-01")  # noqa: F841