initial commit
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
33
venv/Lib/site-packages/pandas/tests/resample/conftest.py
Normal file
33
venv/Lib/site-packages/pandas/tests/resample/conftest.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import pytest
|
||||
|
||||
# Names of the resample aggregations exercised by the tests, grouped by kind.
downsample_methods = [
    "min", "max", "first", "last", "sum", "mean", "sem",
    "median", "prod", "var", "std", "ohlc", "quantile",
]
upsample_methods = ["count", "size"]
series_methods = ["nunique"]
# Complete list, ordered: downsample methods, upsample methods, series methods.
resample_methods = [*downsample_methods, *upsample_methods, *series_methods]
|
||||
|
||||
|
||||
@pytest.fixture(params=downsample_methods)
def downsample_method(request):
    """Return one entry of ``downsample_methods`` per parametrized test run."""
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.fixture(params=resample_methods)
def resample_method(request):
    """Return one entry of ``resample_methods`` per parametrized test run."""
    method_name = request.param
    return method_name
|
||||
554
venv/Lib/site-packages/pandas/tests/resample/test_base.py
Normal file
554
venv/Lib/site-packages/pandas/tests/resample/test_base.py
Normal file
@@ -0,0 +1,554 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import Pandas4Warning
|
||||
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.groupby.groupby import DataError
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import period_range
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
from pandas.core.resample import _asfreq_compat
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        "linear", "time", "index", "values",
        "nearest", "zero", "slinear", "quadratic", "cubic",
        "barycentric", "krogh", "from_derivatives", "piecewise_polynomial",
        "pchip", "akima",
    ],
)
def all_1d_no_arg_interpolation_methods(request):
    """Return one interpolation method name per parametrized test run."""
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.mark.parametrize("freq", ["2D", "1h"])
@pytest.mark.parametrize(
    "index",
    [
        timedelta_range("1 day", "10 day", freq="D"),
        date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
    ],
)
def test_asfreq(frame_or_series, index, freq):
    """``resample(freq).asfreq()`` must match reindexing onto a regular range."""
    obj = frame_or_series(range(len(index)), index=index)

    # Build the expected grid with the range constructor matching the index kind.
    if isinstance(index, DatetimeIndex):
        make_range = date_range
    else:
        make_range = timedelta_range

    result = obj.resample(freq).asfreq()

    first, last = obj.index[0], obj.index[-1]
    expected = obj.reindex(make_range(first, last, freq=freq))
    tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        timedelta_range("1 day", "10 day", freq="D"),
        date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
    ],
)
def test_asfreq_fill_value(index):
    """``asfreq`` with and without ``fill_value`` must match ``reindex``."""
    # test for fill value during resampling, issue 3715
    if isinstance(index, DatetimeIndex):
        make_range = date_range
    else:
        make_range = timedelta_range

    ser = Series(range(len(index)), index=index, name="a")

    # Without a fill value: plain reindex onto the hourly grid.
    result = ser.resample("1h").asfreq()
    hourly = make_range(ser.index[0], ser.index[-1], freq="1h")
    expected = ser.reindex(hourly)
    tm.assert_series_equal(result, expected)

    # Cast to float up front so that assigning None below does not trigger
    # an implicit cast.
    frame = ser.astype("float").to_frame("value")
    frame.iloc[1] = None

    result = frame.resample("1h").asfreq(fill_value=4.0)
    hourly = make_range(frame.index[0], frame.index[-1], freq="1h")
    expected = frame.reindex(hourly, fill_value=4.0)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        timedelta_range("1 day", "3 day", freq="D"),
        date_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
        period_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
    ],
)
def test_resample_interpolate(index):
    """Interpolating via ``asfreq()`` and directly on the resampler must agree."""
    # GH#12925
    df = DataFrame(range(len(index)), index=index)

    via_asfreq = df.resample("1min").asfreq().interpolate()
    direct = df.resample("1min").interpolate()

    tm.assert_frame_equal(via_asfreq, direct)
|
||||
|
||||
|
||||
def test_resample_interpolate_inplace_deprecated():
    """``inplace`` on ``Resampler.interpolate`` warns; ``inplace=True`` also raises."""
    # GH#58690
    dti = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    rs = DataFrame(range(len(dti)), index=dti).resample("1min")

    warn_msg = "The 'inplace' keyword in DatetimeIndexResampler.interpolate"
    with tm.assert_produces_warning(Pandas4Warning, match=warn_msg):
        rs.interpolate(inplace=False)

    err_msg = "Cannot interpolate inplace on a resampled object"
    # The warning context is the inner one, exactly as with two nested ``with``s.
    with (
        pytest.raises(ValueError, match=err_msg),
        tm.assert_produces_warning(Pandas4Warning, match=warn_msg),
    ):
        rs.interpolate(inplace=True)
|
||||
|
||||
|
||||
def test_resample_interpolate_regular_sampling_off_grid(
    all_1d_no_arg_interpolation_methods,
):
    """Interpolate a 2-hourly series offset by one minute onto an hourly grid."""
    pytest.importorskip("scipy")
    # GH#21351
    method = all_1d_no_arg_interpolation_methods

    source_index = date_range("2000-01-01 00:01:00", periods=5, freq="2h")
    ser = Series(np.arange(5.0), source_index)
    result = ser.resample("1h").interpolate(method)

    # Methods with their own expected values; every other method shares the
    # same expectation.
    doubled = np.repeat(np.arange(0.0, 4.0), 2)
    special_cases = {
        "linear": doubled + np.tile([1 / 3, 2 / 3], 4),
        "nearest": np.repeat(np.arange(0.0, 5.0), 2)[1:-1],
        "zero": doubled,
    }
    generic = 0.491667 + np.arange(0.0, 4.0, 0.5)

    # The expected series starts with NaN.
    values = np.insert(special_cases.get(method, generic), 0, np.nan)
    expected = Series(
        values, index=date_range("2000-01-01 00:00:00", periods=9, freq="1h")
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_interpolate_irregular_sampling(all_1d_no_arg_interpolation_methods):
    """Resampling irregular data to 5s and interpolating leaves one leading NaN."""
    pytest.importorskip("scipy")
    # GH#21351
    timestamps = DatetimeIndex(
        [
            "2000-01-01 00:00:03",
            "2000-01-01 00:00:22",
            "2000-01-01 00:00:24",
            "2000-01-01 00:00:31",
            "2000-01-01 00:00:39",
        ]
    )
    ser = Series(np.linspace(0.0, 1.0, 5), index=timestamps)

    # Resample to 5 second sampling and interpolate with the given method
    method = all_1d_no_arg_interpolation_methods
    ser_resampled = ser.resample("5s").interpolate(method)

    # Only the first resampled value may be NaN: it lies 3 seconds before
    # the first actual data point.
    leading = ser_resampled.iloc[0]
    remainder = ser_resampled.iloc[1:]
    assert np.isnan(leading)
    assert not remainder.isna().any()
|
||||
|
||||
|
||||
def test_raises_on_non_datetimelike_index():
    """Resampling a frame whose index is not datetime-like raises ``TypeError``."""
    msg = (
        "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
        "but got an instance of 'RangeIndex'"
    )
    # this is a non datetimelike index
    frame = DataFrame()
    with pytest.raises(TypeError, match=msg):
        frame.resample("YE")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        PeriodIndex([], freq="D", name="a"),
        DatetimeIndex([], name="a"),
        TimedeltaIndex([], name="a"),
    ],
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_empty_series(freq, index, resample_method):
    """Every resample method on an empty Series returns an empty, re-freq'd result."""
    # GH12771 & GH12868
    ser = Series(index=index, dtype=float)

    if freq == "ME":
        if isinstance(ser.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                ser.resample(freq)
            return
        if isinstance(ser.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    result = getattr(ser.resample(freq), resample_method)()

    # ohlc yields a four-column frame; every other method yields a Series.
    if resample_method == "ohlc":
        expected = DataFrame(
            [], index=ser.index[:0], columns=["open", "high", "low", "close"]
        )
        check_equal = tm.assert_frame_equal
    else:
        expected = ser.copy()
        check_equal = tm.assert_series_equal
    expected.index = _asfreq_compat(ser.index, freq)
    check_equal(result, expected, check_dtype=False)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
|
||||
|
||||
|
||||
@pytest.mark.parametrize("min_count", [0, 1])
def test_resample_empty_sum_string(string_dtype_no_object, min_count):
    """Summing an all-NA string Series gives "" for min_count=0, NA otherwise."""
    # https://github.com/pandas-dev/pandas/issues/60229
    dtype = string_dtype_no_object
    stamps = DatetimeIndex(
        [
            "2000-01-01 00:00:00",
            "2000-01-01 00:00:10",
            "2000-01-01 00:00:20",
            "2000-01-01 00:00:30",
        ]
    )
    ser = Series(pd.NA, index=stamps, dtype=dtype)

    result = ser.resample("20s").sum(min_count=min_count)

    if min_count == 0:
        value = ""
    else:
        value = pd.NA
    expected = Series(
        value,
        index=date_range(start="2000-01-01", freq="20s", periods=2, unit="us"),
        dtype=dtype,
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "freq",
    [
        pytest.param("ME", marks=pytest.mark.xfail(reason="Don't know why this fails")),
        "D",
        "h",
    ],
)
def test_resample_nat_index_series(freq, resample_method):
    """Resampling a Series indexed only by NaT periods yields an empty result."""
    # GH39227
    all_nat = PeriodIndex([NaT] * 5, freq=freq)
    ser = Series(range(5), index=all_nat)

    result = getattr(ser.resample(freq), resample_method)()

    # ohlc yields a four-column frame; every other method yields a Series.
    if resample_method == "ohlc":
        expected = DataFrame(
            [], index=ser.index[:0], columns=["open", "high", "low", "close"]
        )
        check_equal = tm.assert_frame_equal
    else:
        expected = ser[:0].copy()
        check_equal = tm.assert_series_equal
    check_equal(result, expected, check_dtype=False)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        PeriodIndex([], freq="D", name="a"),
        DatetimeIndex([], name="a"),
        TimedeltaIndex([], name="a"),
    ],
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, index, resample_method):
    """``count``/``size`` on an empty Series return an empty int64 Series."""
    # GH28427
    ser = Series(index=index)

    if freq == "ME":
        if isinstance(ser.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                ser.resample(freq)
            return
        if isinstance(ser.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    result = getattr(ser.resample(freq), resample_method)()

    expected = Series(
        [], dtype="int64", index=_asfreq_compat(ser.index, freq), name=ser.name
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")]
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_empty_dataframe(index, freq, resample_method):
    """Every resample method on an empty, column-less DataFrame stays empty."""
    # GH13212
    df = DataFrame(index=index)
    # count retains dimensions too
    if freq == "ME":
        if isinstance(df.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                df.resample(freq, group_keys=False)
            return
        if isinstance(df.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    resampler = df.resample(freq, group_keys=False)
    result = getattr(resampler, resample_method)()

    if resample_method == "size":
        # GH14962
        expected = Series([], dtype=np.int64)
    elif resample_method == "ohlc":
        # TODO: no tests with len(df.columns) > 0
        ohlc_columns = MultiIndex.from_product(
            [df.columns, ["open", "high", "low", "close"]]
        )
        expected = DataFrame(
            [], index=df.index[:0], columns=ohlc_columns, dtype=np.float64
        )
    else:
        expected = df.copy()

    # All three expectations share the same re-frequencied empty index.
    expected.index = _asfreq_compat(df.index, freq)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    tm.assert_almost_equal(result, expected)

    # test size for GH13212 (currently stays as df)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")]
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_count_empty_dataframe(freq, index):
    """``count`` on an empty one-column DataFrame returns an empty int64 frame."""
    # GH28427
    empty_frame = DataFrame(index=index, columns=Index(["a"], dtype=object))

    if freq == "ME":
        if isinstance(empty_frame.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                empty_frame.resample(freq)
            return
        if isinstance(empty_frame.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    result = empty_frame.resample(freq).count()

    expected = DataFrame(
        dtype="int64",
        index=_asfreq_compat(empty_frame.index, freq),
        columns=Index(["a"], dtype=object),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")]
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_size_empty_dataframe(freq, index):
    """``size`` on an empty one-column DataFrame returns an empty int64 Series."""
    # GH28427
    empty_frame = DataFrame(index=index, columns=Index(["a"], dtype=object))

    if freq == "ME":
        if isinstance(empty_frame.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                empty_frame.resample(freq)
            return
        if isinstance(empty_frame.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    result = empty_frame.resample(freq).size()

    expected = Series(
        [], dtype="int64", index=_asfreq_compat(empty_frame.index, freq)
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [DatetimeIndex([]), TimedeltaIndex([])])
@pytest.mark.parametrize("freq", ["D", "h"])
@pytest.mark.parametrize(
    "method", ["ffill", "bfill", "nearest", "asfreq", "interpolate", "mean"]
)
def test_resample_apply_empty_dataframe(index, freq, method):
    """Applying a resampler's own bound method to an empty frame stays empty."""
    # GH#55572
    empty_frame = DataFrame(index=index)
    resampler = empty_frame.resample(freq)

    bound_method = getattr(resampler, method)
    result = resampler.apply(bound_method)

    expected = DataFrame([], index=_asfreq_compat(empty_frame.index, freq))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        PeriodIndex([], freq="M", name="a"),
        DatetimeIndex([], name="a"),
        TimedeltaIndex([], name="a"),
    ],
)
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
def test_resample_empty_dtypes(index, dtype, resample_method):
    """Smoke test: resample methods on empty Series of several dtypes must not crash."""
    # Empty series were sometimes causing a segfault (for the functions
    # with Cython bounds-checking disabled) or an IndexError. We just run
    # them to ensure they no longer do. (GH #10228)
    empty_series = Series([], index=index, dtype=dtype)
    resampler = empty_series.resample("D", group_keys=False)
    try:
        getattr(resampler, resample_method)()
    except DataError:
        # Deliberately ignored: some method/dtype combinations are invalid
        # (ex: doing mean with dtype of np.object_)
        pass
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        PeriodIndex([], freq="D", name="a"),
        DatetimeIndex([], name="a"),
        TimedeltaIndex([], name="a"),
    ],
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_apply_to_empty_series(index, freq):
    """``apply`` with a callable on an empty Series matches ``apply("sum")``."""
    # GH 14313
    ser = Series(index=index)

    if freq == "ME":
        if isinstance(ser.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                ser.resample(freq)
            return
        if isinstance(ser.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    via_callable = ser.resample(freq, group_keys=False).apply(lambda x: 1)
    via_name = ser.resample(freq).apply("sum")

    tm.assert_series_equal(via_callable, via_name, check_dtype=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        timedelta_range("1 day", "10 day", freq="D"),
        date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
        period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
    ],
)
def test_resampler_is_iterable(index):
    """Iterating a resampler yields the same (key, group) pairs as the groupby."""
    # GH 15314
    freq = "h"
    series = Series(range(len(index)), index=index)

    resampled = series.resample(freq)
    grouped = series.groupby(Grouper(freq=freq, convention="start"))

    for resampled_item, grouped_item in zip(resampled, grouped):
        resampled_key, resampled_values = resampled_item
        grouped_key, grouped_values = grouped_item
        assert resampled_key == grouped_key
        tm.assert_series_equal(resampled_values, grouped_values)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        timedelta_range("1 day", "10 day", freq="D"),
        date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
        period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
    ],
)
def test_resample_quantile(index):
    """``Resampler.quantile`` matches aggregating with ``Series.quantile``."""
    # GH 15023
    q, freq = 0.75, "h"
    ser = Series(range(len(index)), index=index)

    result = ser.resample(freq).quantile(q)

    aggregated = ser.resample(freq).agg(lambda x: x.quantile(q))
    expected = aggregated.rename(ser.name)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("how", ["first", "last"])
def test_first_last_skipna(any_real_nullable_dtype, skipna, how):
    """``first``/``last`` with ``skipna`` on a resampler match the groupby result."""
    # GH#57019
    dtype = any_real_nullable_dtype
    # Extension dtypes carry their own NA sentinel; otherwise use NaN.
    na_value = (
        Series(dtype=dtype).dtype.na_value
        if is_extension_array_dtype(dtype)
        else np.nan
    )

    data = {
        "a": [2, 1, 1, 2],
        "b": [na_value, 3.0, na_value, 4.0],
        "c": [na_value, 3.0, na_value, 4.0],
    }
    days = date_range("2020-01-01", periods=4, freq="D", unit="ns")
    df = DataFrame(data, index=days, dtype=dtype)

    result = getattr(df.resample("ME"), how)(skipna=skipna)

    # Every row falls in the same month, so group all rows under its month end.
    month_end = pd.to_datetime("2020-01-31").as_unit("ns")
    gb = df.groupby(df.shape[0] * [month_end])
    expected = getattr(gb, how)(skipna=skipna)
    expected.index.freq = "ME"
    tm.assert_frame_equal(result, expected)
|
||||
2190
venv/Lib/site-packages/pandas/tests/resample/test_datetime_index.py
Normal file
2190
venv/Lib/site-packages/pandas/tests/resample/test_datetime_index.py
Normal file
File diff suppressed because it is too large
Load Diff
1032
venv/Lib/site-packages/pandas/tests/resample/test_period_index.py
Normal file
1032
venv/Lib/site-packages/pandas/tests/resample/test_period_index.py
Normal file
File diff suppressed because it is too large
Load Diff
1018
venv/Lib/site-packages/pandas/tests/resample/test_resample_api.py
Normal file
1018
venv/Lib/site-packages/pandas/tests/resample/test_resample_api.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,671 @@
|
||||
from textwrap import dedent
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_windows
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
|
||||
|
||||
@pytest.fixture
def test_frame():
    """Return a 40-row frame indexed by seconds with group column A and values B."""
    group_labels = [1] * 20 + [2] * 12 + [3] * 8
    seconds = date_range("1/1/2000", freq="s", periods=40, unit="ns")
    return DataFrame({"A": group_labels, "B": np.arange(40)}, index=seconds)
|
||||
|
||||
|
||||
def test_tab_complete_ipython6_warning(ip):
    """Tab-completing on a resampler inside IPython must not fail on warnings."""
    from IPython.core.completer import provisionalcompleter

    code = dedent(
        """\
    import numpy as np
    from pandas import Series, date_range
    data = np.arange(10, dtype=np.float64)
    index = date_range("2020-01-01", periods=len(data))
    s = Series(data, index=index)
    rs = s.resample("D")
    """
    )
    ip.run_cell(code)

    # GH 31324 newer jedi version raises Deprecation warning;
    #  appears resolved 2021-02-02
    with (
        tm.assert_produces_warning(None, raise_on_extra_warnings=False),
        provisionalcompleter("ignore"),
    ):
        list(ip.Completer.completions("rs.", 1))
|
||||
|
||||
|
||||
def test_deferred_with_groupby():
    """Deferred ``groupby(...).resample(...)`` matches resampling inside ``apply``."""
    # GH 12486
    # support deferred resample ops with groupby
    rows = [
        ["2010-01-01", "A", 2],
        ["2010-01-02", "A", 3],
        ["2010-01-05", "A", 8],
        ["2010-01-10", "A", 7],
        ["2010-01-13", "A", 3],
        ["2010-01-01", "B", 5],
        ["2010-01-03", "B", 2],
        ["2010-01-04", "B", 1],
        ["2010-01-11", "B", 7],
        ["2010-01-14", "B", 3],
    ]
    df = DataFrame(rows, columns=["date", "id", "score"])
    df["date"] = pd.to_datetime(df["date"])

    def daily_asfreq(group):
        return group.set_index("date").resample("D").asfreq()

    expected = df.groupby("id").apply(daily_asfreq)
    result = df.set_index("date").groupby("id").resample("D").asfreq()
    tm.assert_frame_equal(result, expected)

    # Second case: weekly data forward-filled to daily within each group.
    weekly = {
        "date": date_range(start="2016-01-01", periods=4, freq="W"),
        "group": [1, 1, 2, 2],
        "val": [5, 6, 7, 8],
    }
    df = DataFrame(weekly).set_index("date")

    def daily_ffill(group):
        return group.resample("1D").ffill()

    expected = df.groupby("group").apply(daily_ffill)
    result = df.groupby("group").resample("1D").ffill()
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem(test_frame):
    """Selecting column B before, during or after resampling gives one result."""
    grouped = test_frame.groupby("A")
    expected = grouped.B.apply(lambda x: x.resample("2s").mean())

    # Selection on the resampler.
    tm.assert_series_equal(grouped.resample("2s").B.mean(), expected)

    # Selection on the groupby, before resampling.
    tm.assert_series_equal(grouped.B.resample("2s").mean(), expected)

    # Selection on the aggregated frame.
    tm.assert_series_equal(grouped.resample("2s").mean().B, expected)
|
||||
|
||||
|
||||
def test_getitem_multiple():
    """Repeating a selection on the same groupby-resampler gives the same result."""
    # GH 13174
    # multiple calls after selection causing an issue with aliasing
    records = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
    df = DataFrame(records, index=date_range("2016-01-01", periods=2))
    resampler = df.groupby("id").resample("1D")

    first_result = resampler["buyer"].count()
    expected = Series(
        [1, 1],
        index=pd.MultiIndex.from_arrays([[1, 2], df.index], names=("id", None)),
        name="buyer",
    )
    tm.assert_series_equal(first_result, expected)

    # Selecting again from the same resampler must not be affected by the
    # first selection.
    second_result = resampler["buyer"].count()
    tm.assert_series_equal(second_result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_api_with_getitem():
    """``resample(on="date")`` after groupby matches resampling on a date index."""
    # GH 17813
    df = DataFrame(
        {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1}
    )

    indexed = df.set_index("date")
    expected = indexed.groupby("id").resample("2D")["data"].sum()

    result = df.groupby("id").resample("2D", on="date")["data"].sum()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_with_origin():
    """A fixed ``origin`` makes Grouper bins independent of where the data starts."""
    # GH 31809
    freq = "1399min"  # prime number that is smaller than 24h
    start, end = "1/1/2000 00:00:00", "1/31/2000 00:00"
    middle = "1/15/2000 00:00:00"

    rng = date_range(start, end, freq="1231min")  # prime number
    ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
    ts2 = ts[middle:end]

    def count_by(series, grouper):
        # Number of observations per bin of ``grouper``.
        return series.groupby(grouper).agg("count")

    # proves that grouper without a fixed origin does not work
    # when dealing with unusual frequencies
    simple_grouper = pd.Grouper(freq=freq)
    count_ts = count_by(ts, simple_grouper)[middle:end]
    count_ts2 = count_by(ts2, simple_grouper)
    with pytest.raises(AssertionError, match="Index are different"):
        tm.assert_index_equal(count_ts.index, count_ts2.index)

    # test origin on 1970-01-01 00:00:00
    origin = Timestamp(0)
    adjusted_grouper = pd.Grouper(freq=freq, origin=origin)
    adjusted_count_ts = count_by(ts, adjusted_grouper)[middle:end]
    adjusted_count_ts2 = count_by(ts2, adjusted_grouper)
    tm.assert_series_equal(adjusted_count_ts, adjusted_count_ts2)

    # test origin on 2049-10-18 20:00:00
    origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000
    adjusted_grouper2 = pd.Grouper(freq=freq, origin=origin_future)
    adjusted2_count_ts = count_by(ts, adjusted_grouper2)[middle:end]
    adjusted2_count_ts2 = count_by(ts2, adjusted_grouper2)
    tm.assert_series_equal(adjusted2_count_ts, adjusted2_count_ts2)

    # both grouper use an adjusted timestamp that is a multiple of 1399 min
    # they should be equals even if the adjusted_timestamp is in the future
    tm.assert_series_equal(adjusted_count_ts, adjusted2_count_ts2)
|
||||
|
||||
|
||||
def test_nearest():
    """Upsampling minute data to 20s with ``nearest`` picks the closest value."""
    # GH 17496
    # Resample nearest
    minutes = date_range("1/1/2000", periods=3, freq="min", unit="ns")
    ser = Series(range(3), index=minutes)
    result = ser.resample("20s").nearest()

    expected_index = pd.DatetimeIndex(
        [
            "2000-01-01 00:00:00",
            "2000-01-01 00:00:20",
            "2000-01-01 00:00:40",
            "2000-01-01 00:01:00",
            "2000-01-01 00:01:20",
            "2000-01-01 00:01:40",
            "2000-01-01 00:02:00",
        ],
        dtype="datetime64[ns]",
        freq="20s",
    )
    expected = Series([0, 0, 1, 1, 1, 2, 2], index=expected_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "f",
    [
        "first",
        "last",
        "median",
        "sem",
        "sum",
        "mean",
        "min",
        "max",
        "size",
        "count",
        "nearest",
        "bfill",
        "ffill",
        "asfreq",
        "ohlc",
    ],
)
def test_methods(f, test_frame):
    """Each method on a groupby-resampler matches resampling inside ``apply``."""
    grouped = test_frame.groupby("A")

    def resample_group(group):
        # Resample one group on its own and call the method under test.
        return getattr(group.resample("2s"), f)()

    result = getattr(grouped.resample("2s"), f)()
    expected = grouped.apply(resample_group)
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_methods_nunique(test_frame):
    """``nunique`` on a selected column matches resampling inside ``apply``."""
    # series only
    grouped = test_frame.groupby("A")

    result = grouped.resample("2s").B.nunique()
    expected = grouped.B.apply(lambda x: x.resample("2s").nunique())
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("f", ["std", "var"])
def test_methods_std_var(f, test_frame):
    """``std``/``var`` with ``ddof=1`` match resampling inside ``apply``."""
    grouped = test_frame.groupby("A")

    def resample_group(group):
        # Resample one group on its own and call the method under test.
        return getattr(group.resample("2s"), f)(ddof=1)

    result = getattr(grouped.resample("2s"), f)(ddof=1)
    expected = grouped.apply(resample_group)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply(test_frame):
    """``apply`` on the resampler or on the groupby reproduces the direct ``sum``."""
    grouped = test_frame.groupby("A")
    resampler = grouped.resample("2s")

    # reduction
    expected = grouped.resample("2s").sum()

    def resample_sum(group):
        return group.resample("2s").sum()

    result = resampler.apply(resample_sum)
    tm.assert_frame_equal(result, expected)

    def resample_apply_sum(group):
        return group.resample("2s").apply(lambda y: y.sum())

    result = grouped.apply(resample_apply_sum)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_with_mutated_index():
    """``resample.apply`` with a function returning a new index matches groupby."""
    # GH 15169
    index = date_range("1-1-2015", "12-31-15", freq="D")
    values = np.random.default_rng(2).random(len(index))
    df = DataFrame(data={"col1": values}, index=index)

    def f(x):
        # Ignores its input and returns a Series with an unrelated index.
        return Series([1, 2], index=["a", "b"])

    monthly = pd.Grouper(freq="ME")
    expected = df.groupby(monthly).apply(f)
    result = df.resample("ME").apply(f)
    tm.assert_frame_equal(result, expected)

    # A case for series
    column = df["col1"]
    expected = column.groupby(pd.Grouper(freq="ME"), group_keys=False).apply(f)
    result = df["col1"].resample("ME").apply(f)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_columns_multilevel():
    """``resample.apply`` can dispatch per column on MultiIndex column tuples."""
    # GH 16231
    column_tuples = [("A", "a", "", "one"), ("B", "b", "i", "two")]
    cols = pd.MultiIndex.from_tuples(column_tuples)
    ind = date_range(start="2017-01-01", freq="15Min", periods=8)
    zeros = np.array([0] * 16, dtype=np.int64).reshape(8, 2)
    df = DataFrame(zeros, index=ind, columns=cols)

    # Sum the column whose last level is "one", average the other.
    agg_dict = {}
    for col in df.columns:
        agg_dict[col] = np.sum if col[3] == "one" else np.mean

    result = df.resample("h").apply(lambda x: agg_dict[x.name](x))

    expected = DataFrame(
        2 * [[0, 0.0]],
        index=date_range(start="2017-01-01", freq="1h", periods=2),
        columns=pd.MultiIndex.from_tuples(
            [("A", "a", "", "one"), ("B", "b", "i", "two")]
        ),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_non_naive_index():
    """``resample.apply`` forwards keyword arguments when the index is tz-aware."""

    def weighted_quantile(series, weights, q):
        # First value (in sorted order) whose cumulative weight reaches the
        # q-fraction of the total weight.
        ordered = series.sort_values()
        cumulative = weights.reindex(ordered.index).fillna(0).cumsum()
        threshold = cumulative.iloc[-1] * q
        return ordered[cumulative >= threshold].iloc[0]

    times = date_range("2017-6-23 18:00", periods=8, freq="15min", tz="UTC")
    data = Series([1.0, 1, 1, 1, 1, 2, 2, 0], index=times)
    weights = Series([160.0, 91, 65, 43, 24, 10, 1, 0], index=times)

    result = data.resample("D").apply(weighted_quantile, weights=weights, q=0.5)

    single_day = date_range(
        "2017-06-23 00:00:00+00:00", "2017-06-23 00:00:00+00:00", freq="D", tz="UTC"
    )
    expected = Series([1.0], index=single_day)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_with_label(unit):
    """Sum a groupby + weekly resample with ``label="left"`` and check the MultiIndex (GH 13235)."""
    # GH 13235
    index = date_range("2000-01-01", freq="2D", periods=5, unit=unit)
    columns = {"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]}
    df = DataFrame(index=index, data=columns)
    result = df.groupby("col0").resample("1W", label="left").sum()

    group_level = np.array([0, 0, 1, 2], dtype=np.int64)
    week_level = np.array(
        ["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"],
        dtype=f"M8[{unit}]",
    )
    mindex = pd.MultiIndex.from_arrays(
        [group_level, week_level], names=["col0", None]
    )
    expected = DataFrame(data={"col1": [1, 1, 2, 1]}, index=mindex)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_consistency_with_window(test_frame):
    """Groupby-resample and groupby-rolling both return a 2-level index keyed by ``A``."""
    # consistent return values with window
    expected = Index([1, 2, 3], name="A")

    def check_outer_level(result):
        assert result.index.nlevels == 2
        tm.assert_index_equal(result.index.levels[0], expected)

    resampled = test_frame.groupby("A").resample("2s").mean()
    check_outer_level(resampled)

    rolled = test_frame.groupby("A").rolling(20).mean()
    check_outer_level(rolled)
|
||||
|
||||
|
||||
def test_median_duplicate_columns():
    """Resampled median with duplicate column labels equals the unique-label result (GH 14233)."""
    # GH 14233
    values = np.random.default_rng(2).standard_normal((20, 3))
    seconds = date_range("2012-01-01", periods=20, freq="s")
    df = DataFrame(values, columns=list("aaa"), index=seconds)

    result = df.resample("5s").median()

    # Same data under unique labels; the labels are then aligned so that
    # only the values are compared.
    df.columns = ["a", "b", "c"]
    expected = df.resample("5s").median()
    expected.columns = result.columns
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_to_one_column_of_df():
    """A UDF that reads one column, by attribute or by key, works with ``resample.apply`` (GH 36951)."""
    # GH: 36951
    index = date_range("2012-01-01", periods=10, freq="20min")
    df = DataFrame({"col": range(10), "col1": range(10, 20)}, index=index)
    expected = Series(
        [3, 12, 21, 9], index=date_range("2012-01-01", periods=4, freq="h")
    )

    # access "col" via getattr -> make sure we handle AttributeError
    by_attribute = df.resample("h").apply(lambda group: group.col.sum())
    tm.assert_series_equal(by_attribute, expected)

    # access "col" via __getitem__ -> make sure we handle KeyError
    by_key = df.resample("h").apply(lambda group: group["col"].sum())
    tm.assert_series_equal(by_key, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_agg():
|
||||
# GH: 33548
|
||||
df = DataFrame(
|
||||
{
|
||||
"cat": [
|
||||
"cat_1",
|
||||
"cat_1",
|
||||
"cat_2",
|
||||
"cat_1",
|
||||
"cat_2",
|
||||
"cat_1",
|
||||
"cat_2",
|
||||
"cat_1",
|
||||
],
|
||||
"num": [5, 20, 22, 3, 4, 30, 10, 50],
|
||||
"date": [
|
||||
"2019-2-1",
|
||||
"2018-02-03",
|
||||
"2020-3-11",
|
||||
"2019-2-2",
|
||||
"2019-2-2",
|
||||
"2018-12-4",
|
||||
"2020-3-11",
|
||||
"2020-12-12",
|
||||
],
|
||||
}
|
||||
)
|
||||
df["date"] = pd.to_datetime(df["date"])
|
||||
|
||||
resampled = df.groupby("cat").resample("YE", on="date")
|
||||
expected = resampled[["num"]].sum()
|
||||
result = resampled.agg({"num": "sum"})
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_agg_listlike():
|
||||
# GH 42905
|
||||
ts = Timestamp("2021-02-28 00:00:00")
|
||||
df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date"))
|
||||
resampled = df.groupby("class").resample("ME")["value"]
|
||||
result = resampled.agg(["sum", "size"])
|
||||
expected = DataFrame(
|
||||
[[69, 1]],
|
||||
index=pd.MultiIndex.from_tuples([("beta", ts)], names=["class", "date"]),
|
||||
columns=["sum", "size"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
def test_empty(keys):
    """Groupby-resample mean on an empty TimedeltaIndex frame returns an empty frame (GH 26411)."""
    # GH 26411
    df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([]))
    one_second = pd.to_timedelta("00:00:01")
    result = df.groupby(keys).resample(rule=one_second).mean()

    if keys == ["a"]:
        expected_columns = ["b"]
    else:
        expected_columns = []

    keyed = DataFrame(columns=["a", "b"]).set_index(keys, drop=False)
    with_time_level = keyed.set_index(TimedeltaIndex([]), append=True)
    expected = with_time_level[expected_columns]
    if len(keys) == 1:
        expected.index.name = keys[0]

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("consolidate", [True, False])
def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
    """Weekly ``min`` per key when one key's object column is entirely missing."""
    # https://github.com/pandas-dev/pandas/issues/39329

    dates = date_range("2020-01-01", periods=15, freq="D", unit="ns")
    # Only key "A" carries "col_object"; after concat it is missing for key "B".
    df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
    df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
    df = pd.concat([df1, df2], ignore_index=True)
    if consolidate:
        df = df._consolidate()

    result = df.groupby(["key"]).resample("W", on="date").min()

    key_level = ["A"] * 3 + ["B"] * 3
    week_level = pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2).as_unit(
        "ns"
    )
    idx = pd.MultiIndex.from_arrays([key_level, week_level], names=["key", "date"])
    expected = DataFrame(
        {
            "col1": [0, 5, 12] * 2,
            "col_object": ["val"] * 3 + [np.nan] * 3,
        },
        index=idx,
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("min_count", [0, 1])
def test_groupby_resample_empty_sum_string(
    string_dtype_no_object, test_frame, min_count
):
    """Sum an all-NA string column through groupby-resample for ``min_count`` 0 and 1."""
    # https://github.com/pandas-dev/pandas/issues/60229
    dtype = string_dtype_no_object
    all_missing = pd.array([pd.NA] * len(test_frame), dtype=dtype)
    test_frame = test_frame.assign(B=all_missing)

    gbrs = test_frame.groupby("A").resample("40s")
    result = gbrs.sum(min_count=min_count)

    bin_start = pd.to_datetime("2000-01-01", unit="ns").as_unit("ns")
    index = pd.MultiIndex(
        levels=[[1, 2, 3], [bin_start]],
        codes=[[0, 1, 2], [0, 0, 0]],
        names=["A", None],
    )
    # Expected fill: empty string when min_count is 0, otherwise NA.
    if min_count == 0:
        value = ""
    else:
        value = pd.NA
    expected = DataFrame({"B": value}, index=index, dtype=dtype)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_with_list_of_keys():
    """Select columns with a list after ``groupby().resample(on=...)`` and take the mean (GH 47362)."""
    # GH 47362
    frame_data = {
        "date": date_range(start="2016-01-01", periods=8),
        "group": [0, 0, 0, 0, 1, 1, 1, 1],
        "val": [1, 7, 5, 2, 3, 10, 5, 1],
    }
    df = DataFrame(data=frame_data)
    result = df.groupby("group").resample("2D", on="date")[["val"]].mean()

    # Every second date is a bin start.
    bin_starts = df["date"]._values[::2]
    mi_exp = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], bin_starts], names=["group", "date"]
    )
    expected = DataFrame(data={"val": [4.0, 3.5, 6.5, 3.0]}, index=mi_exp)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
def test_resample_no_index(keys):
    """Groupby-resample mean on an empty frame indexed by an empty datetime column (GH 47705)."""
    # GH 47705
    df = DataFrame([], columns=["a", "b", "date"])
    df["date"] = pd.to_datetime(df["date"])
    df = df.set_index("date")
    one_second = pd.to_timedelta("00:00:01")
    result = df.groupby(keys).resample(rule=one_second).mean()

    if keys == ["a"]:
        expected_columns = ["b"]
    else:
        expected_columns = []

    expected = DataFrame(columns=["a", "b", "date"]).set_index(keys, drop=False)
    expected["date"] = pd.to_datetime(expected["date"])
    expected = expected.set_index("date", append=True, drop=True)
    expected = expected[expected_columns]
    if len(keys) == 1:
        expected.index.name = keys[0]

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_no_columns():
    """Groupby-resample mean on a frame that has an index but no columns (GH#52484)."""
    # GH#52484
    source_times = pd.to_datetime(
        ["2018-01-01 00:00:00", "2018-01-01 12:00:00", "2018-01-02 00:00:00"]
    )
    df = DataFrame(index=Index(source_times, name="date"))
    six_hours = pd.to_timedelta("06:00:00")
    result = df.groupby([0, 0, 1]).resample(rule=six_hours).mean()

    index = pd.to_datetime(
        [
            "2018-01-01 00:00:00",
            "2018-01-01 06:00:00",
            "2018-01-01 12:00:00",
            "2018-01-02 00:00:00",
        ]
    )
    expected_index = pd.MultiIndex(
        levels=[np.array([0, 1], dtype=np.intp), index],
        codes=[[0, 0, 0, 1], [0, 1, 2, 3]],
        names=[None, "date"],
    )
    expected = DataFrame(index=expected_index)

    # GH#52710 - Index comes out as 32-bit on 64-bit Windows
    check_index_type = not is_platform_windows()
    tm.assert_frame_equal(result, expected, check_index_type=check_index_type)
|
||||
|
||||
|
||||
def test_groupby_resample_size_all_index_same():
    """``size`` after groupby + daily resample returns a Series on a 2-level index (GH 46826)."""
    # GH 46826
    hours = date_range("31/12/2000 18:00", freq="h", periods=12, unit="ns")
    df = DataFrame(
        {"A": [1] * 3 + [2] * 3 + [1] * 3 + [2] * 3, "B": np.arange(12)},
        index=hours,
    )
    result = df.groupby("A").resample("D").size()

    day_level = pd.DatetimeIndex(["2000-12-31", "2001-01-01"] * 2, dtype="M8[ns]")
    mi_exp = pd.MultiIndex.from_arrays(
        [[1, 1, 2, 2], day_level],
        names=["A", None],
    )
    # A scalar 3 is broadcast over the four index entries.
    expected = Series(3, index=mi_exp)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_index_with_list_of_keys():
    """List-style column selection after resampling on the index, then mean (GH 50840)."""
    # GH 50840
    dates = date_range(start="2016-01-01", periods=8, name="date")
    frame_data = {
        "group": [0, 0, 0, 0, 1, 1, 1, 1],
        "val": [3, 1, 4, 1, 5, 9, 2, 6],
    }
    df = DataFrame(data=frame_data, index=dates)
    result = df.groupby("group").resample("2D")[["val"]].mean()

    # Every second index entry is a bin start.
    bin_starts = df.index[::2]
    mi_exp = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], bin_starts], names=["group", "date"]
    )
    expected = DataFrame(data={"val": [2.0, 2.5, 7.0, 4.0]}, index=mi_exp)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
    """Selecting two of three value columns after groupby-resample keeps only those (GH 50876)."""
    # GH 50876
    dates = date_range(start="2016-01-01", periods=8, name="date")
    frame_data = {
        "group": [0, 0, 0, 0, 1, 1, 1, 1],
        "first_val": [3, 1, 4, 1, 5, 9, 2, 6],
        "second_val": [2, 7, 1, 8, 2, 8, 1, 8],
        "third_val": [1, 4, 1, 4, 2, 1, 3, 5],
    }
    df = DataFrame(data=frame_data, index=dates)
    selected = df.groupby("group").resample("2D")[["first_val", "second_val"]]
    result = selected.mean()

    bin_starts = df.index[::2]
    mi_exp = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], bin_starts], names=["group", "date"]
    )
    expected = DataFrame(
        data={
            "first_val": [2.0, 2.5, 7.0, 4.0],
            "second_val": [4.5, 4.5, 5.0, 4.5],
        },
        index=mi_exp,
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_index_with_list_of_keys_missing_column():
    """Selecting a nonexistent column from a groupby-resample raises ``KeyError`` (GH 50876)."""
    # GH 50876
    date_index = Series(
        date_range(start="2016-01-01", periods=8),
        name="date",
    )
    frame_data = {
        "group": [0, 0, 0, 0, 1, 1, 1, 1],
        "val": [3, 1, 4, 1, 5, 9, 2, 6],
    }
    df = DataFrame(data=frame_data, index=date_index)

    rs = df.groupby("group").resample("2D")
    with pytest.raises(KeyError, match="Columns not found"):
        rs[["val_not_in_dataframe"]]
|
||||
@@ -0,0 +1,439 @@
|
||||
from datetime import datetime
|
||||
from operator import methodcaller
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
|
||||
|
||||
@pytest.fixture
def test_series():
    """Series of 1000 seeded standard-normal values on a ``date_range`` starting 1/1/2000."""
    values = np.random.default_rng(2).standard_normal(1000)
    index = date_range("1/1/2000", periods=1000)
    return Series(values, index=index)
|
||||
|
||||
|
||||
def test_apply(test_series):
    """Grouping by a year-end ``Grouper`` and by ``x.year`` gives the same ``apply`` output."""

    def f(x):
        # Last three entries after sorting by value.
        return x.sort_values()[-3:]

    grouper = Grouper(freq="YE", label="right", closed="right")
    applied = test_series.groupby(grouper).apply(f)
    expected = test_series.groupby(lambda x: x.year).apply(f)

    # The outer (group-key) level differs between the two groupings; drop it
    # from both before comparing.
    applied.index = applied.index.droplevel(0)
    expected.index = expected.index.droplevel(0)
    tm.assert_series_equal(applied, expected)
|
||||
|
||||
|
||||
def test_count(test_series):
    """``count`` via a year-end ``Grouper`` and via ``resample("YE")`` matches a by-year groupby."""
    # Blank out every third value so that count differs from the group size.
    test_series[::3] = np.nan

    expected = test_series.groupby(lambda x: x.year).count()

    # Grouper path
    year_end = Grouper(freq="YE", label="right", closed="right")
    result = test_series.groupby(year_end).count()
    expected.index = result.index
    tm.assert_series_equal(result, expected)

    # resample path
    result = test_series.resample("YE").count()
    expected.index = result.index
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_numpy_reduction(test_series):
    """Year-end resampled ``prod`` equals a by-year groupby aggregated with ``np.prod``."""
    resampler = test_series.resample("YE", closed="right")
    result = resampler.prod()

    by_year = test_series.groupby(lambda x: x.year)
    expected = by_year.agg(np.prod)
    # Only the values are compared; the expected index is replaced.
    expected.index = result.index

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_iteration():
|
||||
# #2300
|
||||
N = 1000
|
||||
ind = date_range(start="2000-01-01", freq="D", periods=N)
|
||||
df = DataFrame({"open": 1, "close": 2}, index=ind)
|
||||
tg = Grouper(freq="ME")
|
||||
|
||||
grouper, _ = tg._get_grouper(df)
|
||||
|
||||
# Errors
|
||||
grouped = df.groupby(grouper, group_keys=False)
|
||||
|
||||
def f(df):
|
||||
return df["close"] / df["open"]
|
||||
|
||||
# it works!
|
||||
result = grouped.apply(f)
|
||||
tm.assert_index_equal(result.index, df.index)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        Index([1, 2]),
        Index(["a", "b"]),
        Index([1.1, 2.2]),
        pd.MultiIndex.from_arrays([[1, 2], ["a", "b"]]),
    ],
)
def test_fails_on_no_datetime_index(index):
    """Grouping by a frequency ``Grouper`` raises ``TypeError`` for non-datetime-like indexes."""
    df = DataFrame({"a": range(len(index))}, index=index)

    # The message names the concrete index class that was passed.
    name = type(index).__name__
    msg = (
        "Only valid with DatetimeIndex, TimedeltaIndex "
        f"or PeriodIndex, but got an instance of '{name}'"
    )
    with pytest.raises(TypeError, match=msg):
        df.groupby(Grouper(freq="D"))
|
||||
|
||||
|
||||
def test_aaa_group_order():
    """Each daily group keeps its rows in original order (GH 12840)."""
    # GH 12840
    # check TimeGrouper perform stable sorts
    n = 20
    data = np.random.default_rng(2).standard_normal((n, 4))
    df = DataFrame(data, columns=["A", "B", "C", "D"])
    # Five consecutive days, repeated four times.
    df["key"] = [datetime(2013, 1, day) for day in range(1, 6)] * 4
    grouped = df.groupby(Grouper(key="key", freq="D"))

    # Day ``k`` must contain every fifth row starting at offset ``k - 1``.
    for offset, day in enumerate(range(1, 6)):
        group = grouped.get_group(datetime(2013, 1, day))
        tm.assert_frame_equal(group, df[offset::5])
|
||||
|
||||
|
||||
def test_aggregate_normal(resample_method):
    """Check that TimeGrouper aggregation matches a normal groupby on integer keys."""
    columns = ["A", "B", "C", "D"]
    data = np.random.default_rng(2).standard_normal((20, 4))

    # Same values, keyed once by integers and once by the matching dates.
    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=columns)
    days = [datetime(2013, 1, day) for day in range(1, 6)]
    dt_df["key"] = Index(days * 4, dtype="M8[ns]")

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    aggregate = methodcaller(resample_method)
    expected = aggregate(normal_grouped)
    dt_result = aggregate(dt_grouped)
    expected.index = date_range(
        start="2013-01-01", freq="D", periods=5, unit="ns", name="key"
    )
    tm.assert_equal(expected, dt_result)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="if TimeGrouper is used included, 'nth' doesn't work yet")
def test_aggregate_nth():
    """Check that ``nth(3)`` under TimeGrouper matches a normal groupby (expected to fail)."""
    columns = ["A", "B", "C", "D"]
    data = np.random.default_rng(2).standard_normal((20, 4))

    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=columns)
    dt_df["key"] = [datetime(2013, 1, day) for day in range(1, 6)] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    expected = normal_grouped.nth(3)
    expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
    dt_result = dt_grouped.nth(3)
    tm.assert_frame_equal(expected, dt_result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method, method_args, unit",
    [
        ("sum", {}, 0),
        ("sum", {"min_count": 0}, 0),
        ("sum", {"min_count": 1}, np.nan),
        ("prod", {}, 1),
        ("prod", {"min_count": 0}, 1),
        ("prod", {"min_count": 1}, np.nan),
    ],
)
def test_resample_entirely_nat_window(method, method_args, unit):
    """``sum``/``prod`` over a bin containing only NaN yields the parametrized ``unit`` value."""
    # First 2-day bin holds zeros, second bin holds only NaN.
    daily = date_range("2017", periods=4, unit="ns")
    ser = Series([0] * 2 + [np.nan] * 2, index=daily)

    resampler = ser.resample("2D")
    result = getattr(resampler, method)(**method_args)

    exp_dti = pd.DatetimeIndex(["2017-01-01", "2017-01-03"], dtype="M8[ns]", freq="2D")
    expected = Series([0.0, unit], index=exp_dti)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func, fill_value",
    [("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)],
)
def test_aggregate_with_nat(func, fill_value):
    """Compare TimeGrouper aggregation with a normal groupby when the key contains NaT/NaN."""
    # check TimeGrouper's aggregation is identical as normal groupby
    # if NaT is included, 'var', 'std', 'mean', 'first','last'
    # and 'nth' doesn't work yet

    n = 20
    columns = ["A", "B", "C", "D"]
    data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64")

    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=columns)
    key_cycle = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ]
    dt_df["key"] = Index(key_cycle * 4, dtype="M8[ns]")

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    normal_result = getattr(normal_grouped, func)()
    dt_result = getattr(dt_grouped, func)()

    # Add a row for key 3 (absent from the normal result) holding the fill value.
    pad = DataFrame([[fill_value] * 4], index=[3], columns=columns)
    expected = pd.concat([normal_result, pad]).sort_index()

    dti = date_range(
        start="2013-01-01",
        freq="D",
        periods=5,
        name="key",
        unit=dt_df["key"]._values.unit,
    )
    expected.index = dti._with_freq(None)  # TODO: is this desired?
    tm.assert_frame_equal(expected, dt_result)
    assert dt_result.index.name == "key"
|
||||
|
||||
|
||||
def test_aggregate_with_nat_size():
    """Compare ``size`` under TimeGrouper with a normal groupby when the key contains NaT/NaN (GH 9925)."""
    # GH 9925
    n = 20
    columns = ["A", "B", "C", "D"]
    data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64")

    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=columns)
    key_cycle = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ]
    dt_df["key"] = Index(key_cycle * 4, dtype="M8[ns]")

    normal_result = normal_df.groupby("key").size()
    dt_result = dt_df.groupby(Grouper(key="key", freq="D")).size()

    # Add a zero-size entry for key 3, which the normal result lacks.
    pad = Series([0], index=[3])
    expected = pd.concat([normal_result, pad]).sort_index()

    dti = date_range(
        start="2013-01-01",
        freq="D",
        periods=5,
        name="key",
        unit=dt_df["key"]._values.unit,
    )
    expected.index = dti._with_freq(None)
    tm.assert_series_equal(expected, dt_result)
    assert dt_result.index.name == "key"
|
||||
|
||||
|
||||
def test_repr():
|
||||
# GH18203
|
||||
result = repr(Grouper(key="A", freq="h"))
|
||||
expected = (
|
||||
"TimeGrouper(key='A', freq=<Hour>, sort=True, dropna=True, "
|
||||
"closed='left', label='left', how='mean', "
|
||||
"convention='e', origin='start_day')"
|
||||
)
|
||||
assert result == expected
|
||||
|
||||
result = repr(Grouper(key="A", freq="h", origin="2000-01-01"))
|
||||
expected = (
|
||||
"TimeGrouper(key='A', freq=<Hour>, sort=True, dropna=True, "
|
||||
"closed='left', label='left', how='mean', "
|
||||
"convention='e', origin=Timestamp('2000-01-01 00:00:00'))"
|
||||
)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method, method_args, expected_values",
    [
        ("sum", {}, [1, 0, 1]),
        ("sum", {"min_count": 0}, [1, 0, 1]),
        ("sum", {"min_count": 1}, [1, np.nan, 1]),
        ("sum", {"min_count": 2}, [np.nan, np.nan, np.nan]),
        ("prod", {}, [1, 1, 1]),
        ("prod", {"min_count": 0}, [1, 1, 1]),
        ("prod", {"min_count": 1}, [1, np.nan, 1]),
        ("prod", {"min_count": 2}, [np.nan, np.nan, np.nan]),
    ],
)
def test_upsample_sum(method, method_args, expected_values):
    """``sum``/``prod`` when upsampling hourly data to 30 minutes, for several ``min_count`` values."""
    hourly = date_range("2017", periods=2, freq="h", unit="ns")
    ser = Series(1, index=hourly)
    resampled = ser.resample("30min")
    result = getattr(resampled, method)(**method_args)

    index = pd.DatetimeIndex(
        ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"],
        dtype="M8[ns]",
        freq="30min",
    )
    expected = Series(expected_values, index=index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.fixture
def groupy_test_df():
    """Three-row price/volume frame on a weekly ``date_range`` starting 01/01/2018."""
    weekly = date_range("01/01/2018", periods=3, freq="W", unit="ns")
    columns = {"price": [10, 11, 9], "volume": [50, 60, 50]}
    return DataFrame(columns, index=weekly)
|
||||
|
||||
|
||||
def test_groupby_resample_interpolate_raises(groupy_test_df):
    """Interpolating directly on a groupby-resample raises ``NotImplementedError`` (GH 35325)."""
    # GH 35325

    # Make a copy of the test data frame that has index.name=None
    unnamed_index_df = groupy_test_df.copy()
    unnamed_index_df.index.name = None

    msg = "Direct interpolation of MultiIndex data frames is not supported"
    for df in (groupy_test_df, unnamed_index_df):
        with pytest.raises(NotImplementedError, match=msg):
            df.groupby("volume").resample("1D").interpolate(method="linear")
|
||||
|
||||
|
||||
def test_groupby_resample_interpolate_with_apply_syntax(groupy_test_df):
    """Daily linear interpolation per volume group, run through ``groupby.apply`` (GH 35325)."""
    # GH 35325

    # Make a copy of the test data frame that has index.name=None
    unnamed_index_df = groupy_test_df.copy()
    unnamed_index_df.index.name = None

    for df in (groupy_test_df, unnamed_index_df):
        result = df.groupby("volume").apply(
            lambda x: x.resample("1D").interpolate(method="linear"),
        )

        # Expected index: fifteen daily entries under volume 50, then a
        # single entry under volume 60.
        volume = [50] * 15 + [60]
        week_starting = list(date_range("2018-01-07", "2018-01-21", unit="ns"))
        week_starting.append(Timestamp("2018-01-14"))
        expected_ind = pd.MultiIndex.from_arrays(
            [volume, week_starting],
            names=["volume", df.index.name],
        )

        prices = [
            10.0,
            9.928571428571429,
            9.857142857142858,
            9.785714285714286,
            9.714285714285714,
            9.642857142857142,
            9.571428571428571,
            9.5,
            9.428571428571429,
            9.357142857142858,
            9.285714285714286,
            9.214285714285714,
            9.142857142857142,
            9.071428571428571,
            9.0,
            11.0,
        ]
        expected = DataFrame(data={"price": prices}, index=expected_ind)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_interpolate_with_apply_syntax_off_grid(groupy_test_df):
    """Variant of test_groupby_resample_interpolate_with_apply_syntax using a
    resampling rule ("265h") that results in missing anchor points when
    interpolating.

    See GH#21351."""
    # GH#21351
    result = groupy_test_df.groupby("volume").apply(
        lambda x: x.resample("265h").interpolate(method="linear")
    )

    volume = [50, 50, 60]
    stamps = [
        Timestamp("2018-01-07"),
        Timestamp("2018-01-18 01:00:00"),
        Timestamp("2018-01-14"),
    ]
    week_starting = pd.DatetimeIndex(stamps).as_unit("ns")
    expected_ind = pd.MultiIndex.from_arrays(
        [volume, week_starting],
        names=["volume", "week_starting"],
    )

    expected = DataFrame(data={"price": [10.0, 9.5, 11.0]}, index=expected_ind)
    # Index names are deliberately not compared.
    tm.assert_frame_equal(result, expected, check_names=False)
|
||||
218
venv/Lib/site-packages/pandas/tests/resample/test_timedelta.py
Normal file
218
venv/Lib/site-packages/pandas/tests/resample/test_timedelta.py
Normal file
@@ -0,0 +1,218 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
|
||||
|
||||
def test_asfreq_bug():
|
||||
df = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)])
|
||||
result = df.resample("1min").asfreq()
|
||||
expected = DataFrame(
|
||||
data=[1, np.nan, np.nan, 3],
|
||||
index=timedelta_range("0 day", periods=4, freq="1min", unit="us"),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_nat():
    """Resampled mean on a timedelta index that contains ``NaT`` (GH 13223)."""
    # GH 13223
    index = pd.to_timedelta(["0s", pd.NaT, "2s"])
    df = DataFrame({"value": [2, 3, 5]}, index)
    result = df.resample("1s").mean()

    second_index = timedelta_range("0 day", periods=3, freq="1s")
    expected = DataFrame({"value": [2.5, np.nan, 5.0]}, index=second_index)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_as_freq_with_subperiod():
    """``asfreq`` from a 5-minute to a 2-minute timedelta grid (GH 13022)."""
    # GH 13022
    index = timedelta_range("00:00:00", "00:10:00", freq="5min")
    df = DataFrame(data={"value": [1, 5, 10]}, index=index)
    result = df.resample("2min").asfreq()

    # Expected values: only the 0 and 10 minute entries are non-NaN.
    expected_data = {"value": [1, np.nan, np.nan, np.nan, np.nan, 10]}
    expected_index = timedelta_range("00:00:00", "00:10:00", freq="2min")
    expected = DataFrame(data=expected_data, index=expected_index)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_timedeltas():
    """30-minute resampled sums equal sums over consecutive blocks of 30 rows."""
    # Reference result: group positions into blocks of 30 and sum.
    positional = DataFrame({"A": np.arange(1480)})
    expected = positional.groupby(positional.index // 30).sum()
    expected.index = timedelta_range("0 days", freq="30min", periods=50)

    minute_index = pd.to_timedelta(np.arange(1480), unit="min").as_unit("us")
    df = DataFrame({"A": np.arange(1480)}, index=minute_index)

    # DataFrame case
    result = df.resample("30min").sum()
    tm.assert_frame_equal(result, expected)

    # Series case
    s = df["A"]
    result = s.resample("30min").sum()
    tm.assert_series_equal(result, expected["A"])
|
||||
|
||||
|
||||
def test_resample_single_period_timedelta():
    """Sum five one-second timedelta entries into 2-second bins."""
    second_index = timedelta_range("1 day", freq="s", periods=5)
    s = Series(list(range(5)), index=second_index)
    result = s.resample("2s").sum()

    # Bins hold (0, 1), (2, 3) and (4,).
    bin_index = timedelta_range("1 day", freq="2s", periods=3)
    expected = Series([1, 5, 4], index=bin_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_idempotency():
    """Resampling at the series' own frequency returns the same values as floats (GH 12072)."""
    # GH 12072
    index = timedelta_range("0", periods=9, freq="10ms")
    series = Series(range(9), index=index)

    result = series.resample("10ms").mean()

    expected = series.astype(float)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_offset_with_timedeltaindex():
    """Check the result index of a 2-second resample with and without ``offset="5s"`` (GH 10530, 31809)."""
    # GH 10530 & 31809
    rng = timedelta_range(start="0s", periods=25, freq="s")
    values = np.random.default_rng(2).standard_normal(len(rng))
    ts = Series(values, index=rng)

    with_base = ts.resample("2s", offset="5s").mean()
    without_base = ts.resample("2s").mean()

    exp_without_base = timedelta_range(start="0s", end="25s", freq="2s")
    tm.assert_index_equal(without_base.index, exp_without_base)

    exp_with_base = timedelta_range(start="5s", end="29s", freq="2s")
    tm.assert_index_equal(with_base.index, exp_with_base)
|
||||
|
||||
|
||||
def test_resample_categorical_data_with_timedeltaindex():
|
||||
# GH #12169
|
||||
df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s"))
|
||||
df["Group"] = df["Group_obj"].astype("category")
|
||||
result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
|
||||
exp_tdi = pd.TimedeltaIndex(np.array([0, 10], dtype="m8[s]"), freq="10s")
|
||||
expected = DataFrame(
|
||||
{"Group_obj": ["A", "A"], "Group": ["A", "A"]},
|
||||
index=exp_tdi,
|
||||
)
|
||||
expected = expected.reindex(["Group_obj", "Group"], axis=1)
|
||||
expected["Group"] = expected["Group_obj"].astype("category")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_values():
    """Timedelta values stay timedelta when resampling introduces ``NaT`` (GH 13119)."""
    # GH 13119
    # check that timedelta dtype is preserved when NaT values are
    # introduced by the resampling

    times = timedelta_range("1 day", "6 day", freq="4D")
    df = DataFrame({"time": times}, index=times)

    # Expected: a 2-day grid whose second entry is NaT.
    times2 = timedelta_range("1 day", "6 day", freq="2D")
    exp = Series(times2, index=times2, name="time")
    exp.iloc[1] = pd.NaT

    # Column selected after resampling the frame
    from_frame = df.resample("2D").first()["time"]
    tm.assert_series_equal(from_frame, exp)

    # Column selected before resampling
    from_series = df["time"].resample("2D").first()
    tm.assert_series_equal(from_series, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "start, end, freq, resample_freq",
    [
        ("8h", "21h59min50s", "10s", "3h"),  # GH 30353 example
        ("3h", "22h", "1h", "5h"),
        ("527D", "5006D", "3D", "10D"),
        ("1D", "10D", "1D", "2D"),  # GH 13022 example
        # tests that worked before GH 33498:
        ("8h", "21h59min50s", "10s", "2h"),
        ("0h", "21h59min50s", "10s", "3h"),
        ("10D", "85D", "D", "2D"),
    ],
)
def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
    """The resampled timedelta index matches ``timedelta_range`` and its last value is not NaN (GH 33498)."""
    # GH 33498
    # check that the timedelta bins do not contain an extra bin
    idx = timedelta_range(start=start, end=end, freq=freq)
    s = Series(np.arange(len(idx)), index=idx)
    result = s.resample(resample_freq).min()

    expected_index = timedelta_range(freq=resample_freq, start=start, end=end)
    tm.assert_index_equal(result.index, expected_index)
    assert result.index.freq == expected_index.freq

    last_value = result.iloc[-1]
    assert not np.isnan(last_value)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("duplicates", [True, False])
def test_resample_with_timedelta_yields_no_empty_groups(duplicates):
    """Check that every 3-second bin receives rows when resampling.

    Regression test for GH 10603.

    Parameters
    ----------
    duplicates : bool
        When True, the frame is given non-unique column labels.
    """
    # GH 10603
    df = DataFrame(
        np.random.default_rng(2).normal(size=(10000, 4)),
        index=timedelta_range(start="0s", periods=10000, freq="3906250ns"),
    )
    if duplicates:
        # case with non-unique columns
        df.columns = ["A", "B", "A", "C"]

    result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x))

    # 3906250ns is 1/256 s, so a full 3s bin holds 768 rows. Slicing from
    # "1s" leaves 10000 - 256 = 9744 rows: 12 full bins (9216 rows) plus a
    # final partial bin of 528 rows.
    expected = DataFrame(
        [[768] * 4] * 12 + [[528] * 4],
        index=timedelta_range(start="1s", periods=13, freq="3s", unit="ns"),
    )
    expected.columns = df.columns
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_quantile_timedelta(unit):
    """Check ``quantile`` on a timedelta column resampled over a datetime index.

    Regression test for GH 29485.

    Parameters
    ----------
    unit : str
        Resolution code used to build the ``m8[unit]`` dtype of the column
        (supplied by a fixture defined outside this module).
    """
    # GH: 29485
    dtype = np.dtype(f"m8[{unit}]")
    df = DataFrame(
        {"value": pd.to_timedelta(np.arange(4), unit="s").astype(dtype)},
        index=pd.date_range("20200101", periods=4, tz="UTC"),
    )
    result = df.resample("2D").quantile(0.99)

    # Each 2-day bin holds two values one second apart (0s/1s and 2s/3s);
    # the expected results are 0.99s and 2.99s respectively.
    expected = DataFrame(
        {
            "value": [
                pd.Timedelta("0 days 00:00:00.990000"),
                pd.Timedelta("0 days 00:00:02.990000"),
            ]
        },
        index=pd.date_range("20200101", periods=2, tz="UTC", freq="2D"),
    ).astype(dtype)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_closed_right():
    """Check right-closed, right-labelled resampling on a timedelta index.

    Regression test for GH#45414. Ten values spaced 30 seconds apart,
    starting at 120s, are summed into one-minute bins.
    """
    # GH#45414
    idx = pd.Index([pd.Timedelta(seconds=120 + i * 30) for i in range(10)])
    ser = Series(range(10), index=idx)
    result = ser.resample("min", closed="right", label="right").sum()

    # With right-closed bins the first point (120s) is alone in the bin
    # labelled 120s, then consecutive pairs follow: 1+2, 3+4, 5+6, 7+8,
    # and the last point (390s) is alone in the bin labelled 420s.
    expected = Series(
        [0, 3, 7, 11, 15, 9],
        index=pd.TimedeltaIndex(
            [pd.Timedelta(seconds=120 + i * 60) for i in range(6)], freq="min"
        ),
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_if_no("pyarrow")
def test_arrow_duration_resample():
    """Check resampling a series indexed by a pyarrow duration dtype.

    Regression test for GH 56371. The index already has one entry per day,
    so a "1D" mean resample is expected to return the input unchanged.
    """
    # GH 56371
    idx = pd.Index(timedelta_range("1 day", periods=5), dtype="duration[ns][pyarrow]")
    expected = Series(np.arange(5, dtype=np.float64), index=idx)
    result = expected.resample("1D").mean()
    tm.assert_series_equal(result, expected)
|
||||
Reference in New Issue
Block a user