initial commit

This commit is contained in:
2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
import pytest
# Names of the resampler methods that the fixtures in this module parametrize over.
downsample_methods = [
    "min", "max", "first", "last", "sum", "mean", "sem",
    "median", "prod", "var", "std", "ohlc", "quantile",
]
upsample_methods = ["count", "size"]
series_methods = ["nunique"]
# Every method name above, in declaration order.
resample_methods = [*downsample_methods, *upsample_methods, *series_methods]
@pytest.fixture(params=downsample_methods)
def downsample_method(request):
    """Provide each entry of ``downsample_methods`` as a test parameter."""
    method_name = request.param
    return method_name
@pytest.fixture(params=resample_methods)
def resample_method(request):
    """Provide each entry of ``resample_methods`` as a test parameter."""
    method_name = request.param
    return method_name

View File

@@ -0,0 +1,554 @@
from datetime import datetime
import numpy as np
import pytest
from pandas.errors import Pandas4Warning
from pandas.core.dtypes.common import is_extension_array_dtype
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
NaT,
PeriodIndex,
Series,
TimedeltaIndex,
)
import pandas._testing as tm
from pandas.core.groupby.groupby import DataError
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import period_range
from pandas.core.indexes.timedeltas import timedelta_range
from pandas.core.resample import _asfreq_compat
@pytest.fixture(
    params=[
        "linear", "time", "index", "values", "nearest",
        "zero", "slinear", "quadratic", "cubic", "barycentric",
        "krogh", "from_derivatives", "piecewise_polynomial", "pchip", "akima",
    ],
)
def all_1d_no_arg_interpolation_methods(request):
    """Provide one interpolation method name per parametrized run."""
    method_name = request.param
    return method_name
@pytest.mark.parametrize("freq", ["2D", "1h"])
@pytest.mark.parametrize(
    "index",
    [
        timedelta_range("1 day", "10 day", freq="D"),
        date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
    ],
)
def test_asfreq(frame_or_series, index, freq):
    """``resample(freq).asfreq()`` equals reindexing onto a regular ``freq`` grid."""
    obj = frame_or_series(range(len(index)), index=index)

    # Build the expected grid with the range constructor matching the index type.
    if isinstance(index, DatetimeIndex):
        make_range = date_range
    else:
        make_range = timedelta_range
    first, last = obj.index[0], obj.index[-1]
    expected = obj.reindex(make_range(first, last, freq=freq))

    result = obj.resample(freq).asfreq()

    tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize(
    "index",
    [
        timedelta_range("1 day", "10 day", freq="D"),
        date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
    ],
)
def test_asfreq_fill_value(index):
    """``asfreq`` with and without ``fill_value`` matches ``reindex`` (issue 3715)."""
    if isinstance(index, DatetimeIndex):
        make_range = date_range
    else:
        make_range = timedelta_range

    # Without a fill value, upsampling behaves like a plain reindex.
    ser = Series(range(len(index)), index=index, name="a")
    hourly = make_range(ser.index[0], ser.index[-1], freq="1h")
    tm.assert_series_equal(ser.resample("1h").asfreq(), ser.reindex(hourly))

    # Cast to float up front so that assigning None below does not cause
    # an implicit cast.
    frame = ser.astype("float").to_frame("value")
    frame.iloc[1] = None
    hourly = make_range(frame.index[0], frame.index[-1], freq="1h")
    filled = frame.resample("1h").asfreq(fill_value=4.0)
    tm.assert_frame_equal(filled, frame.reindex(hourly, fill_value=4.0))
@pytest.mark.parametrize(
    "index",
    [
        timedelta_range("1 day", "3 day", freq="D"),
        date_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
        period_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
    ],
)
def test_resample_interpolate(index):
    """Resampler ``interpolate`` matches ``asfreq().interpolate()`` (GH#12925)."""
    frame = DataFrame(range(len(index)), index=index)

    via_asfreq = frame.resample("1min").asfreq().interpolate()
    direct = frame.resample("1min").interpolate()

    tm.assert_frame_equal(via_asfreq, direct)
def test_resample_interpolate_inplace_deprecated():
    """Passing ``inplace`` to resampler ``interpolate`` warns (GH#58690)."""
    days = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    resampler = DataFrame(range(len(days)), index=days).resample("1min")

    warn_msg = "The 'inplace' keyword in DatetimeIndexResampler.interpolate"
    error_msg = "Cannot interpolate inplace on a resampled object"

    # inplace=False only triggers the warning.
    with tm.assert_produces_warning(Pandas4Warning, match=warn_msg):
        resampler.interpolate(inplace=False)

    # inplace=True additionally raises; the warning check sits inside the
    # raises check, as a nested pair of context managers.
    with (
        pytest.raises(ValueError, match=error_msg),
        tm.assert_produces_warning(Pandas4Warning, match=warn_msg),
    ):
        resampler.interpolate(inplace=True)
def test_resample_interpolate_regular_sampling_off_grid(
    all_1d_no_arg_interpolation_methods,
):
    """Check interpolated values when the samples sit one minute off the grid."""
    pytest.importorskip("scipy")
    # GH#21351
    method = all_1d_no_arg_interpolation_methods
    source_index = date_range("2000-01-01 00:01:00", periods=5, freq="2h")
    ser = Series(np.arange(5.0), source_index)

    # Expected values (before the leading NaN) for the methods that need their
    # own pattern; every other method shares ``fallback``.
    per_method = {
        "linear": np.repeat(np.arange(0.0, 4.0), 2) + np.tile([1 / 3, 2 / 3], 4),
        "nearest": np.repeat(np.arange(0.0, 5.0), 2)[1:-1],
        "zero": np.repeat(np.arange(0.0, 4.0), 2),
    }
    fallback = 0.491667 + np.arange(0.0, 4.0, 0.5)
    values = np.insert(per_method.get(method, fallback), 0, np.nan)

    hourly = date_range("2000-01-01 00:00:00", periods=9, freq="1h")
    expected = Series(values, index=hourly)

    result = ser.resample("1h").interpolate(method)
    tm.assert_series_equal(result, expected)
def test_resample_interpolate_irregular_sampling(all_1d_no_arg_interpolation_methods):
    """Interpolating irregular samples onto a 5s grid leaves only a leading NaN."""
    pytest.importorskip("scipy")
    # GH#21351
    method = all_1d_no_arg_interpolation_methods
    stamps = DatetimeIndex(
        [
            "2000-01-01 00:00:03",
            "2000-01-01 00:00:22",
            "2000-01-01 00:00:24",
            "2000-01-01 00:00:31",
            "2000-01-01 00:00:39",
        ]
    )
    ser = Series(np.linspace(0.0, 1.0, 5), index=stamps)

    # Resample to 5 second sampling and interpolate with the given method
    ser_resampled = ser.resample("5s").interpolate(method)

    # Only the first grid point, which lies 3 seconds before the first actual
    # data point, may be NaN.
    leading, remainder = ser_resampled.iloc[0], ser_resampled.iloc[1:]
    assert np.isnan(leading)
    assert not remainder.isna().any()
def test_raises_on_non_datetimelike_index():
    """Resampling a frame whose index is not datetime-like raises TypeError."""
    # An empty DataFrame carries a non datetimelike index.
    frame = DataFrame()
    expected_message = (
        "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
        "but got an instance of 'RangeIndex'"
    )
    with pytest.raises(TypeError, match=expected_message):
        frame.resample("YE")
@pytest.mark.parametrize(
    "index",
    [
        PeriodIndex([], freq="D", name="a"),
        DatetimeIndex([], name="a"),
        TimedeltaIndex([], name="a"),
    ],
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_empty_series(freq, index, resample_method):
    """Every resample method on an empty float Series yields an empty result."""
    # GH12771 & GH12868
    ser = Series(index=index, dtype=float)

    if freq == "ME":
        if isinstance(ser.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                ser.resample(freq)
            return
        if isinstance(ser.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    result = getattr(ser.resample(freq), resample_method)()

    # "ohlc" produces a frame with four columns; everything else a Series.
    if resample_method == "ohlc":
        expected = DataFrame(
            [], index=ser.index[:0], columns=["open", "high", "low", "close"]
        )
        assert_same = tm.assert_frame_equal
    else:
        expected = ser.copy()
        assert_same = tm.assert_series_equal
    expected.index = _asfreq_compat(ser.index, freq)

    assert_same(result, expected, check_dtype=False)
    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
@pytest.mark.parametrize("min_count", [0, 1])
def test_resample_empty_sum_string(string_dtype_no_object, min_count):
    """Summing all-NA string bins gives "" or NA depending on ``min_count``."""
    # https://github.com/pandas-dev/pandas/issues/60229
    dtype = string_dtype_no_object
    stamps = DatetimeIndex(
        [
            "2000-01-01 00:00:00",
            "2000-01-01 00:00:10",
            "2000-01-01 00:00:20",
            "2000-01-01 00:00:30",
        ]
    )
    ser = Series(pd.NA, index=stamps, dtype=dtype)

    result = ser.resample("20s").sum(min_count=min_count)

    if min_count == 0:
        fill = ""
    else:
        fill = pd.NA
    bins = date_range(start="2000-01-01", freq="20s", periods=2, unit="us")
    expected = Series(fill, index=bins, dtype=dtype)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "freq",
    [
        pytest.param("ME", marks=pytest.mark.xfail(reason="Don't know why this fails")),
        "D",
        "h",
    ],
)
def test_resample_nat_index_series(freq, resample_method):
    """Resampling a Series indexed only by NaT periods yields an empty result."""
    # GH39227
    ser = Series(range(5), index=PeriodIndex([NaT] * 5, freq=freq))

    result = getattr(ser.resample(freq), resample_method)()

    # "ohlc" produces a frame with four columns; everything else a Series.
    if resample_method == "ohlc":
        expected = DataFrame(
            [], index=ser.index[:0], columns=["open", "high", "low", "close"]
        )
        assert_same = tm.assert_frame_equal
    else:
        expected = ser[:0].copy()
        assert_same = tm.assert_series_equal

    assert_same(result, expected, check_dtype=False)
    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
@pytest.mark.parametrize(
    "index",
    [
        PeriodIndex([], freq="D", name="a"),
        DatetimeIndex([], name="a"),
        TimedeltaIndex([], name="a"),
    ],
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, index, resample_method):
    """``count``/``size`` on an empty Series return an empty int64 Series."""
    # GH28427
    ser = Series(index=index)

    if freq == "ME":
        if isinstance(ser.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                ser.resample(freq)
            return
        if isinstance(ser.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    result = getattr(ser.resample(freq), resample_method)()

    expected = Series(
        [], dtype="int64", index=_asfreq_compat(ser.index, freq), name=ser.name
    )
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")]
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_empty_dataframe(index, freq, resample_method):
    """Every resample method on an empty, column-less frame yields an empty result."""
    # GH13212
    df = DataFrame(index=index)

    if freq == "ME":
        if isinstance(df.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                df.resample(freq, group_keys=False)
            return
        if isinstance(df.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    result = getattr(df.resample(freq, group_keys=False), resample_method)()

    if resample_method == "size":
        # GH14962
        expected = Series([], dtype=np.int64)
    elif resample_method == "ohlc":
        # TODO: no tests with len(df.columns) > 0
        ohlc_columns = MultiIndex.from_product(
            [df.columns, ["open", "high", "low", "close"]]
        )
        expected = DataFrame(
            [], index=df.index[:0], columns=ohlc_columns, dtype=np.float64
        )
    else:
        # count retains dimensions too
        expected = df.copy()
    expected.index = _asfreq_compat(df.index, freq)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    tm.assert_almost_equal(result, expected)
    # test size for GH13212 (currently stays as df)
@pytest.mark.parametrize(
    "index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")]
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_count_empty_dataframe(freq, index):
    """``count`` on an empty one-column frame returns an empty int64 frame."""
    # GH28427
    columns = Index(["a"], dtype=object)
    empty_frame = DataFrame(index=index, columns=columns)

    if freq == "ME":
        if isinstance(empty_frame.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                empty_frame.resample(freq)
            return
        if isinstance(empty_frame.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    result = empty_frame.resample(freq).count()

    expected = DataFrame(
        dtype="int64",
        index=_asfreq_compat(empty_frame.index, freq),
        columns=Index(["a"], dtype=object),
    )
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")]
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_size_empty_dataframe(freq, index):
    """``size`` on an empty one-column frame returns an empty int64 Series."""
    # GH28427
    empty_frame = DataFrame(index=index, columns=Index(["a"], dtype=object))

    if freq == "ME":
        if isinstance(empty_frame.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                empty_frame.resample(freq)
            return
        if isinstance(empty_frame.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    result = empty_frame.resample(freq).size()

    expected = Series(
        [], dtype="int64", index=_asfreq_compat(empty_frame.index, freq)
    )
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("index", [DatetimeIndex([]), TimedeltaIndex([])])
@pytest.mark.parametrize("freq", ["D", "h"])
@pytest.mark.parametrize(
    "method", ["ffill", "bfill", "nearest", "asfreq", "interpolate", "mean"]
)
def test_resample_apply_empty_dataframe(index, freq, method):
    """Applying a resampler's own bound method to an empty frame stays empty."""
    # GH#55572
    empty_frame = DataFrame(index=index)
    resampler = empty_frame.resample(freq)

    # The callable handed to ``apply`` is the bound method of this same resampler.
    bound_method = getattr(resampler, method)
    result = resampler.apply(bound_method)

    expected = DataFrame([], index=_asfreq_compat(empty_frame.index, freq))
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "index",
    [
        PeriodIndex([], freq="M", name="a"),
        DatetimeIndex([], name="a"),
        TimedeltaIndex([], name="a"),
    ],
)
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
def test_resample_empty_dtypes(index, dtype, resample_method):
    """Smoke test: resample methods on empty Series of various dtypes just run."""
    # Empty series were sometimes causing a segfault (for the functions
    # with Cython bounds-checking disabled) or an IndexError. We just run
    # them to ensure they no longer do. (GH #10228)
    empty_series = Series([], index=index, dtype=dtype)
    resampler = empty_series.resample("D", group_keys=False)
    try:
        getattr(resampler, resample_method)()
    except DataError:
        # Some dtype/method combinations are invalid (ex: doing mean with
        # dtype of np.object_); those are deliberately ignored here.
        pass
@pytest.mark.parametrize(
    "index",
    [
        PeriodIndex([], freq="D", name="a"),
        DatetimeIndex([], name="a"),
        TimedeltaIndex([], name="a"),
    ],
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_apply_to_empty_series(index, freq):
    """``apply`` with a callable on an empty Series matches ``apply("sum")``."""
    # GH 14313
    ser = Series(index=index)

    if freq == "ME":
        if isinstance(ser.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                ser.resample(freq)
            return
        if isinstance(ser.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    applied = ser.resample(freq, group_keys=False).apply(lambda x: 1)
    summed = ser.resample(freq).apply("sum")

    tm.assert_series_equal(applied, summed, check_dtype=False)
@pytest.mark.parametrize(
    "index",
    [
        timedelta_range("1 day", "10 day", freq="D"),
        date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
        period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
    ],
)
def test_resampler_is_iterable(index):
    """Iterating a resampler yields the same (key, group) pairs as a Grouper groupby."""
    # GH 15314
    series = Series(range(len(index)), index=index)
    freq = "h"
    tg = Grouper(freq=freq, convention="start")
    grouped = list(series.groupby(tg))
    resampled = list(series.resample(freq))

    # zip() stops at the shorter input, so without this check a resampler that
    # yields too few (or no) groups would pass the loop below unnoticed.
    assert len(resampled) == len(grouped)
    for (rk, rv), (gk, gv) in zip(resampled, grouped):
        assert rk == gk
        tm.assert_series_equal(rv, gv)
@pytest.mark.parametrize(
    "index",
    [
        timedelta_range("1 day", "10 day", freq="D"),
        date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
        period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
    ],
)
def test_resample_quantile(index):
    """Resampler ``quantile`` matches aggregating with ``Series.quantile``."""
    # GH 15023
    freq = "h"
    q = 0.75
    ser = Series(range(len(index)), index=index)

    result = ser.resample(freq).quantile(q)

    aggregated = ser.resample(freq).agg(lambda x: x.quantile(q))
    expected = aggregated.rename(ser.name)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("how", ["first", "last"])
def test_first_last_skipna(any_real_nullable_dtype, skipna, how):
    """Resampler ``first``/``last`` with ``skipna`` match the groupby equivalents."""
    # GH#57019
    dtype = any_real_nullable_dtype
    na_value = (
        Series(dtype=dtype).dtype.na_value
        if is_extension_array_dtype(dtype)
        else np.nan
    )
    days = date_range("2020-01-01", periods=4, freq="D", unit="ns")
    df = DataFrame(
        {
            "a": [2, 1, 1, 2],
            "b": [na_value, 3.0, na_value, 4.0],
            "c": [na_value, 3.0, na_value, 4.0],
        },
        index=days,
        dtype=dtype,
    )

    resampler = df.resample("ME")
    result = getattr(resampler, how)(skipna=skipna)

    # Group every row under the single month-end label used for the expectation.
    month_end = pd.to_datetime("2020-01-31").as_unit("ns")
    by_month_end = df.groupby(df.shape[0] * [month_end])
    expected = getattr(by_month_end, how)(skipna=skipna)
    expected.index.freq = "ME"

    tm.assert_frame_equal(result, expected)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,671 @@
from textwrap import dedent
import numpy as np
import pytest
from pandas.compat import is_platform_windows
import pandas as pd
from pandas import (
DataFrame,
Index,
Series,
TimedeltaIndex,
Timestamp,
)
import pandas._testing as tm
from pandas.core.indexes.datetimes import date_range
@pytest.fixture
def test_frame():
    """Build a 40-row frame with a per-second index and group labels in ``A``."""
    group_labels = [1] * 20 + [2] * 12 + [3] * 8
    seconds = date_range("1/1/2000", freq="s", periods=40, unit="ns")
    return DataFrame({"A": group_labels, "B": np.arange(40)}, index=seconds)
def test_tab_complete_ipython6_warning(ip):
    """Tab-completing on a resampler inside IPython must not raise on warnings."""
    from IPython.core.completer import provisionalcompleter

    setup_code = dedent(
        """\
        import numpy as np
        from pandas import Series, date_range
        data = np.arange(10, dtype=np.float64)
        index = date_range("2020-01-01", periods=len(data))
        s = Series(data, index=index)
        rs = s.resample("D")
        """
    )
    ip.run_cell(setup_code)

    # GH 31324 newer jedi version raises Deprecation warning;
    # appears resolved 2021-02-02
    with (
        tm.assert_produces_warning(None, raise_on_extra_warnings=False),
        provisionalcompleter("ignore"),
    ):
        list(ip.Completer.completions("rs.", 1))
def test_deferred_with_groupby():
    """Deferred resample ops on a groupby match resampling inside ``apply``."""
    # GH 12486
    # support deferred resample ops with groupby
    rows = [
        ["2010-01-01", "A", 2],
        ["2010-01-02", "A", 3],
        ["2010-01-05", "A", 8],
        ["2010-01-10", "A", 7],
        ["2010-01-13", "A", 3],
        ["2010-01-01", "B", 5],
        ["2010-01-03", "B", 2],
        ["2010-01-04", "B", 1],
        ["2010-01-11", "B", 7],
        ["2010-01-14", "B", 3],
    ]
    scores = DataFrame(rows, columns=["date", "id", "score"])
    scores.date = pd.to_datetime(scores.date)

    def asfreq_daily(group):
        return group.set_index("date").resample("D").asfreq()

    expected = scores.groupby("id").apply(asfreq_daily)
    result = scores.set_index("date").groupby("id").resample("D").asfreq()
    tm.assert_frame_equal(result, expected)

    weekly = DataFrame(
        {
            "date": date_range(start="2016-01-01", periods=4, freq="W"),
            "group": [1, 1, 2, 2],
            "val": [5, 6, 7, 8],
        }
    ).set_index("date")

    def ffill_daily(group):
        return group.resample("1D").ffill()

    expected = weekly.groupby("group").apply(ffill_daily)
    result = weekly.groupby("group").resample("1D").ffill()
    tm.assert_frame_equal(result, expected)
def test_getitem(test_frame):
    """Selecting column ``B`` at any stage of groupby-resample gives the same mean."""
    by_a = test_frame.groupby("A")
    expected = by_a.B.apply(lambda grp: grp.resample("2s").mean())

    # Select after resampling, before resampling, and on the aggregated frame.
    selected_after_resample = by_a.resample("2s").B.mean()
    tm.assert_series_equal(selected_after_resample, expected)

    selected_before_resample = by_a.B.resample("2s").mean()
    tm.assert_series_equal(selected_before_resample, expected)

    selected_from_result = by_a.resample("2s").mean().B
    tm.assert_series_equal(selected_from_result, expected)
def test_getitem_multiple():
    """Selecting the same column twice from one resampler gives equal results."""
    # GH 13174
    # multiple calls after selection causing an issue with aliasing
    records = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
    df = DataFrame(records, index=date_range("2016-01-01", periods=2))
    resampler = df.groupby("id").resample("1D")

    expected = Series(
        [1, 1],
        index=pd.MultiIndex.from_arrays([[1, 2], df.index], names=("id", None)),
        name="buyer",
    )

    first_call = resampler["buyer"].count()
    tm.assert_series_equal(first_call, expected)

    second_call = resampler["buyer"].count()
    tm.assert_series_equal(second_call, expected)
def test_groupby_resample_on_api_with_getitem():
    """``resample(on=...)`` with a column selection matches resampling on the index."""
    # GH 17813
    df = DataFrame(
        {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1}
    )

    via_index = df.set_index("date").groupby("id").resample("2D")["data"].sum()
    via_on = df.groupby("id").resample("2D", on="date")["data"].sum()

    tm.assert_series_equal(via_on, via_index)
def test_groupby_with_origin():
    """A fixed Grouper ``origin`` aligns the bins of a series and of its slice."""
    # GH 31809
    freq = "1399min"  # prime number that is smaller than 24h
    start, end = "1/1/2000 00:00:00", "1/31/2000 00:00"
    middle = "1/15/2000 00:00:00"

    rng = date_range(start, end, freq="1231min")  # prime number
    ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
    ts2 = ts[middle:end]

    # proves that grouper without a fixed origin does not work
    # when dealing with unusual frequencies
    simple_grouper = pd.Grouper(freq=freq)
    count_ts = ts.groupby(simple_grouper).agg("count")[middle:end]
    count_ts2 = ts2.groupby(simple_grouper).agg("count")
    with pytest.raises(AssertionError, match="Index are different"):
        tm.assert_index_equal(count_ts.index, count_ts2.index)

    # test origin on 1970-01-01 00:00:00
    epoch_grouper = pd.Grouper(freq=freq, origin=Timestamp(0))
    epoch_counts = ts.groupby(epoch_grouper).agg("count")[middle:end]
    epoch_counts2 = ts2.groupby(epoch_grouper).agg("count")
    tm.assert_series_equal(epoch_counts, epoch_counts2)

    # test origin on 2049-10-18 20:00:00
    origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000
    future_grouper = pd.Grouper(freq=freq, origin=origin_future)
    future_counts = ts.groupby(future_grouper).agg("count")[middle:end]
    future_counts2 = ts2.groupby(future_grouper).agg("count")
    tm.assert_series_equal(future_counts, future_counts2)

    # both grouper use an adjusted timestamp that is a multiple of 1399 min
    # they should be equals even if the adjusted_timestamp is in the future
    tm.assert_series_equal(epoch_counts, future_counts2)
def test_nearest():
    """Upsampling minute data to 20s with ``nearest`` picks the closest value."""
    # GH 17496
    # Resample nearest
    minutes = date_range("1/1/2000", periods=3, freq="min", unit="ns")
    ser = Series(range(3), index=minutes)

    result = ser.resample("20s").nearest()

    expected_index = pd.DatetimeIndex(
        [
            "2000-01-01 00:00:00",
            "2000-01-01 00:00:20",
            "2000-01-01 00:00:40",
            "2000-01-01 00:01:00",
            "2000-01-01 00:01:20",
            "2000-01-01 00:01:40",
            "2000-01-01 00:02:00",
        ],
        dtype="datetime64[ns]",
        freq="20s",
    )
    expected = Series([0, 0, 1, 1, 1, 2, 2], index=expected_index)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "f",
    [
        "first",
        "last",
        "median",
        "sem",
        "sum",
        "mean",
        "min",
        "max",
        "size",
        "count",
        "nearest",
        "bfill",
        "ffill",
        "asfreq",
        "ohlc",
    ],
)
def test_methods(f, test_frame):
    """Each groupby-resample method matches calling it per group via ``apply``."""
    by_a = test_frame.groupby("A")

    def resample_group(group):
        return getattr(group.resample("2s"), f)()

    result = getattr(by_a.resample("2s"), f)()
    expected = by_a.apply(resample_group)

    tm.assert_equal(result, expected)
def test_methods_nunique(test_frame):
    """``nunique`` on a selected column matches the per-group computation."""
    # series only
    by_a = test_frame.groupby("A")

    result = by_a.resample("2s").B.nunique()
    expected = by_a.B.apply(lambda grp: grp.resample("2s").nunique())

    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("f", ["std", "var"])
def test_methods_std_var(f, test_frame):
    """``std``/``var`` with ``ddof=1`` match calling them per group via ``apply``."""
    by_a = test_frame.groupby("A")

    def resample_group(group):
        return getattr(group.resample("2s"), f)(ddof=1)

    result = getattr(by_a.resample("2s"), f)(ddof=1)
    expected = by_a.apply(resample_group)

    tm.assert_frame_equal(result, expected)
def test_apply(test_frame):
    """``apply`` on the resampler and on the groupby both reproduce ``sum``."""
    by_a = test_frame.groupby("A")
    resampler = by_a.resample("2s")

    # reduction
    expected = by_a.resample("2s").sum()

    def resample_then_sum(group):
        return group.resample("2s").sum()

    tm.assert_frame_equal(resampler.apply(resample_then_sum), expected)

    def resample_then_apply_sum(group):
        return group.resample("2s").apply(lambda y: y.sum())

    tm.assert_frame_equal(by_a.apply(resample_then_apply_sum), expected)
def test_apply_with_mutated_index():
# GH 15169
index = date_range("1-1-2015", "12-31-15", freq="D")
df = DataFrame(
data={"col1": np.random.default_rng(2).random(len(index))}, index=index
)
def f(x):
s = Series([1, 2], index=["a", "b"])
return s
expected = df.groupby(pd.Grouper(freq="ME")).apply(f)
result = df.resample("ME").apply(f)
tm.assert_frame_equal(result, expected)
# A case for series
expected = df["col1"].groupby(pd.Grouper(freq="ME"), group_keys=False).apply(f)
result = df["col1"].resample("ME").apply(f)
tm.assert_series_equal(result, expected)
def test_apply_columns_multilevel():
    """``resample().apply`` can dispatch per column on MultiIndex column names."""
    # GH 16231
    column_tuples = [("A", "a", "", "one"), ("B", "b", "i", "two")]
    quarter_hours = date_range(start="2017-01-01", freq="15Min", periods=8)
    df = DataFrame(
        np.zeros((8, 2), dtype=np.int64),
        index=quarter_hours,
        columns=pd.MultiIndex.from_tuples(column_tuples),
    )

    # Sum the columns whose last level is "one"; average every other column.
    agg_dict = {}
    for col in df.columns:
        agg_dict[col] = np.sum if col[3] == "one" else np.mean

    result = df.resample("h").apply(lambda x: agg_dict[x.name](x))

    expected = DataFrame(
        2 * [[0, 0.0]],
        index=date_range(start="2017-01-01", freq="1h", periods=2),
        columns=pd.MultiIndex.from_tuples(column_tuples),
    )
    tm.assert_frame_equal(result, expected)
def test_apply_non_naive_index():
    """``resample().apply`` forwards keyword arguments on a tz-aware index."""

    def weighted_quantile(series, weights, q):
        # Return the first sorted value whose cumulative weight reaches the
        # fraction ``q`` of the total weight.
        ordered = series.sort_values()
        running_weight = weights.reindex(ordered.index).fillna(0).cumsum()
        threshold = running_weight.iloc[-1] * q
        return ordered[running_weight >= threshold].iloc[0]

    times = date_range("2017-6-23 18:00", periods=8, freq="15min", tz="UTC")
    data = Series([1.0, 1, 1, 1, 1, 2, 2, 0], index=times)
    weights = Series([160.0, 91, 65, 43, 24, 10, 1, 0], index=times)

    result = data.resample("D").apply(weighted_quantile, weights=weights, q=0.5)

    single_day = date_range(
        "2017-06-23 00:00:00+00:00", "2017-06-23 00:00:00+00:00", freq="D", tz="UTC"
    )
    tm.assert_series_equal(result, Series([1.0], index=single_day))
def test_resample_groupby_with_label(unit):
    """Groupby-resample honours ``label="left"`` for weekly bins."""
    # GH 13235
    every_other_day = date_range("2000-01-01", freq="2D", periods=5, unit=unit)
    df = DataFrame(
        index=every_other_day,
        data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]},
    )

    result = df.groupby("col0").resample("1W", label="left").sum()

    group_level = np.array([0, 0, 1, 2], dtype=np.int64)
    week_level = np.array(
        ["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"],
        dtype=f"M8[{unit}]",
    )
    mindex = pd.MultiIndex.from_arrays(
        [group_level, week_level], names=["col0", None]
    )
    expected = DataFrame(data={"col1": [1, 1, 2, 1]}, index=mindex)
    tm.assert_frame_equal(result, expected)
def test_consistency_with_window(test_frame):
    """Groupby-resample and groupby-rolling share the same outer index level."""
    # consistent return values with window
    df = test_frame
    expected = Index([1, 2, 3], name="A")

    def check_outer_level(result):
        assert result.index.nlevels == 2
        tm.assert_index_equal(result.index.levels[0], expected)

    check_outer_level(df.groupby("A").resample("2s").mean())
    check_outer_level(df.groupby("A").rolling(20).mean())
def test_median_duplicate_columns():
    """``median`` with duplicate column labels matches the unique-label result."""
    # GH 14233
    seconds = date_range("2012-01-01", periods=20, freq="s")
    df = DataFrame(
        np.random.default_rng(2).standard_normal((20, 3)),
        columns=list("aaa"),
        index=seconds,
    )
    result = df.resample("5s").median()

    # Recompute with unique labels, then put the duplicate labels back so that
    # only the values are compared.
    df.columns = ["a", "b", "c"]
    expected = df.resample("5s").median()
    expected.columns = result.columns

    tm.assert_frame_equal(result, expected)
def test_apply_to_one_column_of_df():
    """``resample().apply`` works when the function reads a single column."""
    # GH: 36951
    every_20_min = date_range("2012-01-01", periods=10, freq="20min")
    df = DataFrame({"col": range(10), "col1": range(10, 20)}, index=every_20_min)
    expected = Series(
        [3, 12, 21, 9], index=date_range("2012-01-01", periods=4, freq="h")
    )

    # access "col" via getattr -> make sure we handle AttributeError
    by_attribute = df.resample("h").apply(lambda group: group.col.sum())
    tm.assert_series_equal(by_attribute, expected)

    # access "col" via __getitem__ -> make sure we handle KeyError
    by_item = df.resample("h").apply(lambda group: group["col"].sum())
    tm.assert_series_equal(by_item, expected)
def test_resample_groupby_agg():
# GH: 33548
df = DataFrame(
{
"cat": [
"cat_1",
"cat_1",
"cat_2",
"cat_1",
"cat_2",
"cat_1",
"cat_2",
"cat_1",
],
"num": [5, 20, 22, 3, 4, 30, 10, 50],
"date": [
"2019-2-1",
"2018-02-03",
"2020-3-11",
"2019-2-2",
"2019-2-2",
"2018-12-4",
"2020-3-11",
"2020-12-12",
],
}
)
df["date"] = pd.to_datetime(df["date"])
resampled = df.groupby("cat").resample("YE", on="date")
expected = resampled[["num"]].sum()
result = resampled.agg({"num": "sum"})
tm.assert_frame_equal(result, expected)
def test_resample_groupby_agg_listlike():
# GH 42905
ts = Timestamp("2021-02-28 00:00:00")
df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date"))
resampled = df.groupby("class").resample("ME")["value"]
result = resampled.agg(["sum", "size"])
expected = DataFrame(
[[69, 1]],
index=pd.MultiIndex.from_tuples([("beta", ts)], names=["class", "date"]),
columns=["sum", "size"],
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
def test_empty(keys):
    """Groupby-resample ``mean`` on an empty TimedeltaIndex frame stays empty."""
    # GH 26411
    df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([]))

    result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()

    # Only the non-key columns remain in the result.
    if keys == ["a"]:
        expected_columns = ["b"]
    else:
        expected_columns = []
    keyed = DataFrame(columns=["a", "b"]).set_index(keys, drop=False)
    keyed = keyed.set_index(TimedeltaIndex([]), append=True)
    expected = keyed[expected_columns]
    if len(keys) == 1:
        expected.index.name = keys[0]

    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("consolidate", [True, False])
def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
    """``min`` keeps an object column that is all-NaN for one of the groups."""
    # https://github.com/pandas-dev/pandas/issues/39329
    dates = date_range("2020-01-01", periods=15, freq="D", unit="ns")
    with_object = DataFrame(
        {"key": "A", "date": dates, "col1": range(15), "col_object": "val"}
    )
    without_object = DataFrame({"key": "B", "date": dates, "col1": range(15)})
    df = pd.concat([with_object, without_object], ignore_index=True)
    if consolidate:
        df = df._consolidate()

    result = df.groupby(["key"]).resample("W", on="date").min()

    week_ends = pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2)
    idx = pd.MultiIndex.from_arrays(
        [["A"] * 3 + ["B"] * 3, week_ends.as_unit("ns")],
        names=["key", "date"],
    )
    expected = DataFrame(
        {
            "col1": [0, 5, 12] * 2,
            "col_object": ["val"] * 3 + [np.nan] * 3,
        },
        index=idx,
    )
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("min_count", [0, 1])
def test_groupby_resample_empty_sum_string(
    string_dtype_no_object, test_frame, min_count
):
    """Groupby-resample ``sum`` of all-NA strings gives "" or NA by ``min_count``."""
    # https://github.com/pandas-dev/pandas/issues/60229
    dtype = string_dtype_no_object
    all_na = pd.array([pd.NA] * len(test_frame), dtype=dtype)
    frame = test_frame.assign(B=all_na)

    result = frame.groupby("A").resample("40s").sum(min_count=min_count)

    first_bin = pd.to_datetime("2000-01-01", unit="ns").as_unit("ns")
    index = pd.MultiIndex(
        levels=[[1, 2, 3], [first_bin]],
        codes=[[0, 1, 2], [0, 0, 0]],
        names=["A", None],
    )
    if min_count == 0:
        value = ""
    else:
        value = pd.NA
    expected = DataFrame({"B": value}, index=index, dtype=dtype)
    tm.assert_frame_equal(result, expected)
def test_groupby_resample_with_list_of_keys():
    """Selecting a list of columns works with ``resample(on=...)`` on a groupby."""
    # GH 47362
    df = DataFrame(
        data={
            "date": date_range(start="2016-01-01", periods=8),
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [1, 7, 5, 2, 3, 10, 5, 1],
        }
    )

    result = df.groupby("group").resample("2D", on="date")[["val"]].mean()

    # Every second date labels a two-day bin.
    bin_labels = df["date"]._values[::2]
    mi_exp = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], bin_labels], names=["group", "date"]
    )
    expected = DataFrame(data={"val": [4.0, 3.5, 6.5, 3.0]}, index=mi_exp)
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
def test_resample_no_index(keys):
    """Groupby-resample ``mean`` on an empty datetime-indexed frame stays empty."""
    # GH 47705
    df = DataFrame([], columns=["a", "b", "date"])
    df["date"] = pd.to_datetime(df["date"])
    df = df.set_index("date")

    result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()

    # Only the non-key columns remain in the result.
    if keys == ["a"]:
        expected_columns = ["b"]
    else:
        expected_columns = []
    keyed = DataFrame(columns=["a", "b", "date"]).set_index(keys, drop=False)
    keyed["date"] = pd.to_datetime(keyed["date"])
    expected = keyed.set_index("date", append=True, drop=True)[expected_columns]
    if len(keys) == 1:
        expected.index.name = keys[0]

    tm.assert_frame_equal(result, expected)
def test_resample_no_columns():
    """Groupby-resample ``mean`` works on a frame that has no columns."""
    # GH#52484
    stamps = pd.to_datetime(
        ["2018-01-01 00:00:00", "2018-01-01 12:00:00", "2018-01-02 00:00:00"]
    )
    df = DataFrame(index=Index(stamps, name="date"))

    result = df.groupby([0, 0, 1]).resample(rule=pd.to_timedelta("06:00:00")).mean()

    bin_labels = pd.to_datetime(
        [
            "2018-01-01 00:00:00",
            "2018-01-01 06:00:00",
            "2018-01-01 12:00:00",
            "2018-01-02 00:00:00",
        ]
    )
    expected_index = pd.MultiIndex(
        levels=[np.array([0, 1], dtype=np.intp), bin_labels],
        codes=[[0, 0, 0, 1], [0, 1, 2, 3]],
        names=[None, "date"],
    )
    expected = DataFrame(index=expected_index)

    # GH#52710 - Index comes out as 32-bit on 64-bit Windows
    compare_index_type = not is_platform_windows()
    tm.assert_frame_equal(result, expected, check_index_type=compare_index_type)
def test_groupby_resample_size_all_index_same():
    """Check ``size`` after a daily groupby-resample.

    Each (group, day) combination is expected to contain three rows.
    """
    # GH 46826
    hourly = date_range("31/12/2000 18:00", freq="h", periods=12, unit="ns")
    group_labels = [1] * 3 + [2] * 3 + [1] * 3 + [2] * 3
    df = DataFrame({"A": group_labels, "B": np.arange(12)}, index=hourly)
    result = df.groupby("A").resample("D").size()

    day_level = pd.DatetimeIndex(["2000-12-31", "2001-01-01"] * 2, dtype="M8[ns]")
    expected_index = pd.MultiIndex.from_arrays(
        [[1, 1, 2, 2], day_level],
        names=["A", None],
    )
    expected = Series(3, index=expected_index)
    tm.assert_series_equal(result, expected)
def test_groupby_resample_on_index_with_list_of_keys():
    """Select a column list after resampling a groupby on the frame's index."""
    # GH 50840
    dates = date_range(start="2016-01-01", periods=8, name="date")
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [3, 1, 4, 1, 5, 9, 2, 6],
        },
        index=dates,
    )
    resampler = df.groupby("group").resample("2D")
    result = resampler[["val"]].mean()

    # Every second index entry is the left edge of a two-day bin.
    expected_index = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], df.index[::2]], names=["group", "date"]
    )
    expected = DataFrame(data={"val": [2.0, 2.5, 7.0, 4.0]}, index=expected_index)
    tm.assert_frame_equal(result, expected)
def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
    """Select two of three value columns after resampling a groupby.

    The unselected ``third_val`` column must not appear in the result.
    """
    # GH 50876
    dates = date_range(start="2016-01-01", periods=8, name="date")
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "first_val": [3, 1, 4, 1, 5, 9, 2, 6],
            "second_val": [2, 7, 1, 8, 2, 8, 1, 8],
            "third_val": [1, 4, 1, 4, 2, 1, 3, 5],
        },
        index=dates,
    )
    resampler = df.groupby("group").resample("2D")
    result = resampler[["first_val", "second_val"]].mean()

    expected_index = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], df.index[::2]], names=["group", "date"]
    )
    expected_means = {
        "first_val": [2.0, 2.5, 7.0, 4.0],
        "second_val": [4.5, 4.5, 5.0, 4.5],
    }
    expected = DataFrame(data=expected_means, index=expected_index)
    tm.assert_frame_equal(result, expected)
def test_groupby_resample_on_index_with_list_of_keys_missing_column():
    """Selecting an unknown column from a groupby-resampler raises ``KeyError``."""
    # GH 50876
    date_index = Series(
        date_range(start="2016-01-01", periods=8),
        name="date",
    )
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [3, 1, 4, 1, 5, 9, 2, 6],
        },
        index=date_index,
    )
    resampler = df.groupby("group").resample("2D")
    with pytest.raises(KeyError, match="Columns not found"):
        resampler[["val_not_in_dataframe"]]

View File

@@ -0,0 +1,439 @@
from datetime import datetime
from operator import methodcaller
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
Series,
Timestamp,
)
import pandas._testing as tm
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
@pytest.fixture
def test_series():
    """Return 1000 seeded standard-normal values on a daily index from 2000-01-01."""
    values = np.random.default_rng(2).standard_normal(1000)
    daily_index = date_range("1/1/2000", periods=1000)
    return Series(values, index=daily_index)
def test_apply(test_series):
    """``apply`` through a year-end Grouper matches ``apply`` grouped by year."""

    def f(x):
        # the three largest values of the group
        return x.sort_values()[-3:]

    year_end = Grouper(freq="YE", label="right", closed="right")
    applied = test_series.groupby(year_end).apply(f)
    expected = test_series.groupby(lambda stamp: stamp.year).apply(f)

    # Compare without the outermost (group key) index level of either result.
    applied.index = applied.index.droplevel(0)
    expected.index = expected.index.droplevel(0)
    tm.assert_series_equal(applied, expected)
def test_count(test_series):
    """``count`` by year agrees for a Grouper and for ``resample("YE")``.

    Every third value is set to NaN before counting.
    """
    test_series[::3] = np.nan
    by_year = test_series.groupby(lambda stamp: stamp.year).count()

    year_end = Grouper(freq="YE", label="right", closed="right")
    via_grouper = test_series.groupby(year_end).count()
    # Align labels: the reference is keyed by year number, not by timestamp.
    by_year.index = via_grouper.index
    tm.assert_series_equal(via_grouper, by_year)

    via_resample = test_series.resample("YE").count()
    by_year.index = via_resample.index
    tm.assert_series_equal(via_resample, by_year)
def test_numpy_reduction(test_series):
    """Yearly ``resample(...).prod()`` matches grouping by year with ``np.prod``."""
    yearly = test_series.resample("YE", closed="right")
    result = yearly.prod()

    by_year = test_series.groupby(lambda stamp: stamp.year)
    expected = by_year.agg(np.prod)
    # Align labels: the reference is keyed by year number, not by timestamp.
    expected.index = result.index

    tm.assert_series_equal(result, expected)
def test_apply_iteration():
# #2300
N = 1000
ind = date_range(start="2000-01-01", freq="D", periods=N)
df = DataFrame({"open": 1, "close": 2}, index=ind)
tg = Grouper(freq="ME")
grouper, _ = tg._get_grouper(df)
# Errors
grouped = df.groupby(grouper, group_keys=False)
def f(df):
return df["close"] / df["open"]
# it works!
result = grouped.apply(f)
tm.assert_index_equal(result.index, df.index)
@pytest.mark.parametrize(
    "index",
    [
        Index([1, 2]),
        Index(["a", "b"]),
        Index([1.1, 2.2]),
        pd.MultiIndex.from_arrays([[1, 2], ["a", "b"]]),
    ],
)
def test_fails_on_no_datetime_index(index):
    """Grouping by ``Grouper(freq=...)`` on a non datetime-like index raises.

    The error message must name the class of the offending index.
    """
    name = type(index).__name__
    frame = DataFrame({"a": range(len(index))}, index=index)
    daily = Grouper(freq="D")

    msg = (
        "Only valid with DatetimeIndex, TimedeltaIndex "
        f"or PeriodIndex, but got an instance of '{name}'"
    )
    with pytest.raises(TypeError, match=msg):
        frame.groupby(daily)
def test_aaa_group_order():
    """Rows inside each daily group keep their original relative order."""
    # GH 12840
    # check TimeGrouper perform stable sorts
    n = 20
    values = np.random.default_rng(2).standard_normal((n, 4))
    df = DataFrame(values, columns=["A", "B", "C", "D"])

    # Five consecutive days repeated four times: day k sits on rows k, k+5, ...
    days = [datetime(2013, 1, day) for day in range(1, 6)]
    df["key"] = days * 4
    grouped = df.groupby(Grouper(key="key", freq="D"))

    for offset, day in enumerate(days):
        tm.assert_frame_equal(grouped.get_group(day), df[offset::5])
def test_aggregate_normal(resample_method):
    """Check TimeGrouper's aggregation is identical as normal groupby."""
    columns = ["A", "B", "C", "D"]
    data = np.random.default_rng(2).standard_normal((20, 4))

    # Reference frame keyed by plain integers 1..5.
    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    # Same data keyed by five consecutive days.
    days = [datetime(2013, 1, day) for day in range(1, 6)]
    dt_df = DataFrame(data, columns=columns)
    dt_df["key"] = Index(days * 4, dtype="M8[ns]")

    aggregate = methodcaller(resample_method)
    expected = aggregate(normal_df.groupby("key"))
    dt_result = aggregate(dt_df.groupby(Grouper(key="key", freq="D")))

    # Relabel the integer keys with the matching dates before comparing.
    expected.index = date_range(
        start="2013-01-01", freq="D", periods=5, unit="ns", name="key"
    )
    tm.assert_equal(expected, dt_result)
@pytest.mark.xfail(reason="if TimeGrouper is used included, 'nth' doesn't work yet")
def test_aggregate_nth():
    """Check TimeGrouper's aggregation is identical as normal groupby."""
    columns = ["A", "B", "C", "D"]
    data = np.random.default_rng(2).standard_normal((20, 4))

    # Reference frame keyed by plain integers 1..5.
    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    # Same data keyed by five consecutive days.
    days = [datetime(2013, 1, day) for day in range(1, 6)]
    dt_df = DataFrame(data, columns=columns)
    dt_df["key"] = days * 4

    expected = normal_df.groupby("key").nth(3)
    expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")

    dt_result = dt_df.groupby(Grouper(key="key", freq="D")).nth(3)
    tm.assert_frame_equal(expected, dt_result)
@pytest.mark.parametrize(
    "method, method_args, unit",
    [
        ("sum", {}, 0),
        ("sum", {"min_count": 0}, 0),
        ("sum", {"min_count": 1}, np.nan),
        ("prod", {}, 1),
        ("prod", {"min_count": 0}, 1),
        ("prod", {"min_count": 1}, np.nan),
    ],
)
def test_resample_entirely_nat_window(method, method_args, unit):
    """Reduce a resampled series whose second two-day window is all NaN.

    ``unit`` is the value expected for that all-NaN window.
    """
    stamps = date_range("2017", periods=4, unit="ns")
    ser = Series([0] * 2 + [np.nan] * 2, index=stamps)

    reducer = getattr(ser.resample("2D"), method)
    result = reducer(**method_args)

    exp_dti = pd.DatetimeIndex(["2017-01-01", "2017-01-03"], dtype="M8[ns]", freq="2D")
    expected = Series([0.0, unit], index=exp_dti)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "func, fill_value",
    [("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)],
)
def test_aggregate_with_nat(func, fill_value):
    """Check TimeGrouper's aggregation against a normal groupby with a NaT key.

    ``fill_value`` is what the aggregation is expected to report for the day
    whose key is missing.
    """
    # if NaT is included, 'var', 'std', 'mean', 'first','last'
    # and 'nth' doesn't work yet
    columns = ["A", "B", "C", "D"]
    n = 20
    data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64")

    # Reference frame: integer keys with NaN in the third slot.
    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    # Same data keyed by days, with NaT in the third slot.
    dt_keys = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ]
    dt_df = DataFrame(data, columns=columns)
    dt_df["key"] = Index(dt_keys * 4, dtype="M8[ns]")

    aggregate = methodcaller(func)
    normal_result = aggregate(normal_df.groupby("key"))
    dt_result = aggregate(dt_df.groupby(Grouper(key="key", freq="D")))

    # Insert a row labelled 3 (the missing-key slot) carrying fill_value.
    pad = DataFrame([[fill_value] * 4], index=[3], columns=columns)
    expected = pd.concat([normal_result, pad]).sort_index()

    dti = date_range(
        start="2013-01-01",
        freq="D",
        periods=5,
        name="key",
        unit=dt_df["key"]._values.unit,
    )
    expected.index = dti._with_freq(None)  # TODO: is this desired?
    tm.assert_frame_equal(expected, dt_result)
    assert dt_result.index.name == "key"
def test_aggregate_with_nat_size():
    """``size`` through a daily Grouper matches a normal groupby with a NaT key.

    The day whose key is missing is expected to report a size of 0.
    """
    # GH 9925
    columns = ["A", "B", "C", "D"]
    n = 20
    data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64")

    # Reference frame: integer keys with NaN in the third slot.
    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    # Same data keyed by days, with NaT in the third slot.
    dt_keys = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ]
    dt_df = DataFrame(data, columns=columns)
    dt_df["key"] = Index(dt_keys * 4, dtype="M8[ns]")

    normal_result = normal_df.groupby("key").size()
    dt_result = dt_df.groupby(Grouper(key="key", freq="D")).size()

    # Insert an entry labelled 3 (the missing-key slot) with size 0.
    pad = Series([0], index=[3])
    expected = pd.concat([normal_result, pad]).sort_index()

    dti = date_range(
        start="2013-01-01",
        freq="D",
        periods=5,
        name="key",
        unit=dt_df["key"]._values.unit,
    )
    expected.index = dti._with_freq(None)
    tm.assert_series_equal(expected, dt_result)
    assert dt_result.index.name == "key"
def test_repr():
# GH18203
result = repr(Grouper(key="A", freq="h"))
expected = (
"TimeGrouper(key='A', freq=<Hour>, sort=True, dropna=True, "
"closed='left', label='left', how='mean', "
"convention='e', origin='start_day')"
)
assert result == expected
result = repr(Grouper(key="A", freq="h", origin="2000-01-01"))
expected = (
"TimeGrouper(key='A', freq=<Hour>, sort=True, dropna=True, "
"closed='left', label='left', how='mean', "
"convention='e', origin=Timestamp('2000-01-01 00:00:00'))"
)
assert result == expected
@pytest.mark.parametrize(
    "method, method_args, expected_values",
    [
        ("sum", {}, [1, 0, 1]),
        ("sum", {"min_count": 0}, [1, 0, 1]),
        ("sum", {"min_count": 1}, [1, np.nan, 1]),
        ("sum", {"min_count": 2}, [np.nan, np.nan, np.nan]),
        ("prod", {}, [1, 1, 1]),
        ("prod", {"min_count": 0}, [1, 1, 1]),
        ("prod", {"min_count": 1}, [1, np.nan, 1]),
        ("prod", {"min_count": 2}, [np.nan, np.nan, np.nan]),
    ],
)
def test_upsample_sum(method, method_args, expected_values):
    """Reduce an hourly series upsampled to 30 minutes.

    The parametrization covers ``sum`` and ``prod`` with different
    ``min_count`` settings.
    """
    hourly = date_range("2017", periods=2, freq="h", unit="ns")
    resampled = Series(1, index=hourly).resample("30min")
    result = getattr(resampled, method)(**method_args)

    half_hourly = pd.DatetimeIndex(
        ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"],
        dtype="M8[ns]",
        freq="30min",
    )
    expected = Series(expected_values, index=half_hourly)
    tm.assert_series_equal(result, expected)
@pytest.fixture
def groupy_test_df():
    """Return a three-row price/volume frame on a weekly index starting 2018-01."""
    weekly = date_range("01/01/2018", periods=3, freq="W", unit="ns")
    columns = {"price": [10, 11, 9], "volume": [50, 60, 50]}
    return DataFrame(columns, index=weekly)
def test_groupby_resample_interpolate_raises(groupy_test_df):
    """Interpolating directly on a groupby-resample raises ``NotImplementedError``."""
    # GH 35325
    # Run against the fixture and against a copy with index.name=None.
    unnamed_copy = groupy_test_df.copy()
    unnamed_copy.index.name = None

    msg = "Direct interpolation of MultiIndex data frames is not supported"
    for df in (groupy_test_df, unnamed_copy):
        with pytest.raises(NotImplementedError, match=msg):
            df.groupby("volume").resample("1D").interpolate(method="linear")
def test_groupby_resample_interpolate_with_apply_syntax(groupy_test_df):
    """Daily linear interpolation per group via ``groupby(...).apply(...)``.

    The check is run on the fixture and on a copy whose index name is None.
    """
    # GH 35325
    unnamed_copy = groupy_test_df.copy()
    unnamed_copy.index.name = None

    # Expected pieces that do not depend on which frame is being checked.
    volume = [50] * 15 + [60]
    week_starting = [
        *list(date_range("2018-01-07", "2018-01-21", unit="ns")),
        Timestamp("2018-01-14"),
    ]
    prices = [
        10.0,
        9.928571428571429,
        9.857142857142858,
        9.785714285714286,
        9.714285714285714,
        9.642857142857142,
        9.571428571428571,
        9.5,
        9.428571428571429,
        9.357142857142858,
        9.285714285714286,
        9.214285714285714,
        9.142857142857142,
        9.071428571428571,
        9.0,
        11.0,
    ]

    for df in (groupy_test_df, unnamed_copy):
        result = df.groupby("volume").apply(
            lambda group: group.resample("1D").interpolate(method="linear"),
        )
        expected_ind = pd.MultiIndex.from_arrays(
            [volume, week_starting],
            names=["volume", df.index.name],
        )
        expected = DataFrame(data={"price": prices}, index=expected_ind)
        tm.assert_frame_equal(result, expected)
def test_groupby_resample_interpolate_with_apply_syntax_off_grid(groupy_test_df):
    """Interpolate per group on a 265h grid that misses the original points.

    Counterpart of test_groupby_resample_interpolate_with_apply_syntax where
    the resampled grid leaves out anchor points needed for interpolation.
    See GH#21351.
    """
    # GH#21351
    result = groupy_test_df.groupby("volume").apply(
        lambda group: group.resample("265h").interpolate(method="linear")
    )

    stamps = [
        Timestamp("2018-01-07"),
        Timestamp("2018-01-18 01:00:00"),
        Timestamp("2018-01-14"),
    ]
    week_starting = pd.DatetimeIndex(stamps).as_unit("ns")
    volume = [50, 50, 60]
    expected_ind = pd.MultiIndex.from_arrays(
        [volume, week_starting],
        names=["volume", "week_starting"],
    )
    expected = DataFrame(data={"price": [10.0, 9.5, 11.0]}, index=expected_ind)
    tm.assert_frame_equal(result, expected, check_names=False)

View File

@@ -0,0 +1,218 @@
from datetime import timedelta
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
from pandas.core.indexes.timedeltas import timedelta_range
def test_asfreq_bug():
df = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)])
result = df.resample("1min").asfreq()
expected = DataFrame(
data=[1, np.nan, np.nan, 3],
index=timedelta_range("0 day", periods=4, freq="1min", unit="us"),
)
tm.assert_frame_equal(result, expected)
def test_resample_with_nat():
    """Resample a timedelta index that contains a NaT entry and take the mean."""
    # GH 13223
    index = pd.to_timedelta(["0s", pd.NaT, "2s"])
    frame = DataFrame({"value": [2, 3, 5]}, index)
    result = frame.resample("1s").mean()

    every_second = timedelta_range("0 day", periods=3, freq="1s")
    expected = DataFrame({"value": [2.5, np.nan, 5.0]}, index=every_second)
    tm.assert_frame_equal(result, expected)
def test_resample_as_freq_with_subperiod():
    """``asfreq`` from a 5-minute to a 2-minute timedelta grid.

    Only grid points shared by both frequencies keep a value.
    """
    # GH 13022
    five_minute = timedelta_range("00:00:00", "00:10:00", freq="5min")
    df = DataFrame(data={"value": [1, 5, 10]}, index=five_minute)
    result = df.resample("2min").asfreq()

    two_minute = timedelta_range("00:00:00", "00:10:00", freq="2min")
    expected = DataFrame(
        data={"value": [1, np.nan, np.nan, np.nan, np.nan, 10]},
        index=two_minute,
    )
    tm.assert_frame_equal(result, expected)
def test_resample_with_timedeltas():
    """30-minute ``sum`` over a minute-spaced timedelta index.

    The reference sums blocks of 30 consecutive rows; the frame and its "A"
    column are both checked.
    """
    n_rows = 1480
    by_block = DataFrame({"A": np.arange(n_rows)})
    expected = by_block.groupby(by_block.index // 30).sum()
    expected.index = timedelta_range("0 days", freq="30min", periods=50)

    minutes = pd.to_timedelta(np.arange(n_rows), unit="min").as_unit("us")
    df = DataFrame({"A": np.arange(n_rows)}, index=minutes)

    frame_result = df.resample("30min").sum()
    tm.assert_frame_equal(frame_result, expected)

    series_result = df["A"].resample("30min").sum()
    tm.assert_series_equal(series_result, expected["A"])
def test_resample_single_period_timedelta():
    """Two-second ``sum`` over five one-second timedelta steps."""
    one_second_steps = timedelta_range("1 day", freq="s", periods=5)
    ser = Series(list(range(5)), index=one_second_steps)
    result = ser.resample("2s").sum()

    two_second_steps = timedelta_range("1 day", freq="2s", periods=3)
    expected = Series([1, 5, 4], index=two_second_steps)
    tm.assert_series_equal(result, expected)
def test_resample_timedelta_idempotency():
    """Resampling at the index's own frequency returns the values as floats."""
    # GH 12072
    ten_ms_steps = timedelta_range("0", periods=9, freq="10ms")
    series = Series(range(9), index=ten_ms_steps)

    result = series.resample("10ms").mean()

    tm.assert_series_equal(result, series.astype(float))
def test_resample_offset_with_timedeltaindex():
    """Check the bin labels produced with and without ``offset="5s"``."""
    # GH 10530 & 31809
    rng = timedelta_range(start="0s", periods=25, freq="s")
    values = np.random.default_rng(2).standard_normal(len(rng))
    ts = Series(values, index=rng)

    shifted = ts.resample("2s", offset="5s").mean()
    plain = ts.resample("2s").mean()

    expected_plain = timedelta_range(start="0s", end="25s", freq="2s")
    expected_shifted = timedelta_range(start="5s", end="29s", freq="2s")

    tm.assert_index_equal(plain.index, expected_plain)
    tm.assert_index_equal(shifted.index, expected_shifted)
def test_resample_categorical_data_with_timedeltaindex():
# GH #12169
df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s"))
df["Group"] = df["Group_obj"].astype("category")
result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
exp_tdi = pd.TimedeltaIndex(np.array([0, 10], dtype="m8[s]"), freq="10s")
expected = DataFrame(
{"Group_obj": ["A", "A"], "Group": ["A", "A"]},
index=exp_tdi,
)
expected = expected.reindex(["Group_obj", "Group"], axis=1)
expected["Group"] = expected["Group_obj"].astype("category")
tm.assert_frame_equal(result, expected)
def test_resample_timedelta_values():
    """Timedelta values stay timedelta when resampling introduces NaT.

    Checked through both the frame and the series resampling paths.
    """
    # GH 13119
    four_day = timedelta_range("1 day", "6 day", freq="4D")
    df = DataFrame({"time": four_day}, index=four_day)

    two_day = timedelta_range("1 day", "6 day", freq="2D")
    exp = Series(two_day, index=two_day, name="time")
    # The middle two-day bin has no source row.
    exp.iloc[1] = pd.NaT

    from_frame = df.resample("2D").first()["time"]
    tm.assert_series_equal(from_frame, exp)

    from_series = df["time"].resample("2D").first()
    tm.assert_series_equal(from_series, exp)
@pytest.mark.parametrize(
    "start, end, freq, resample_freq",
    [
        ("8h", "21h59min50s", "10s", "3h"),  # GH 30353 example
        ("3h", "22h", "1h", "5h"),
        ("527D", "5006D", "3D", "10D"),
        ("1D", "10D", "1D", "2D"),  # GH 13022 example
        # tests that worked before GH 33498:
        ("8h", "21h59min50s", "10s", "2h"),
        ("0h", "21h59min50s", "10s", "3h"),
        ("10D", "85D", "D", "2D"),
    ],
)
def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
    """The resampled timedelta bins must not include an extra trailing bin."""
    # GH 33498
    source_index = timedelta_range(start=start, end=end, freq=freq)
    ser = Series(np.arange(len(source_index)), index=source_index)
    result = ser.resample(resample_freq).min()

    expected_index = timedelta_range(freq=resample_freq, start=start, end=end)
    tm.assert_index_equal(result.index, expected_index)
    assert result.index.freq == expected_index.freq
    # The last bin must hold a real value rather than NaN.
    assert not np.isnan(result.iloc[-1])
@pytest.mark.parametrize("duplicates", [True, False])
def test_resample_with_timedelta_yields_no_empty_groups(duplicates):
    """Count rows per 3-second bin on a finely spaced timedelta index.

    Twelve bins are expected to hold 768 rows each and the last one 528;
    the check is also run with non-unique column labels.
    """
    # GH 10603
    values = np.random.default_rng(2).normal(size=(10000, 4))
    fine_index = timedelta_range(start="0s", periods=10000, freq="3906250ns")
    df = DataFrame(values, index=fine_index)
    if duplicates:
        # case with non-unique columns
        df.columns = ["A", "B", "A", "C"]

    from_one_second = df.loc["1s":, :]
    result = from_one_second.resample("3s").apply(lambda chunk: len(chunk))

    full_bins = [[768] * 4] * 12
    last_bin = [[528] * 4]
    expected = DataFrame(
        full_bins + last_bin,
        index=timedelta_range(start="1s", periods=13, freq="3s", unit="ns"),
    )
    expected.columns = df.columns
    tm.assert_frame_equal(result, expected)
def test_resample_quantile_timedelta(unit):
    """``quantile`` on a timedelta column resampled over a UTC datetime index.

    Input and expected values are both cast to the ``m8[unit]`` dtype.
    """
    # GH: 29485
    dtype = np.dtype(f"m8[{unit}]")
    seconds = pd.to_timedelta(np.arange(4), unit="s").astype(dtype)
    daily = pd.date_range("20200101", periods=4, tz="UTC")
    df = DataFrame({"value": seconds}, index=daily)
    result = df.resample("2D").quantile(0.99)

    quantiles = [
        pd.Timedelta("0 days 00:00:00.990000"),
        pd.Timedelta("0 days 00:00:02.990000"),
    ]
    two_daily = pd.date_range("20200101", periods=2, tz="UTC", freq="2D")
    expected = DataFrame({"value": quantiles}, index=two_daily).astype(dtype)
    tm.assert_frame_equal(result, expected)
def test_resample_closed_right():
    """One-minute ``sum`` with right-closed, right-labelled bins on timedeltas."""
    # GH#45414
    half_minute_steps = [pd.Timedelta(seconds=120 + i * 30) for i in range(10)]
    ser = Series(range(10), index=pd.Index(half_minute_steps))
    result = ser.resample("min", closed="right", label="right").sum()

    minute_labels = [pd.Timedelta(seconds=120 + i * 60) for i in range(6)]
    expected = Series(
        [0, 3, 7, 11, 15, 9],
        index=pd.TimedeltaIndex(minute_labels, freq="min"),
    )
    tm.assert_series_equal(result, expected)
@td.skip_if_no("pyarrow")
def test_arrow_duration_resample():
    """Daily ``mean`` over a pyarrow-duration index leaves the series unchanged."""
    # GH 56371
    daily = timedelta_range("1 day", periods=5)
    idx = pd.Index(daily, dtype="duration[ns][pyarrow]")
    values = np.arange(5, dtype=np.float64)
    expected = Series(values, index=idx)

    result = expected.resample("1D").mean()

    tm.assert_series_equal(result, expected)