initial commit

This commit is contained in:
2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions

View File

@@ -0,0 +1,193 @@
import numpy as np
import pytest
from pandas._libs import iNaT
from pandas.core.dtypes.dtypes import DatetimeTZDtype
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray
class TestDatetimeArrayConstructor:
def test_from_sequence_invalid_type(self):
mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)])
with pytest.raises(TypeError, match="Cannot create a DatetimeArray"):
DatetimeArray._from_sequence(mi, dtype="M8[ns]")
@pytest.mark.parametrize(
"meth",
[
DatetimeArray._from_sequence,
pd.to_datetime,
pd.DatetimeIndex,
],
)
def test_mixing_naive_tzaware_raises(self, meth):
# GH#24569
arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")])
msg = "|".join(
[
"Cannot mix tz-aware with tz-naive values",
"Tz-aware datetime.datetime cannot be converted "
"to datetime64 unless utc=True",
]
)
for obj in [arr, arr[::-1]]:
# check that we raise regardless of whether naive is found
# before aware or vice-versa
with pytest.raises(ValueError, match=msg):
meth(obj)
def test_from_pandas_array(self):
arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9
result = DatetimeArray._from_sequence(arr, dtype="M8[ns]")._with_freq("infer")
expected = pd.date_range("1970-01-01", periods=5, freq="h", unit="ns")._data
tm.assert_datetime_array_equal(result, expected)
def test_bool_dtype_raises(self):
arr = np.array([1, 2, 3], dtype="bool")
msg = r"dtype bool cannot be converted to datetime64\[ns\]"
with pytest.raises(TypeError, match=msg):
DatetimeArray._from_sequence(arr, dtype="M8[ns]")
with pytest.raises(TypeError, match=msg):
pd.DatetimeIndex(arr)
with pytest.raises(TypeError, match=msg):
pd.to_datetime(arr)
def test_copy(self):
data = np.array([1, 2, 3], dtype="M8[ns]")
arr = DatetimeArray._from_sequence(data, dtype=data.dtype, copy=False)
assert arr._ndarray is data
arr = DatetimeArray._from_sequence(data, dtype=data.dtype, copy=True)
assert arr._ndarray is not data
def test_numpy_datetime_unit(self, unit):
data = np.array([1, 2, 3], dtype=f"M8[{unit}]")
arr = DatetimeArray._from_sequence(data)
assert arr.unit == unit
assert arr[0].unit == unit
class TestSequenceToDT64NS:
    """Tests for tz-dtype handling and 2D input in ``_from_sequence``."""

    def test_tz_dtype_mismatch_raises(self):
        """Re-wrapping tz-aware data with a different tz dtype raises TypeError."""
        central = DatetimeTZDtype(tz="US/Central")
        arr = DatetimeArray._from_sequence(["2000"], dtype=central)

        utc = DatetimeTZDtype(tz="UTC")
        with pytest.raises(TypeError, match="data is already tz-aware"):
            DatetimeArray._from_sequence(arr, dtype=utc)

    def test_tz_dtype_matches(self):
        """Re-wrapping tz-aware data with the same tz dtype round-trips."""
        dtype = DatetimeTZDtype(tz="US/Central")
        original = DatetimeArray._from_sequence(["2000"], dtype=dtype)
        roundtripped = DatetimeArray._from_sequence(original, dtype=dtype)
        tm.assert_equal(original, roundtripped)

    @pytest.mark.parametrize("order", ["F", "C"])
    def test_2d(self, order):
        """2D object input matches the raveled result reshaped to 2D."""
        dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
        objects = np.array(dti, dtype=object).reshape(3, 2)
        if order == "F":
            # Transposing the (3, 2) array exercises the other memory layout.
            objects = objects.T

        res = DatetimeArray._from_sequence(objects, dtype=dti.dtype)

        flat = DatetimeArray._from_sequence(objects.ravel(), dtype=dti.dtype)
        expected = flat.reshape(objects.shape)
        tm.assert_datetime_array_equal(res, expected)
# ----------------------------------------------------------------------------
# Arrow interaction

# Integer timestamps (with None as a missing entry) fed to the Arrow tests.
# Used for the cases where the Arrow unit and the pandas unit are the same.
EXTREME_VALUES = [
    0,
    123456789,
    None,
    iNaT,
    2**63 - 1,
    -(2**63) + 1,
]
# Used for the cases that go from a finer Arrow unit to a coarser pandas unit.
FINE_TO_COARSE_SAFE = [123_000_000_000, None, -123_000_000_000]
# Used for the cases that go from a coarser Arrow unit to a finer pandas unit.
COARSE_TO_FINE_SAFE = [123, None, -123]
@pytest.mark.parametrize(
    ("pa_unit", "pd_unit", "pa_tz", "pd_tz", "data"),
    [
        ("s", "s", "UTC", "UTC", EXTREME_VALUES),
        ("ms", "ms", "UTC", "Europe/Berlin", EXTREME_VALUES),
        ("us", "us", "US/Eastern", "UTC", EXTREME_VALUES),
        ("ns", "ns", "US/Central", "Asia/Kolkata", EXTREME_VALUES),
        ("ns", "s", "UTC", "UTC", FINE_TO_COARSE_SAFE),
        ("us", "ms", "UTC", "Europe/Berlin", FINE_TO_COARSE_SAFE),
        ("ms", "us", "US/Eastern", "UTC", COARSE_TO_FINE_SAFE),
        ("s", "ns", "US/Central", "Asia/Kolkata", COARSE_TO_FINE_SAFE),
    ],
)
def test_from_arrow_with_different_units_and_timezones_with(
    pa_unit, pd_unit, pa_tz, pd_tz, data
):
    """Check ``__from_arrow__`` across differing Arrow/pandas units and timezones.

    The expected array is built from the same integers interpreted as UTC in
    the Arrow unit, then cast to the target pandas dtype. Both a plain Arrow
    array and a single-chunk chunked array are checked.
    """
    pa = pytest.importorskip("pyarrow")

    arrow_type = pa.timestamp(pa_unit, tz=pa_tz)
    arrow_array = pa.array(data, type=arrow_type)
    target_dtype = DatetimeTZDtype(unit=pd_unit, tz=pd_tz)

    result = target_dtype.__from_arrow__(arrow_array)
    utc_values = DatetimeArray._from_sequence(data, dtype=f"M8[{pa_unit}, UTC]")
    expected = utc_values.astype(target_dtype, copy=False)
    tm.assert_extension_array_equal(result, expected)

    chunked = pa.chunked_array([arrow_array])
    result = target_dtype.__from_arrow__(chunked)
    tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize(
    ("unit", "tz"),
    [
        ("s", "UTC"),
        ("ms", "Europe/Berlin"),
        ("us", "US/Eastern"),
        ("ns", "Asia/Kolkata"),
        ("ns", "UTC"),
    ],
)
def test_from_arrow_from_empty(unit, tz):
    """Check ``__from_arrow__`` on an empty Arrow array for several unit/tz pairs.

    The Arrow array is built from an empty list without an explicit type.
    Both the plain array and a single-chunk chunked array are checked.
    """
    pa = pytest.importorskip("pyarrow")

    data = []
    arrow_array = pa.array(data)
    target_dtype = DatetimeTZDtype(unit=unit, tz=tz)

    result = target_dtype.__from_arrow__(arrow_array)

    # Build the expectation as a naive empty array, then attach the timezone.
    naive_values = np.array(data, dtype=f"datetime64[{unit}]")
    naive = DatetimeArray._from_sequence(naive_values, dtype=np.dtype(f"M8[{unit}]"))
    expected = naive.tz_localize(tz=tz)
    tm.assert_extension_array_equal(result, expected)

    chunked = pa.chunked_array([arrow_array])
    result = target_dtype.__from_arrow__(chunked)
    tm.assert_extension_array_equal(result, expected)
def test_from_arrow_from_integers():
    """Check ``__from_arrow__`` on an Arrow array built from plain integers.

    The Arrow array is created without an explicit type and converted with a
    nanosecond UTC dtype. Both the plain array and a single-chunk chunked
    array are checked.
    """
    pa = pytest.importorskip("pyarrow")

    data = [0, 123456789, None, 2**63 - 1, iNaT, -123456789]
    arrow_array = pa.array(data)
    target_dtype = DatetimeTZDtype(unit="ns", tz="UTC")

    result = target_dtype.__from_arrow__(arrow_array)

    # Build the expectation as a naive array, then localize it to UTC.
    naive_values = np.array(data, dtype="datetime64[ns]")
    naive = DatetimeArray._from_sequence(naive_values, dtype=np.dtype("M8[ns]"))
    expected = naive.tz_localize("UTC")
    tm.assert_extension_array_equal(result, expected)

    chunked = pa.chunked_array([arrow_array])
    result = target_dtype.__from_arrow__(chunked)
    tm.assert_extension_array_equal(result, expected)

View File

@@ -0,0 +1,44 @@
import pytest
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray
class TestAccumulator:
    """Tests for ``DatetimeArray._accumulate``."""

    def test_accumulators_freq(self):
        """cummin/cummax on an array with an inferred freq give the expected values."""
        # GH#50297
        days = [
            "2000-01-01",
            "2000-01-02",
            "2000-01-03",
        ]
        arr = DatetimeArray._from_sequence(days, dtype="M8[ns]")._with_freq("infer")

        running_min = arr._accumulate("cummin")
        expected_min = DatetimeArray._from_sequence(
            ["2000-01-01"] * 3, dtype="M8[ns]"
        )
        tm.assert_datetime_array_equal(running_min, expected_min)

        running_max = arr._accumulate("cummax")
        expected_max = DatetimeArray._from_sequence(days, dtype="M8[ns]")
        tm.assert_datetime_array_equal(running_max, expected_max)

    @pytest.mark.parametrize("func", ["cumsum", "cumprod"])
    def test_accumulators_disallowed(self, func):
        """cumsum and cumprod raise TypeError on a DatetimeArray."""
        # GH#50297
        days = [
            "2000-01-01",
            "2000-01-02",
        ]
        arr = DatetimeArray._from_sequence(days, dtype="M8[ns]")._with_freq("infer")

        expected_msg = f"Accumulation {func}"
        with pytest.raises(TypeError, match=expected_msg):
            arr._accumulate(func)

View File

@@ -0,0 +1,176 @@
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import DatetimeTZDtype
import pandas as pd
from pandas import NaT
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray
class TestReductions:
@pytest.fixture
def arr1d(self, tz_naive_fixture):
"""Fixture returning DatetimeArray with parametrized timezones"""
tz = tz_naive_fixture
dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]")
arr = DatetimeArray._from_sequence(
[
"2000-01-03",
"2000-01-03",
"NaT",
"2000-01-02",
"2000-01-05",
"2000-01-04",
],
dtype=dtype,
)
return arr
def test_min_max(self, arr1d, unit):
arr = arr1d
arr = arr.as_unit(unit)
tz = arr.tz
result = arr.min()
expected = pd.Timestamp("2000-01-02", tz=tz).as_unit(unit)
assert result == expected
assert result.unit == expected.unit
result = arr.max()
expected = pd.Timestamp("2000-01-05", tz=tz).as_unit(unit)
assert result == expected
assert result.unit == expected.unit
result = arr.min(skipna=False)
assert result is NaT
result = arr.max(skipna=False)
assert result is NaT
@pytest.mark.parametrize("tz", [None, "US/Central"])
def test_min_max_empty(self, skipna, tz):
dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]")
arr = DatetimeArray._from_sequence([], dtype=dtype)
result = arr.min(skipna=skipna)
assert result is NaT
result = arr.max(skipna=skipna)
assert result is NaT
@pytest.mark.parametrize("tz", [None, "US/Central"])
def test_median_empty(self, skipna, tz):
dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]")
arr = DatetimeArray._from_sequence([], dtype=dtype)
result = arr.median(skipna=skipna)
assert result is NaT
arr = arr.reshape(0, 3)
result = arr.median(axis=0, skipna=skipna)
expected = type(arr)._from_sequence([NaT, NaT, NaT], dtype=arr.dtype)
tm.assert_equal(result, expected)
result = arr.median(axis=1, skipna=skipna)
expected = type(arr)._from_sequence([], dtype=arr.dtype)
tm.assert_equal(result, expected)
def test_median(self, arr1d):
arr = arr1d
result = arr.median()
assert result == arr[0]
result = arr.median(skipna=False)
assert result is NaT
result = arr.dropna().median(skipna=False)
assert result == arr[0]
result = arr.median(axis=0)
assert result == arr[0]
def test_median_axis(self, arr1d):
arr = arr1d
assert arr.median(axis=0) == arr.median()
assert arr.median(axis=0, skipna=False) is NaT
msg = r"abs\(axis\) must be less than ndim"
with pytest.raises(ValueError, match=msg):
arr.median(axis=1)
@pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning")
def test_median_2d(self, arr1d):
arr = arr1d.reshape(1, -1)
# axis = None
assert arr.median() == arr1d.median()
assert arr.median(skipna=False) is NaT
# axis = 0
result = arr.median(axis=0)
expected = arr1d
tm.assert_equal(result, expected)
# Since column 3 is all-NaT, we get NaT there with or without skipna
result = arr.median(axis=0, skipna=False)
expected = arr1d
tm.assert_equal(result, expected)
# axis = 1
result = arr.median(axis=1)
expected = type(arr)._from_sequence([arr1d.median()], dtype=arr.dtype)
tm.assert_equal(result, expected)
result = arr.median(axis=1, skipna=False)
expected = type(arr)._from_sequence([NaT], dtype=arr.dtype)
tm.assert_equal(result, expected)
def test_mean(self, arr1d):
arr = arr1d
# manually verified result
expected = arr[0] + 0.4 * pd.Timedelta(days=1)
result = arr.mean()
assert result == expected
result = arr.mean(skipna=False)
assert result is NaT
result = arr.dropna().mean(skipna=False)
assert result == expected
result = arr.mean(axis=0)
assert result == expected
def test_mean_2d(self):
dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific", unit="ns")
dta = dti._data.reshape(3, 2)
result = dta.mean(axis=0)
expected = dta[1]
tm.assert_datetime_array_equal(result, expected)
result = dta.mean(axis=1)
expected = dta[:, 0] + pd.Timedelta(hours=12)
tm.assert_datetime_array_equal(result, expected)
result = dta.mean(axis=None)
expected = dti.mean()
assert result == expected
def test_mean_empty(self, arr1d, skipna):
arr = arr1d[:0]
assert arr.mean(skipna=skipna) is NaT
arr2d = arr.reshape(0, 3)
result = arr2d.mean(axis=0, skipna=skipna)
expected = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=arr.dtype)
tm.assert_datetime_array_equal(result, expected)
result = arr2d.mean(axis=1, skipna=skipna)
expected = arr # i.e. 1D, empty
tm.assert_datetime_array_equal(result, expected)
result = arr2d.mean(axis=None, skipna=skipna)
assert result is NaT