initial commit
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,86 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
array,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:Setting a value on a view:FutureWarning"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [lambda x: x, DatetimeIndex])
def test_datetimeindex(box):
    """Check that a DatetimeIndex built from a Series ignores later writes.

    The index (optionally re-wrapped through ``box``) is compared against a
    deep copy taken before the Series is modified.
    """
    dates = date_range("2019-12-31", periods=3, freq="D")
    series = Series(dates)
    index = box(DatetimeIndex(series))
    snapshot = index.copy(deep=True)
    # Overwrite the first element of the source Series afterwards.
    series.iloc[0] = Timestamp("2020-12-31")
    tm.assert_index_equal(index, snapshot)
|
||||
|
||||
|
||||
def test_datetimeindex_tz_convert():
|
||||
dt = date_range("2019-12-31", periods=3, freq="D", tz="Europe/Berlin")
|
||||
ser = Series(dt)
|
||||
idx = DatetimeIndex(ser).tz_convert("US/Eastern")
|
||||
expected = idx.copy(deep=True)
|
||||
ser.iloc[0] = Timestamp("2020-12-31", tz="Europe/Berlin")
|
||||
tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_datetimeindex_tz_localize():
    """Check that a tz-localized index is unaffected by writes to its source.

    The localized index must still equal the deep copy taken before the
    Series was modified.
    """
    dates = date_range("2019-12-31", periods=3, freq="D")
    series = Series(dates)
    localized = DatetimeIndex(series).tz_localize("Europe/Berlin")
    snapshot = localized.copy(deep=True)
    series.iloc[0] = Timestamp("2020-12-31")
    tm.assert_index_equal(localized, snapshot)
|
||||
|
||||
|
||||
def test_datetimeindex_isocalendar():
|
||||
dt = date_range("2019-12-31", periods=3, freq="D")
|
||||
ser = Series(dt)
|
||||
df = DatetimeIndex(ser).isocalendar()
|
||||
expected = df.index.copy(deep=True)
|
||||
ser.iloc[0] = Timestamp("2020-12-31")
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
|
||||
|
||||
def test_index_values():
|
||||
idx = date_range("2019-12-31", periods=3, freq="D")
|
||||
result = idx.values
|
||||
assert result.flags.writeable is False
|
||||
|
||||
|
||||
def test_constructor_copy_input_datetime_ndarray_default():
|
||||
# GH 63388
|
||||
arr = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
|
||||
idx = DatetimeIndex(arr)
|
||||
assert not np.shares_memory(arr, get_array(idx))
|
||||
|
||||
|
||||
def test_constructor_copy_input_datetime_ea_default():
|
||||
# GH 63388
|
||||
arr = array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
|
||||
idx = DatetimeIndex(arr)
|
||||
assert not tm.shares_memory(arr, idx.array)
|
||||
|
||||
|
||||
def test_series_from_temporary_datetimeindex_readonly_data():
    """Build a Series from a temporary DatetimeIndex over a read-only array.

    Asserts the Series does not share memory with the input array, that
    assigning into it works, and that the result matches the expected values.
    """
    # GH 63388
    raw = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    raw.flags.writeable = False
    series = Series(DatetimeIndex(raw))
    assert not np.shares_memory(raw, get_array(series))
    series.iloc[0] = Timestamp("2020-01-01")
    wanted = Series(
        [Timestamp("2020-01-01"), Timestamp("2020-01-02")], dtype="datetime64[ns]"
    )
    tm.assert_series_equal(series, wanted)
|
||||
@@ -0,0 +1,177 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def index_view(index_data):
    """Return an index and a frame view that were built from the same data.

    A frame with columns ``"a"`` (from ``index_data``) and ``"b"`` is created,
    a full slice of it is kept, and column ``"a"`` is then moved into the index
    with ``set_index("a", drop=True)``.

    Returns
    -------
    tuple
        ``(index, view)``: the index of the re-indexed frame and the full-slice
        view taken before ``set_index``.
    """
    frame = DataFrame({"a": index_data, "b": 1.5})
    sliced = frame[:]
    # Rebind the same name so the pre-set_index frame is only reachable
    # through ``sliced``.
    frame = frame.set_index("a", drop=True)
    return frame.index, sliced
|
||||
|
||||
|
||||
def test_set_index_update_column():
|
||||
df = DataFrame({"a": [1, 2], "b": 1})
|
||||
df = df.set_index("a", drop=False)
|
||||
expected = df.index.copy(deep=True)
|
||||
df.iloc[0, 0] = 100
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
|
||||
|
||||
def test_set_index_drop_update_column():
    """Check that writing through an earlier view leaves the index intact.

    A full slice is taken before ``set_index("a", drop=True)``; the resulting
    index is compared with a deep copy taken before that slice is written to.
    """
    frame = DataFrame({"a": [1, 2], "b": 1.5})
    sliced = frame[:]
    frame = frame.set_index("a", drop=True)
    snapshot = frame.index.copy(deep=True)
    sliced.iloc[0, 0] = 100
    tm.assert_index_equal(frame.index, snapshot)
|
||||
|
||||
|
||||
def test_set_index_series():
|
||||
df = DataFrame({"a": [1, 2], "b": 1.5})
|
||||
ser = Series([10, 11])
|
||||
df = df.set_index(ser)
|
||||
expected = df.index.copy(deep=True)
|
||||
ser.iloc[0] = 100
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
|
||||
|
||||
def test_assign_index_as_series():
|
||||
df = DataFrame({"a": [1, 2], "b": 1.5})
|
||||
ser = Series([10, 11])
|
||||
df.index = ser
|
||||
expected = df.index.copy(deep=True)
|
||||
ser.iloc[0] = 100
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
|
||||
|
||||
def test_assign_index_as_index():
|
||||
df = DataFrame({"a": [1, 2], "b": 1.5})
|
||||
ser = Series([10, 11])
|
||||
rhs_index = Index(ser)
|
||||
df.index = rhs_index
|
||||
rhs_index = None # overwrite to clear reference
|
||||
expected = df.index.copy(deep=True)
|
||||
ser.iloc[0] = 100
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
|
||||
|
||||
def test_index_from_series():
|
||||
ser = Series([1, 2])
|
||||
idx = Index(ser)
|
||||
expected = idx.copy(deep=True)
|
||||
ser.iloc[0] = 100
|
||||
tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_index_from_series_copy():
    """Check that a Series keeps its buffer after ``Index(ser, copy=True)``.

    The Series' array captured before the write must still share memory with
    the Series' array after the write.
    """
    series = Series([1, 2])
    # The index is deliberately kept alive but otherwise unused.
    copied_index = Index(series, copy=True)  # noqa: F841
    before = get_array(series)
    series.iloc[0] = 100
    assert np.shares_memory(get_array(series), before)
|
||||
|
||||
|
||||
def test_index_from_index():
|
||||
ser = Series([1, 2])
|
||||
idx = Index(ser)
|
||||
idx = Index(idx)
|
||||
expected = idx.copy(deep=True)
|
||||
ser.iloc[0] = 100
|
||||
tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        lambda x: x._shallow_copy(x._values),
        lambda x: x.view(),
        lambda x: x.take([0, 1]),
        lambda x: x.repeat([1, 1]),
        lambda x: x[slice(0, 2)],
        lambda x: x[[0, 1]],
        lambda x: x._getitem_slice(slice(0, 2)),
        lambda x: x.delete([]),
        lambda x: x.rename("b"),
        lambda x: x.astype("Int64", copy=False),
    ],
    ids=[
        "_shallow_copy",
        "view",
        "take",
        "repeat",
        "getitem_slice",
        "getitem_list",
        "_getitem_slice",
        "delete",
        "rename",
        "astype",
    ],
)
def test_index_ops(func, request):
    """Check that an index derived via ``func`` ignores writes to the frame view.

    The index comes from ``index_view``; after applying ``func`` and writing
    to the associated view, the result must equal the earlier deep copy
    (names are not compared).
    """
    index, frame_view = index_view([1, 2])
    snapshot = index.copy(deep=True)
    if "astype" in request.node.callspec.id:
        # The astype case changes dtype, so the reference value is cast too.
        snapshot = snapshot.astype("Int64")
    index = func(index)
    frame_view.iloc[0, 0] = 100
    tm.assert_index_equal(index, snapshot, check_names=False)
|
||||
|
||||
|
||||
def test_infer_objects():
    """Check that ``infer_objects(copy=False)`` output ignores view writes.

    The index comes from ``index_view``; after writing to the associated view
    it must equal the earlier deep copy (names are not compared).
    """
    index, frame_view = index_view(["a", "b"])
    snapshot = index.copy(deep=True)
    index = index.infer_objects(copy=False)
    frame_view.iloc[0, 0] = "aaaa"
    tm.assert_index_equal(index, snapshot, check_names=False)
|
||||
|
||||
|
||||
def test_index_to_frame():
|
||||
idx = Index([1, 2, 3], name="a")
|
||||
expected = idx.copy(deep=True)
|
||||
df = idx.to_frame()
|
||||
assert np.shares_memory(get_array(df, "a"), idx._values)
|
||||
assert not df._mgr._has_no_reference(0)
|
||||
|
||||
df.iloc[0, 0] = 100
|
||||
tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_index_values():
|
||||
idx = Index([1, 2, 3])
|
||||
result = idx.values
|
||||
assert result.flags.writeable is False
|
||||
|
||||
|
||||
def test_constructor_copy_input_ndarray_default():
|
||||
arr = np.array([0, 1])
|
||||
idx = Index(arr)
|
||||
assert not np.shares_memory(arr, get_array(idx))
|
||||
|
||||
|
||||
def test_constructor_copy_input_ea_default():
|
||||
arr = array([0, 1], dtype="Int64")
|
||||
idx = Index(arr)
|
||||
assert not tm.shares_memory(arr, idx.array)
|
||||
|
||||
|
||||
def test_series_from_temporary_index_readonly_data():
    """Build a Series from a temporary Index over a read-only int8 array.

    Asserts the Series does not share memory with the input, has no tracked
    reference, accepts a boolean-mask assignment, and ends with the expected
    values.
    """
    # GH 63370
    raw = np.array([0, 1], dtype=np.dtype(np.int8))
    raw.flags.writeable = False
    series = Series(Index(raw))
    assert not np.shares_memory(raw, get_array(series))
    assert series._mgr._has_no_reference(0)
    series[[False, True]] = np.array([0, 2], dtype=np.dtype(np.int8))
    wanted = Series([0, 2], dtype=np.dtype(np.int8))
    tm.assert_series_equal(series, wanted)
|
||||
@@ -0,0 +1,29 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_constructor_copy_input_interval_ea_default():
|
||||
# GH 63388
|
||||
arr = array([Interval(0, 1), Interval(1, 2)])
|
||||
idx = IntervalIndex(arr)
|
||||
assert not tm.shares_memory(arr, idx.array)
|
||||
|
||||
|
||||
def test_series_from_temporary_intervalindex_readonly_data():
    """Build a Series from a temporary IntervalIndex over read-only bounds.

    Both ``_left`` and ``_right`` of the input array are made read-only.
    Asserts the Series' left bounds do not share memory with the input, that
    assigning into the Series works, and that the result is as expected.
    """
    # GH 63388
    values = array([Interval(0, 1), Interval(1, 2)])
    values._left.flags.writeable = False
    values._right.flags.writeable = False
    series = Series(IntervalIndex(values))
    assert not np.shares_memory(values._left, get_array(series)._left)
    series.iloc[0] = Interval(5, 6)
    wanted = Series([Interval(5, 6), Interval(1, 2)], dtype="interval[int64, right]")
    tm.assert_series_equal(series, wanted)
|
||||
@@ -0,0 +1,47 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
array,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:Setting a value on a view:FutureWarning"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [lambda x: x, PeriodIndex])
def test_periodindex(box):
    """Check that a PeriodIndex built from a Series ignores later writes.

    The index (optionally re-wrapped through ``box``) is compared against a
    deep copy taken before the Series is modified.
    """
    periods = period_range("2019-12-31", periods=3, freq="D")
    series = Series(periods)
    index = box(PeriodIndex(series))
    snapshot = index.copy(deep=True)
    series.iloc[0] = Period("2020-12-31")
    tm.assert_index_equal(index, snapshot)
|
||||
|
||||
|
||||
def test_constructor_copy_input_period_ea_default():
|
||||
# GH 63388
|
||||
arr = array(["2020-01-01", "2020-01-02"], dtype="period[D]")
|
||||
idx = PeriodIndex(arr)
|
||||
assert not tm.shares_memory(arr, idx.array)
|
||||
|
||||
|
||||
def test_series_from_temporary_periodindex_readonly_data():
    """Build a Series from a temporary PeriodIndex over a read-only buffer.

    The input array's ``_ndarray`` is made read-only. Asserts the Series does
    not share memory with it, that assigning into the Series works, and that
    the result is as expected.
    """
    # GH 63388
    values = array(["2020-01-01", "2020-01-02"], dtype="period[D]")
    values._ndarray.flags.writeable = False
    series = Series(PeriodIndex(values))
    assert not np.shares_memory(values._ndarray, get_array(series))
    series.iloc[0] = Period("2022-01-01", freq="D")
    wanted = Series(
        [Period("2022-01-01", freq="D"), Period("2020-01-02", freq="D")],
        dtype="period[D]",
    )
    tm.assert_series_equal(series, wanted)
|
||||
@@ -0,0 +1,59 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
Timedelta,
|
||||
TimedeltaIndex,
|
||||
array,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:Setting a value on a view:FutureWarning"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "cons",
    [
        lambda x: TimedeltaIndex(x),
        lambda x: TimedeltaIndex(TimedeltaIndex(x)),
    ],
)
def test_timedeltaindex(cons):
    """Check that a TimedeltaIndex built from a Series ignores later writes.

    ``cons`` wraps the Series once or twice in ``TimedeltaIndex``; the result
    is compared against a deep copy taken before the Series is modified.
    """
    deltas = timedelta_range("1 day", periods=3)
    series = Series(deltas)
    index = cons(series)
    snapshot = index.copy(deep=True)
    series.iloc[0] = Timedelta("5 days")
    tm.assert_index_equal(index, snapshot)
|
||||
|
||||
|
||||
def test_constructor_copy_input_timedelta_ndarray_default():
|
||||
# GH 63388
|
||||
arr = np.array([1, 2], dtype="timedelta64[ns]")
|
||||
idx = TimedeltaIndex(arr)
|
||||
assert not np.shares_memory(arr, get_array(idx))
|
||||
|
||||
|
||||
def test_constructor_copy_input_timedelta_ea_default():
|
||||
# GH 63388
|
||||
arr = array([1, 2], dtype="timedelta64[ns]")
|
||||
idx = TimedeltaIndex(arr)
|
||||
assert not tm.shares_memory(arr, idx.array)
|
||||
|
||||
|
||||
def test_series_from_temporary_timedeltaindex_readonly_data():
    """Build a Series from a temporary TimedeltaIndex over a read-only array.

    Asserts the Series does not share memory with the input array, that
    assigning into it works, and that the result matches the expected values.
    """
    # GH 63388
    raw = np.array([1, 2], dtype="timedelta64[ns]")
    raw.flags.writeable = False
    series = Series(TimedeltaIndex(raw))
    assert not np.shares_memory(raw, get_array(series))
    series.iloc[0] = Timedelta(days=1)
    wanted = Series(
        [Timedelta(days=1), Timedelta(nanoseconds=2)], dtype="timedelta64[ns]"
    )
    tm.assert_series_equal(series, wanted)
|
||||
229
venv/Lib/site-packages/pandas/tests/copy_view/test_array.py
Normal file
229
venv/Lib/site-packages/pandas/tests/copy_view/test_array.py
Normal file
@@ -0,0 +1,229 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import np_version_gt2
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy/view behaviour for accessing underlying array of Series/DataFrame
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method",
    [
        lambda ser: ser.values,
        lambda ser: np.asarray(ser.array),
        lambda ser: np.asarray(ser),
        lambda ser: np.array(ser, copy=False),
    ],
    ids=["values", "array", "np.asarray", "np.array"],
)
def test_series_values(request, method):
    """Check writeability of the ndarray obtained from a Series via ``method``.

    For the ``"array"`` case the result is asserted to be writeable and to
    update the Series. In every other case it must be a read-only view, so
    writing through it raises and the Series stays unchanged.
    """
    series = Series([1, 2, 3], name="name")
    pristine = series.copy()

    result = method(series)

    if request.node.callspec.id == "array":
        # https://github.com/pandas-dev/pandas/issues/63099
        # .array for now does not return a read-only view
        assert result.flags.writeable is True
        # updating the array updates the series
        result[0] = 0
        assert series.iloc[0] == 0
        return

    # .values still gives a view but is read-only
    assert np.shares_memory(result, get_array(series, "name"))
    assert result.flags.writeable is False

    # mutating series through the array therefore doesn't work
    with pytest.raises(ValueError, match="read-only"):
        result[0] = 0
    tm.assert_series_equal(series, pristine)

    # mutating the series itself still works
    series.iloc[0] = 0
    assert series.values[0] == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method",
    [
        lambda df: df.values,
        lambda df: np.asarray(df),
        lambda df: np.array(df, copy=False),
    ],
    ids=["values", "asarray", "array"],
)
def test_dataframe_values(method):
    """Check that the ndarray obtained from a DataFrame is a read-only view.

    Writing through the array must raise and leave the frame unchanged, while
    writing to the frame itself still works.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    pristine = frame.copy()

    result = method(frame)

    # .values still gives a view but is read-only
    assert np.shares_memory(result, get_array(frame, "a"))
    assert result.flags.writeable is False

    # mutating the frame through the array therefore doesn't work
    with pytest.raises(ValueError, match="read-only"):
        result[0, 0] = 0
    tm.assert_frame_equal(frame, pristine)

    # mutating the frame itself still works
    frame.iloc[0, 0] = 0
    assert frame.values[0, 0] == 0
|
||||
|
||||
|
||||
def test_series_to_numpy():
    """Check sharing and writeability of ``Series.to_numpy`` results.

    The default call must give a read-only view; ``copy=True`` and a dtype
    conversion to ``float64`` must each give a writeable array that does not
    share memory with the Series.
    """
    series = Series([1, 2, 3], name="name")
    pristine = series.copy()

    # default: copy=False, no dtype or NAs
    result = series.to_numpy()
    # to_numpy still gives a view but is read-only
    assert np.shares_memory(result, get_array(series, "name"))
    assert result.flags.writeable is False

    # mutating series through the array therefore doesn't work
    with pytest.raises(ValueError, match="read-only"):
        result[0] = 0
    tm.assert_series_equal(series, pristine)

    # mutating the series itself still works
    series.iloc[0] = 0
    assert series.values[0] == 0

    # specify copy=True gives a writeable array
    series = Series([1, 2, 3], name="name")
    result = series.to_numpy(copy=True)
    assert not np.shares_memory(result, get_array(series, "name"))
    assert result.flags.writeable is True

    # specifying a dtype that already causes a copy also gives a writeable array
    series = Series([1, 2, 3], name="name")
    result = series.to_numpy(dtype="float64")
    assert not np.shares_memory(result, get_array(series, "name"))
    assert result.flags.writeable is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method",
    [
        lambda ser: np.asarray(ser.values),
        lambda ser: np.asarray(ser.array),
        lambda ser: np.asarray(ser),
        lambda ser: np.asarray(ser, dtype="int64"),
        lambda ser: np.array(ser, copy=False),
    ],
    ids=["values", "array", "np.asarray", "np.asarray-dtype", "np.array"],
)
def test_series_values_ea_dtypes(request, method):
    """Check writeability of the ndarray obtained from an ``Int64`` Series.

    For the ``"values"`` and ``"array"`` cases the result is asserted to be
    writeable and to update the Series. In every other case it must be a
    read-only view, so writing through it raises and the Series is unchanged.
    """
    series = Series([1, 2, 3], dtype="Int64")
    pristine = series.copy()

    result = method(series)

    if request.node.callspec.id in ("values", "array"):
        # https://github.com/pandas-dev/pandas/issues/63099
        # .array/values for now does not return a read-only view
        assert result.flags.writeable is True
        # updating the array updates the series
        result[0] = 0
        assert series.iloc[0] == 0
        return

    # conversion to ndarray gives a view but is read-only
    assert np.shares_memory(result, get_array(series))
    assert result.flags.writeable is False

    # mutating series through the array therefore doesn't work
    with pytest.raises(ValueError, match="read-only"):
        result[0] = 0
    tm.assert_series_equal(series, pristine)

    # mutating the series itself still works
    series.iloc[0] = 0
    assert series.values[0] == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method",
    [
        lambda df: df.values,
        lambda df: np.asarray(df),
        lambda df: np.asarray(df, dtype="int64"),
        lambda df: np.array(df, copy=False),
    ],
    ids=["values", "np.asarray", "np.asarray-dtype", "np.array"],
)
def test_dataframe_array_ea_dtypes(method):
    """Assert the ndarray from an ``Int64`` frame is a read-only shared view."""
    frame = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
    result = method(frame)

    assert np.shares_memory(result, get_array(frame, "a"))
    assert result.flags.writeable is False
|
||||
|
||||
|
||||
def test_dataframe_array_string_dtype():
|
||||
df = DataFrame({"a": ["a", "b"]}, dtype="string[python]")
|
||||
arr = np.asarray(df)
|
||||
assert np.shares_memory(arr, get_array(df, "a"))
|
||||
assert arr.flags.writeable is False
|
||||
|
||||
|
||||
def test_series_array_string_dtype(any_string_dtype):
    """Check ``np.asarray`` of a string Series for each string dtype.

    For the pyarrow-backed ``"string"`` dtype the result must be a writeable
    non-view; for every other dtype it must be a read-only shared view.
    """
    series = Series(["a", "b"], dtype=any_string_dtype)
    result = np.asarray(series)
    if any_string_dtype == "string" and any_string_dtype.storage == "pyarrow":
        # for pyarrow strings, the numpy arrays is not a view, so also does
        # not need to be read-only (https://github.com/pandas-dev/pandas/pull/64035)
        assert not np.shares_memory(result, get_array(series))
        assert result.flags.writeable is True
    else:
        assert np.shares_memory(result, get_array(series))
        assert result.flags.writeable is False
|
||||
|
||||
|
||||
def test_dataframe_multiple_numpy_dtypes():
    """Check array conversion of a frame mixing int and float columns.

    ``np.asarray`` must return a writeable array that does not share memory
    with column ``"a"``. When ``np_version_gt2`` is true, ``copy=False`` must
    raise; ``copy=True`` must give a writeable array.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": 1.5})
    result = np.asarray(frame)
    assert not np.shares_memory(result, get_array(frame, "a"))
    assert result.flags.writeable is True

    if np_version_gt2:
        # copy=False semantics are only supported in NumPy>=2.

        with pytest.raises(ValueError, match="Unable to avoid copy while creating"):
            result = np.array(frame, copy=False)

    result = np.array(frame, copy=True)
    assert result.flags.writeable is True
|
||||
|
||||
|
||||
def test_dataframe_single_block_copy_true():
    """Assert ``np.array(df, copy=True)`` gives a writeable, unshared array."""
    # the copy=False/None cases are tested above in test_dataframe_values
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    result = np.array(frame, copy=True)
    assert not np.shares_memory(result, get_array(frame, "a"))
    assert result.flags.writeable is True
|
||||
|
||||
|
||||
def test_values_is_ea():
|
||||
df = DataFrame({"a": date_range("2012-01-01", periods=3)})
|
||||
arr = np.asarray(df)
|
||||
assert arr.flags.writeable is False
|
||||
|
||||
|
||||
def test_empty_dataframe():
    """Assert ``np.asarray`` of an empty DataFrame is writeable."""
    frame = DataFrame()
    result = np.asarray(frame)
    assert result.flags.writeable is True
|
||||
230
venv/Lib/site-packages/pandas/tests/copy_view/test_astype.py
Normal file
230
venv/Lib/site-packages/pandas/tests/copy_view/test_astype.py
Normal file
@@ -0,0 +1,230 @@
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_astype_single_dtype():
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1.5})
|
||||
df_orig = df.copy()
|
||||
df2 = df.astype("float64")
|
||||
|
||||
assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
|
||||
# mutating df2 triggers a copy-on-write for that column/block
|
||||
df2.iloc[0, 2] = 5.5
|
||||
assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
# mutating parent also doesn't update result
|
||||
df2 = df.astype("float64")
|
||||
df.iloc[0, 2] = 5.5
|
||||
tm.assert_frame_equal(df2, df_orig.astype("float64"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
@pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
def test_astype_avoids_copy(dtype, new_dtype):
    """Check that ``astype`` between these 64-bit integer dtypes shares memory.

    The result must initially share memory with the original; writing to
    either frame must leave the other one unchanged.
    """
    if new_dtype == "int64[pyarrow]":
        pytest.importorskip("pyarrow")
    frame = DataFrame({"a": [1, 2, 3]}, dtype=dtype)
    pristine = frame.copy()
    converted = frame.astype(new_dtype)
    assert np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))

    # mutating the result triggers a copy-on-write for that column/block
    converted.iloc[0, 0] = 10
    assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    tm.assert_frame_equal(frame, pristine)

    # mutating parent also doesn't update result
    converted = frame.astype(new_dtype)
    frame.iloc[0, 0] = 100
    tm.assert_frame_equal(converted, pristine.astype(new_dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"])
def test_astype_different_target_dtype(dtype):
    """Check that ``astype`` to a different dtype yields independent data.

    The result must not share memory with the original and must carry no
    tracked reference; writes to either frame must not affect the other.
    """
    if dtype == "int32[pyarrow]":
        pytest.importorskip("pyarrow")
    frame = DataFrame({"a": [1, 2, 3]})
    pristine = frame.copy()
    converted = frame.astype(dtype)

    assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    assert converted._mgr._has_no_reference(0)

    converted.iloc[0, 0] = 5
    tm.assert_frame_equal(frame, pristine)

    # mutating parent also doesn't update result
    converted = frame.astype(dtype)
    frame.iloc[0, 0] = 100
    tm.assert_frame_equal(converted, pristine.astype(dtype))
|
||||
|
||||
|
||||
def test_astype_numpy_to_ea():
|
||||
ser = Series([1, 2, 3])
|
||||
result = ser.astype("Int64")
|
||||
assert np.shares_memory(get_array(ser), get_array(result))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, new_dtype", [("object", "string[python]"), ("string[python]", "object")]
)
def test_astype_string_and_object(dtype, new_dtype):
    """Check object <-> ``string[python]`` ``astype`` shares memory safely.

    The result must share memory with the original, and writing to the result
    must leave the original unchanged.
    """
    frame = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
    pristine = frame.copy()
    converted = frame.astype(new_dtype)
    assert np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))

    converted.iloc[0, 0] = "x"
    tm.assert_frame_equal(frame, pristine)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, new_dtype", [("object", "string[python]"), ("string[python]", "object")]
)
def test_astype_string_and_object_update_original(dtype, new_dtype):
    """Check that writing to the original leaves the ``astype`` result intact.

    The result must share memory with the original; after the original is
    written to, the result must still equal the copy taken beforehand.
    """
    frame = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
    converted = frame.astype(new_dtype)
    converted_before = converted.copy()
    assert np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))

    frame.iloc[0, 0] = "x"
    tm.assert_frame_equal(converted, converted_before)
|
||||
|
||||
|
||||
def test_astype_str_copy_on_pickle_roundrip():
    """Check that ``astype(str)`` does not alter a pickle-round-tripped Series.

    The round-tripped Series is converted (result discarded) and must still
    equal the Series it was pickled from.
    """
    # TODO(infer_string) this test can be removed after 3.0 (once str is the default)
    # https://github.com/pandas-dev/pandas/issues/54654
    # ensure_string_array may alter array inplace
    original = Series(np.array([(1, 2), None, 1], dtype="object"))
    roundtripped = pickle.loads(pickle.dumps(original))
    roundtripped.astype(str)
    tm.assert_series_equal(original, roundtripped)
|
||||
|
||||
|
||||
def test_astype_string_copy_on_pickle_roundrip(any_string_dtype):
    """Check string ``astype`` does not alter a pickle-round-tripped Series.

    The round-tripped Series is converted to ``any_string_dtype`` (result
    discarded) and must still equal the Series it was pickled from.
    """
    # https://github.com/pandas-dev/pandas/issues/54654
    # ensure_string_array may alter array inplace
    original = Series(np.array([(1, 2), None, 1], dtype="object"))
    roundtripped = pickle.loads(pickle.dumps(original))
    roundtripped.astype(any_string_dtype)
    tm.assert_series_equal(original, roundtripped)
|
||||
|
||||
|
||||
def test_astype_string_read_only_on_pickle_roundrip(any_string_dtype):
    """Check string ``astype`` on a read-only pickle-round-tripped Series.

    The round-tripped Series' values are made read-only before conversion
    (result discarded); it must still equal the Series it was pickled from.
    """
    # https://github.com/pandas-dev/pandas/issues/54654
    # ensure_string_array may alter read-only array inplace
    original = Series(np.array([(1, 2), None, 1], dtype="object"))
    roundtripped = pickle.loads(pickle.dumps(original))
    roundtripped._values.flags.writeable = False
    roundtripped.astype(any_string_dtype)
    tm.assert_series_equal(original, roundtripped)
|
||||
|
||||
|
||||
def test_astype_dict_dtypes():
|
||||
df = DataFrame(
|
||||
{"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}
|
||||
)
|
||||
df_orig = df.copy()
|
||||
df2 = df.astype({"a": "float64", "c": "float64"})
|
||||
|
||||
assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
|
||||
# mutating df2 triggers a copy-on-write for that column/block
|
||||
df2.iloc[0, 2] = 5.5
|
||||
assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
|
||||
df2.iloc[0, 1] = 10
|
||||
assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_astype_different_datetime_resos():
    """Assert ``astype("datetime64[ms]")`` yields unshared, unreferenced data."""
    frame = DataFrame({"a": date_range("2019-12-31", periods=2, freq="D")})
    converted = frame.astype("datetime64[ms]")

    assert not np.shares_memory(get_array(frame, "a"), get_array(converted, "a"))
    assert converted._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_astype_different_timezones():
|
||||
df = DataFrame(
|
||||
{"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific", unit="ns")}
|
||||
)
|
||||
result = df.astype("datetime64[ns, Europe/Berlin]")
|
||||
assert not result._mgr._has_no_reference(0)
|
||||
assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
|
||||
|
||||
|
||||
def test_astype_different_timezones_different_reso():
    """Assert ``astype`` changing timezone and resolution yields unshared data.

    The result must carry no tracked reference and must not share memory with
    the original column.
    """
    frame = DataFrame(
        {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific", unit="ns")}
    )
    converted = frame.astype("datetime64[ms, Europe/Berlin]")
    assert converted._mgr._has_no_reference(0)
    assert not np.shares_memory(get_array(frame, "a"), get_array(converted, "a"))
|
||||
|
||||
|
||||
def test_astype_arrow_timestamp():
    """Assert ``astype("timestamp[ns][pyarrow]")`` shares memory with the source.

    Skipped when pyarrow is not importable. The result must carry a tracked
    reference and its ``_pa_array`` must share memory with the original column.
    """
    pytest.importorskip("pyarrow")
    stamp_values = [
        Timestamp("2020-01-01 01:01:01.000001"),
        Timestamp("2020-01-01 01:01:01.000001"),
    ]
    frame = DataFrame({"a": stamp_values}, dtype="M8[ns]")
    converted = frame.astype("timestamp[ns][pyarrow]")
    assert not converted._mgr._has_no_reference(0)
    assert np.shares_memory(get_array(frame, "a"), get_array(converted, "a")._pa_array)
|
||||
|
||||
|
||||
def test_convert_dtypes_infer_objects():
|
||||
ser = Series(["a", "b", "c"])
|
||||
ser_orig = ser.copy()
|
||||
result = ser.convert_dtypes(
|
||||
convert_integer=False,
|
||||
convert_boolean=False,
|
||||
convert_floating=False,
|
||||
convert_string=False,
|
||||
)
|
||||
|
||||
assert tm.shares_memory(get_array(ser), get_array(result))
|
||||
result.iloc[0] = "x"
|
||||
tm.assert_series_equal(ser, ser_orig)
|
||||
|
||||
|
||||
def test_convert_dtypes(using_infer_string):
    """Check per-column memory sharing after ``DataFrame.convert_dtypes``.

    Sharing of the string column ``"a"`` is expected only when
    ``using_infer_string`` is true; columns ``"b"``, ``"c"`` and ``"d"`` must
    always share memory. Writes to the result must not reach the original.
    """
    frame = DataFrame(
        {"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]}
    )
    pristine = frame.copy()
    converted = frame.convert_dtypes()

    if using_infer_string:
        # String column is already Arrow-backed, so memory is shared
        assert tm.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    else:
        # String column converts from object to Arrow, no memory sharing
        assert not tm.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    assert tm.shares_memory(get_array(converted, "d"), get_array(frame, "d"))
    assert tm.shares_memory(get_array(converted, "b"), get_array(frame, "b"))
    assert tm.shares_memory(get_array(converted, "c"), get_array(frame, "c"))
    converted.iloc[0, 0] = "x"
    converted.iloc[0, 1] = 10
    tm.assert_frame_equal(frame, pristine)
|
||||
@@ -0,0 +1,104 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import CHAINED_WARNING_DISABLED
|
||||
from pandas.errors import ChainedAssignmentError
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])]
)
def test_series_setitem(indexer):
    """Check that ``df["a"][indexer] = 0`` emits exactly one warning.

    That single warning must have category ``ChainedAssignmentError``. The
    test returns early when ``CHAINED_WARNING_DISABLED`` is set.
    """
    # ensure we only get a single warning for those typical cases of chained
    # assignment
    frame = DataFrame({"a": [1, 2, 3], "b": 1})

    # using custom check instead of tm.assert_produces_warning because that doesn't
    # fail if multiple warnings are raised
    if CHAINED_WARNING_DISABLED:
        return
    with pytest.warns() as caught:  # noqa: TID251
        frame["a"][indexer] = 0
    assert len(caught) == 1
    assert caught[0].category == ChainedAssignmentError
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", ["a", ["a", "b"], slice(0, 2), np.array([True, False, True])]
)
def test_frame_setitem(indexer):
    """Check ``df[0:3][indexer] = 10`` is reported as chained assignment."""
    frame = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    with tm.raises_chained_assignment_error():
        frame[0:3][indexer] = 10
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])]
)
def test_series_iloc_setitem(indexer):
    """Check ``df["a"].iloc[indexer] = 0`` is reported as chained assignment."""
    frame = DataFrame({"a": [1, 2, 3], "b": 1})

    with tm.raises_chained_assignment_error():
        frame["a"].iloc[indexer] = 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])]
)
def test_frame_iloc_setitem(indexer):
    """Check ``df[0:3].iloc[indexer] = 10`` is reported as chained assignment."""
    frame = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    with tm.raises_chained_assignment_error():
        frame[0:3].iloc[indexer] = 10
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer",
    [
        0,
        [0, 1],
        slice(0, 2),
        np.array([True, False, True]),
    ],
)
def test_series_loc_setitem(indexer):
    """``.loc`` assignment on a column selected from a frame is chained."""
    frame = DataFrame({"a": [1, 2, 3], "b": 1})

    with tm.raises_chained_assignment_error():
        frame["a"].loc[indexer] = 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer",
    [
        0,
        [0, 1],
        (0, "a"),
        slice(0, 2),
        np.array([True, False, True]),
    ],
)
def test_frame_loc_setitem(indexer):
    """``.loc`` assignment on a row slice of a frame is chained."""
    frame = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    with tm.raises_chained_assignment_error():
        frame[0:3].loc[indexer] = 10
|
||||
|
||||
|
||||
def test_series_at_setitem():
    """``.at`` assignment on a column selected from a frame is chained."""
    frame = DataFrame({"a": [1, 2, 3], "b": 1})

    with tm.raises_chained_assignment_error():
        frame["a"].at[0] = 0
|
||||
|
||||
|
||||
def test_frame_at_setitem():
    """``.at`` assignment on a row slice of a frame is chained."""
    frame = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    with tm.raises_chained_assignment_error():
        frame[0:3].at[0, "a"] = 10
|
||||
|
||||
|
||||
def test_series_iat_setitem():
    """``.iat`` assignment on a column selected from a frame is chained."""
    frame = DataFrame({"a": [1, 2, 3], "b": 1})

    with tm.raises_chained_assignment_error():
        frame["a"].iat[0] = 0
|
||||
|
||||
|
||||
def test_frame_iat_setitem():
    """``.iat`` assignment on a row slice of a frame is chained."""
    frame = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    with tm.raises_chained_assignment_error():
        frame[0:3].iat[0, 0] = 10
|
||||
72
venv/Lib/site-packages/pandas/tests/copy_view/test_clip.py
Normal file
72
venv/Lib/site-packages/pandas/tests/copy_view/test_clip.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_clip_inplace_reference():
    """In-place clip that changes values must leave an existing view untouched."""
    frame = DataFrame({"a": [1.5, 2, 3]})
    snapshot = frame.copy()
    values_before = get_array(frame, "a")
    alias = frame[:]

    frame.clip(lower=2, inplace=True)

    # the clipped frame got new data; neither object references the other
    assert not np.shares_memory(get_array(frame, "a"), values_before)
    assert frame._mgr._has_no_reference(0)
    assert alias._mgr._has_no_reference(0)
    tm.assert_frame_equal(snapshot, alias)
|
||||
|
||||
|
||||
def test_clip_inplace_reference_no_op():
    """In-place clip that changes nothing keeps sharing data with the view."""
    frame = DataFrame({"a": [1.5, 2, 3]})
    snapshot = frame.copy()
    values_before = get_array(frame, "a")
    alias = frame[:]

    frame.clip(lower=0, inplace=True)

    assert np.shares_memory(get_array(frame, "a"), values_before)

    # both objects are still registered as referencing each other
    assert not frame._mgr._has_no_reference(0)
    assert not alias._mgr._has_no_reference(0)
    tm.assert_frame_equal(snapshot, alias)
|
||||
|
||||
|
||||
def test_clip_inplace():
    """In-place clip on an unreferenced frame reuses the existing array."""
    frame = DataFrame({"a": [1.5, 2, 3]})
    values_before = get_array(frame, "a")

    frame.clip(lower=2, inplace=True)

    assert np.shares_memory(get_array(frame, "a"), values_before)
    assert frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_clip():
    """A value-changing clip returns new data and leaves the input alone."""
    frame = DataFrame({"a": [1.5, 2, 3]})
    snapshot = frame.copy()

    clipped = frame.clip(lower=2)

    assert not np.shares_memory(get_array(clipped, "a"), get_array(frame, "a"))

    assert frame._mgr._has_no_reference(0)
    tm.assert_frame_equal(snapshot, frame)
|
||||
|
||||
|
||||
def test_clip_no_op():
    """A clip that changes nothing returns an object sharing the input's data."""
    frame = DataFrame({"a": [1.5, 2, 3]})

    clipped = frame.clip(lower=0)

    assert not frame._mgr._has_no_reference(0)
    assert np.shares_memory(get_array(clipped, "a"), get_array(frame, "a"))
|
||||
|
||||
|
||||
def test_clip_chained_inplace():
    """In-place clip on a temporary selection is flagged and has no effect."""
    frame = DataFrame({"a": [1, 4, 2], "b": 1})
    snapshot = frame.copy()

    # column selected as a Series
    with tm.raises_chained_assignment_error():
        frame["a"].clip(1, 2, inplace=True)
    tm.assert_frame_equal(frame, snapshot)

    # column selected as a one-column frame
    with tm.raises_chained_assignment_error():
        frame[["a"]].clip(1, 2, inplace=True)
    tm.assert_frame_equal(frame, snapshot)
|
||||
@@ -0,0 +1,382 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_string_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
Timedelta,
|
||||
TimedeltaIndex,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy/view behaviour for Series / DataFrame constructors
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "int64"])
def test_series_from_series(dtype):
    """Series(Series) yields a lazy copy: shared memory, independent mutation."""
    source = Series([1, 2, 3], name="name")

    # no ``copy`` argument -> the new Series starts out sharing the data
    derived = Series(source, dtype=dtype)
    assert np.shares_memory(get_array(source), get_array(derived))
    assert derived._mgr.blocks[0].refs.has_reference()

    # writing to the derived Series leaves the source untouched ...
    derived.iloc[0] = 0
    assert source.iloc[0] == 1
    # ... and afterwards the two no longer share memory
    assert not np.shares_memory(get_array(source), get_array(derived))

    # same protection in the other direction: writing to the source
    derived = Series(source, dtype=dtype)
    source.iloc[0] = 0
    assert derived.iloc[0] == 1

    # explicit copy=False behaves like the default
    derived = Series(source, dtype=dtype, copy=False)
    assert np.shares_memory(get_array(source), get_array(derived))
    assert derived._mgr.blocks[0].refs.has_reference()

    # explicit copy=True copies immediately
    derived = Series(source, dtype=dtype, copy=True)
    assert not np.shares_memory(get_array(source), get_array(derived))
    assert source._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_series_from_series_with_reindex():
    """Series(Series, index=...) with and without an actual reindex."""
    source = Series([1, 2, 3], name="name")

    # indexes equal to the existing one need no reindex of the values,
    # so the result still shares memory but is protected on write
    for index in [
        source.index,
        source.index.copy(),
        list(source.index),
        source.index.rename("idx"),
    ]:
        derived = Series(source, index=index)
        assert np.shares_memory(source.values, derived.values)
        derived.iloc[0] = 0
        assert source.iloc[0] == 1

    # NOTE(review): indentation is lost in this view; this check is assumed
    # to run once after the loop, using the last ``index`` -- confirm.
    derived = Series(source, index=index, copy=True)
    assert not np.shares_memory(source.values, derived.values)
    assert not derived._mgr.blocks[0].refs.has_reference()

    # an index that forces a real reindex gives fresh data with no refs
    derived = Series(source, index=[0, 1, 2, 3])
    assert not np.shares_memory(source.values, derived.values)
    assert not derived._mgr.blocks[0].refs.has_reference()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "int64"])
@pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)])
@pytest.mark.parametrize(
    "arr",
    [
        np.array([1, 2, 3], dtype="int64"),
        pd.array([1, 2, 3], dtype="Int64"),
    ],
)
def test_series_from_array(idx, dtype, arr):
    """Series(array) copies by default and is a plain view with copy=False."""
    series = Series(arr, dtype=dtype, index=idx)
    snapshot = series.copy()
    # use the ``_data`` attribute when the array has one, else the array itself
    raw = getattr(arr, "_data", arr)
    assert not np.shares_memory(get_array(series), raw)

    arr[0] = 100
    tm.assert_series_equal(series, snapshot)

    # with an explicit copy=False, writes to the array show up in the Series
    series = Series(arr, dtype=dtype, index=idx, copy=False)
    assert np.shares_memory(get_array(series), raw)
    arr[0] = 50
    assert series.iloc[0] == 50
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [True, False, None])
def test_series_from_array_different_dtype(copy):
    """A dtype change never shares memory with the input, whatever ``copy`` is."""
    values = np.array([1, 2, 3], dtype="int64")
    series = Series(values, dtype="int32", copy=copy)
    assert not np.shares_memory(get_array(series), values)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "idx",
    [
        Index([1, 2]),
        DatetimeIndex([Timestamp("2019-12-31"), Timestamp("2020-12-31")]),
        PeriodIndex([Period("2019-12-31"), Period("2020-12-31")]),
        TimedeltaIndex([Timedelta("1 days"), Timedelta("2 days")]),
    ],
)
def test_series_from_index(idx):
    """Series(Index) shares data lazily; writing to it never changes the Index."""
    unchanged = idx.copy(deep=True)

    # default construction
    series = Series(idx)
    assert np.shares_memory(get_array(series), get_array(idx))
    assert not series._mgr._has_no_reference(0)
    series.iloc[0] = series.iloc[1]
    tm.assert_index_equal(idx, unchanged)

    # explicit copy=False behaves the same way
    series = Series(idx, copy=False)
    assert np.shares_memory(get_array(series), get_array(idx))
    assert not series._mgr._has_no_reference(0)
    series.iloc[0] = series.iloc[1]
    tm.assert_index_equal(idx, unchanged)

    # explicit copy=True copies immediately
    series = Series(idx, copy=True)
    assert not np.shares_memory(get_array(series), get_array(idx))
    assert series._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [True, False, None])
def test_series_from_index_different_dtypes(copy):
    """Casting an Index to another dtype gives a Series that owns its data."""
    index = Index([1, 2, 3], dtype="int64", copy=copy)
    series = Series(index, dtype="int32")
    assert not np.shares_memory(get_array(series), get_array(index))
    assert series._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_series_from_block_manager_different_dtype():
    """Series(manager, dtype=other) warns and yields data without references."""
    source = Series([1, 2, 3], dtype="int64")
    with tm.assert_produces_warning(
        DeprecationWarning, match="Passing a SingleBlockManager to Series"
    ):
        converted = Series(source._mgr, dtype="int32")
    assert not np.shares_memory(get_array(source), get_array(converted))
    assert converted._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_mgr", [True, False])
@pytest.mark.parametrize("columns", [None, ["a"]])
def test_dataframe_constructor_mgr_or_df(columns, use_mgr):
    """DataFrame(frame-or-manager) shares data lazily and copies on write."""
    source = DataFrame({"a": [1, 2, 3]})
    snapshot = source.copy()

    # only the manager input is expected to warn
    data = source._mgr if use_mgr else source
    warn = DeprecationWarning if use_mgr else None
    msg = "Passing a BlockManager to DataFrame"
    with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
        rebuilt = DataFrame(data)

    assert np.shares_memory(get_array(source, "a"), get_array(rebuilt, "a"))
    rebuilt.iloc[0] = 100

    assert not np.shares_memory(get_array(source, "a"), get_array(rebuilt, "a"))
    tm.assert_frame_equal(source, snapshot)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
@pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])
def test_dataframe_from_dict_of_series(columns, index, dtype):
    """DataFrame(dict of Series, copy=False) shares data lazily, both ways."""
    # keyword arguments common to every constructor call in this test
    shared_kwargs = {"index": index, "columns": columns, "dtype": dtype}

    s1 = Series([1, 2, 3])
    s2 = Series([4, 5, 6])
    s1_orig = s1.copy()
    expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, **shared_kwargs)

    result = DataFrame({"a": s1, "b": s2}, copy=False, **shared_kwargs)

    # right after construction the column and the Series share memory
    assert np.shares_memory(get_array(result, "a"), get_array(s1))

    # writing to the frame detaches it and leaves the Series unchanged
    result.iloc[0, 0] = 10
    assert not np.shares_memory(get_array(result, "a"), get_array(s1))
    tm.assert_series_equal(s1, s1_orig)

    # writing to the parent Series detaches it and leaves the frame unchanged
    s1 = Series([1, 2, 3])
    s2 = Series([4, 5, 6])
    result = DataFrame({"a": s1, "b": s2}, copy=False, **shared_kwargs)
    s1.iloc[0] = 10
    assert not np.shares_memory(get_array(result, "a"), get_array(s1))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "int64"])
def test_dataframe_from_dict_of_series_with_reindex(dtype):
    """A constructor call that must reindex yields a frame that writes in place."""
    s1 = Series([1, 2, 3])
    s2 = Series([4, 5, 6])
    # index [1, 2, 3] differs from the Series' own index, forcing a reindex
    frame = DataFrame({"a": s1, "b": s2}, index=[1, 2, 3], dtype=dtype, copy=False)

    values_before = get_array(frame, "a")
    assert not np.shares_memory(values_before, get_array(s1))

    # the write must land in the same array, i.e. no copy was triggered
    frame.iloc[0, 0] = 100
    values_after = get_array(frame, "a")
    assert np.shares_memory(values_before, values_after)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, dtype",
    [
        ([1, 2], "int64"),
        # 1D-only EA
        ([1, 2], "Int64"),
        pytest.param(
            ["a", "b"],
            "str",
            marks=[]
            if using_string_dtype()
            else pytest.mark.xfail(
                reason="TODO bug with infer_string=False and specifying dtype='str'"
            ),
        ),
        (["a", "b"], object),
        # 2D EA
        (
            [Timestamp("2020", tz="UTC"), Timestamp("2021", tz="UTC")],
            "datetime64[ns, UTC]",
        ),
    ],
    ids=["int", "int-ea", "str", "object", "datetime64tz"],
)
def test_dataframe_from_series_or_index(data, dtype, index_or_series):
    """DataFrame(Series-or-Index) shares data lazily and never mutates its input."""
    obj = index_or_series(data, dtype=dtype)
    # deep=True is required for the Index case
    obj_orig = obj.copy(deep=True)

    # default construction: shared memory, tracked reference
    frame = DataFrame(obj)
    assert tm.shares_memory(get_array(obj), get_array(frame, 0))
    assert not frame._mgr._has_no_reference(0)

    frame.iloc[0, 0] = data[-1]
    tm.assert_equal(obj, obj_orig)

    # passing the identical dtype explicitly changes nothing
    frame = DataFrame(obj, dtype=dtype)
    assert tm.shares_memory(get_array(obj), get_array(frame, 0))
    assert not frame._mgr._has_no_reference(0)

    frame.iloc[0, 0] = data[-1]
    tm.assert_equal(obj, obj_orig)

    # copy=True copies immediately
    frame = DataFrame(obj, copy=True)
    is_pyarrow_str = obj.dtype == "str" and obj.dtype.storage == "pyarrow"
    if not is_pyarrow_str:
        # ArrowExtensionArray deep copy still points to the same underlying data
        assert not tm.shares_memory(get_array(obj), get_array(frame, 0))
    # NOTE(review): indentation is lost in this view; this assert is assumed
    # to sit outside the ``if`` above -- confirm.
    assert frame._mgr._has_no_reference(0)

    frame.iloc[0, 0] = data[-1]
    tm.assert_equal(obj, obj_orig)
|
||||
|
||||
|
||||
def test_dataframe_from_series_or_index_different_dtype(index_or_series):
    """A dtype change on construction gives a frame that owns its data."""
    source = index_or_series([1, 2], dtype="int64")
    frame = DataFrame(source, dtype="int32")
    assert not np.shares_memory(get_array(source), get_array(frame, 0))
    assert frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_dataframe_from_series_dont_infer_datetime():
    """An object-dtype Series of Timestamps stays object and stays shared."""
    stamps = [Timestamp("2019-12-31"), Timestamp("2020-12-31")]
    series = Series(stamps, dtype=object)
    frame = DataFrame(series)
    assert frame.dtypes.iloc[0] == np.dtype(object)
    assert np.shares_memory(get_array(series), get_array(frame, 0))
    assert not frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
def test_dataframe_from_dict_of_series_with_dtype(index):
    """A casting dtype on construction yields a frame that writes in place."""
    s1 = Series([1.0, 2.0, 3.0])
    s2 = Series([4, 5, 6])
    # the float Series is converted to int64 here, so its data cannot be shared
    frame = DataFrame({"a": s1, "b": s2}, index=index, dtype="int64", copy=False)

    values_before = get_array(frame, "a")
    assert not np.shares_memory(values_before, get_array(s1))

    # the write must land in the same array, i.e. no copy was triggered
    frame.iloc[0, 0] = 100
    values_after = get_array(frame, "a")
    assert np.shares_memory(values_before, values_after)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [False, None, True])
def test_dataframe_from_numpy_array(copy):
    """DataFrame(ndarray) shares the input's memory only for ``copy=False``.

    Parameters
    ----------
    copy : bool or None
        Value passed through to the ``copy`` argument of ``DataFrame``.
    """
    arr = np.array([[1, 2], [3, 4]])
    df = DataFrame(arr, copy=copy)

    # Both ``None`` and ``True`` are expected to give a copy. The single
    # identity check covers them: ``copy is True`` implies ``copy is not False``.
    if copy is not False:
        assert not np.shares_memory(get_array(df, 0), arr)
    else:
        assert np.shares_memory(get_array(df, 0), arr)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, dtype",
    [
        # 1D-only EA
        ([1, 2], "Int64"),
        # 2D EA
        (
            [Timestamp("2020", tz="UTC"), Timestamp("2021", tz="UTC")],
            "datetime64[ns, UTC]",
        ),
    ],
    ids=["int-ea", "datetime64tz"],
)
@pytest.mark.parametrize("copy", [False, None, True])
def test_dataframe_from_extension_array(copy, data, dtype):
    """DataFrame(extension array) shares memory unless ``copy`` is None/True."""
    arr = pd.array(data, dtype=dtype)
    frame = DataFrame(arr, copy=copy)

    if arr.dtype == "Int64":
        # to ensure tm.shares_memory works correctly
        # TODO fix in tm.shares_memory or get_array?
        arr = arr._data

    if copy is not None and copy is not True:
        assert tm.shares_memory(get_array(frame, 0), arr)
    else:
        assert not tm.shares_memory(get_array(frame, 0), arr)
|
||||
|
||||
|
||||
def test_frame_from_dict_of_index():
    """DataFrame({"a": Index}, copy=False) shares data but protects the Index."""
    index = Index([1, 2, 3])
    unchanged = index.copy(deep=True)

    frame = DataFrame({"a": index}, copy=False)
    assert np.shares_memory(get_array(frame, "a"), index._values)
    assert not frame._mgr._has_no_reference(0)

    # writing to the frame must not leak into the Index
    frame.iloc[0, 0] = 100
    tm.assert_index_equal(index, unchanged)
|
||||
@@ -0,0 +1,100 @@
|
||||
import pytest
|
||||
|
||||
from pandas.errors import Pandas4Warning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
concat,
|
||||
merge,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "meth, kwargs",
    [
        ("truncate", {}),
        ("tz_convert", {"tz": "UTC"}),
        ("tz_localize", {"tz": "UTC"}),
        ("infer_objects", {}),
        ("astype", {"dtype": "float64"}),
        ("reindex", {"index": [2, 0, 1]}),
        ("transpose", {}),
        ("set_axis", {"labels": [1, 2, 3]}),
        ("rename", {"index": {1: 2}}),
        ("set_flags", {}),
        ("to_period", {}),
        ("to_timestamp", {}),
        ("swaplevel", {"i": 0, "j": 1}),
    ],
)
def test_copy_deprecation(meth, kwargs):
    """Passing ``copy=`` to the listed methods emits a Pandas4Warning."""
    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1})

    # give the frame an index of the kind the method under test operates on
    if meth in ("tz_convert", "tz_localize", "to_period"):
        # only tz_convert starts from a tz-aware index
        tz = "US/Eastern" if meth == "tz_convert" else None
        df.index = pd.date_range("2020-01-01", freq="D", periods=len(df), tz=tz)
    elif meth == "to_timestamp":
        df.index = pd.period_range("2020-01-01", freq="D", periods=len(df))
    elif meth == "swaplevel":
        df = df.set_index(["b", "c"])

    # DataFrame variant (not exercised for swaplevel)
    if meth != "swaplevel":
        with tm.assert_produces_warning(Pandas4Warning, match="copy"):
            getattr(df, meth)(copy=False, **kwargs)

    # Series variant (not exercised for transpose)
    if meth != "transpose":
        with tm.assert_produces_warning(Pandas4Warning, match="copy"):
            getattr(df.a, meth)(copy=False, **kwargs)
|
||||
|
||||
|
||||
def test_copy_deprecation_reindex_like_align():
    """``copy=`` on reindex_like/align (frame and Series) emits Pandas4Warning."""

    def expect_copy_warning():
        # Somehow the stack level check is incorrect here, so it is disabled.
        return tm.assert_produces_warning(
            Pandas4Warning, match="copy", check_stacklevel=False
        )

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    with expect_copy_warning():
        df.reindex_like(df, copy=False)

    with expect_copy_warning():
        df.a.reindex_like(df.a, copy=False)

    with expect_copy_warning():
        df.align(df, copy=False)

    with expect_copy_warning():
        df.a.align(df.a, copy=False)
|
||||
|
||||
|
||||
def test_copy_deprecation_merge_concat():
    """``copy=`` on DataFrame.merge, merge and concat emits Pandas4Warning."""

    def expect_copy_warning():
        return tm.assert_produces_warning(
            Pandas4Warning, match="copy", check_stacklevel=False
        )

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    with expect_copy_warning():
        df.merge(df, copy=False)

    with expect_copy_warning():
        merge(df, df, copy=False)

    with expect_copy_warning():
        concat([df, df], copy=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", [False, True, "warn"])
def test_copy_on_write_deprecation_option(value):
    """Setting the ``mode.copy_on_write`` option warns for every value."""
    # stacklevel points to contextlib due to use of context manager.
    with (
        tm.assert_produces_warning(
            Pandas4Warning,
            match="Copy-on-Write can no longer be disabled",
            check_stacklevel=False,
        ),
        pd.option_context("mode.copy_on_write", value),
    ):
        pass
|
||||
@@ -0,0 +1,93 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_assigning_to_same_variable_removes_references():
    """Rebinding the name to a derived frame leaves no reference behind."""
    frame = DataFrame({"a": [1, 2, 3]})
    frame = frame.reset_index()
    assert frame._mgr._has_no_reference(1)

    values_before = get_array(frame, "a")
    frame.iloc[0, 1] = 100  # Write into a

    # the write went into the same array, so no copy was made
    assert np.shares_memory(values_before, get_array(frame, "a"))
|
||||
|
||||
|
||||
def test_setitem_dont_track_unnecessary_references():
    """Replacing one column must not make a later write to another one copy."""
    frame = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})

    frame["b"] = 100
    values_before = get_array(frame, "a")
    # setitem splits the block; the pieces must not end up referencing each
    # other, otherwise the write below would trigger a copy
    frame.iloc[0, 0] = 100
    assert np.shares_memory(values_before, get_array(frame, "a"))
|
||||
|
||||
|
||||
def test_setitem_with_view_copies():
    """With a live view, a write after a column replacement must copy."""
    frame = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
    alias = frame[:]
    expected = frame.copy()

    frame["b"] = 100
    values_before = get_array(frame, "a")
    frame.iloc[0, 0] = 100  # Check that we correctly track reference
    assert not np.shares_memory(values_before, get_array(frame, "a"))
    tm.assert_frame_equal(alias, expected)
|
||||
|
||||
|
||||
def test_setitem_with_view_invalidated_does_not_copy(request):
    """Once the view is dropped, a write should no longer need a copy (xfail)."""
    frame = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
    alias = frame[:]

    frame["b"] = 100
    values_before = get_array(frame, "a")
    alias = None  # noqa: F841
    # TODO(CoW) the block gets split because of ``frame["b"] = 100``, which
    # introduces additional refs even after those of ``alias`` are gone
    frame.iloc[0, 0] = 100
    # The split blocks were created from a block that shared data with the
    # view, so they reference the view and each other. With the view gone
    # they share data with no other block and a copy should not be needed.
    request.applymarker(
        pytest.mark.xfail(reason="blk.delete does not track references correctly")
    )
    assert np.shares_memory(values_before, get_array(frame, "a"))
|
||||
|
||||
|
||||
def test_out_of_scope():
    """A subset returned from a function has no references once the parent is gone."""

    def make_subset():
        parent = DataFrame({"a": [1, 2], "b": 1.5, "c": 1})
        # hand back a column subset; ``parent`` itself is local to this call
        return parent[["a", "b"]]

    subset = make_subset()
    for position in (0, 1):
        assert not subset._mgr.blocks[position].refs.has_reference()
|
||||
|
||||
|
||||
def test_delete():
    """Deleting a column leaves the remaining blocks without references."""
    values = np.random.default_rng(2).standard_normal((4, 3))
    frame = DataFrame(values, columns=["a", "b", "c"])

    del frame["b"]
    assert not frame._mgr.blocks[0].refs.has_reference()
    assert not frame._mgr.blocks[1].refs.has_reference()

    # rebinding to a one-column subset also ends up reference-free
    frame = frame[["a"]]
    assert not frame._mgr.blocks[0].refs.has_reference()
|
||||
|
||||
|
||||
def test_delete_reference():
    """With a live view, blocks left after a column delete stay referenced."""
    values = np.random.default_rng(2).standard_normal((4, 3))
    frame = DataFrame(values, columns=["a", "b", "c"])
    alias = frame[:]

    del frame["b"]

    assert frame._mgr.blocks[0].refs.has_reference()
    assert frame._mgr.blocks[1].refs.has_reference()
    assert alias._mgr.blocks[0].refs.has_reference()
|
||||
332
venv/Lib/site-packages/pandas/tests/copy_view/test_functions.py
Normal file
332
venv/Lib/site-packages/pandas/tests/copy_view/test_functions.py
Normal file
@@ -0,0 +1,332 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
concat,
|
||||
merge,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_concat_frames():
    """Column-wise concat shares data with both inputs until the result is written."""
    left = DataFrame({"b": ["a"] * 3}, dtype=object)
    right = DataFrame({"a": ["a"] * 3}, dtype=object)
    left_orig = left.copy()

    combined = concat([left, right], axis=1)

    assert np.shares_memory(get_array(combined, "b"), get_array(left, "b"))
    assert np.shares_memory(get_array(combined, "a"), get_array(right, "a"))

    # writing column 0 ("b") detaches only that column
    combined.iloc[0, 0] = "d"
    assert not np.shares_memory(get_array(combined, "b"), get_array(left, "b"))
    assert np.shares_memory(get_array(combined, "a"), get_array(right, "a"))

    # writing column 1 ("a") detaches the other one as well
    combined.iloc[0, 1] = "d"
    assert not np.shares_memory(get_array(combined, "a"), get_array(right, "a"))
    tm.assert_frame_equal(left, left_orig)
|
||||
|
||||
|
||||
def test_concat_frames_updating_input():
    """Writing to a concat input afterwards must not change the concat result."""
    left = DataFrame({"b": ["a"] * 3}, dtype=object)
    right = DataFrame({"a": ["a"] * 3}, dtype=object)

    combined = concat([left, right], axis=1)

    assert np.shares_memory(get_array(combined, "b"), get_array(left, "b"))
    assert np.shares_memory(get_array(combined, "a"), get_array(right, "a"))

    expected = combined.copy()

    # writing to the first input detaches only its column
    left.iloc[0, 0] = "d"
    assert not np.shares_memory(get_array(combined, "b"), get_array(left, "b"))
    assert np.shares_memory(get_array(combined, "a"), get_array(right, "a"))

    # writing to the second input detaches the remaining column
    right.iloc[0, 0] = "d"
    assert not np.shares_memory(get_array(combined, "a"), get_array(right, "a"))
    tm.assert_frame_equal(combined, expected)
|
||||
|
||||
|
||||
def test_concat_series():
    """Column-wise concat of Series shares data until the result is written."""
    first = Series([1, 2], name="a")
    second = Series([3, 4], name="b")
    first_orig = first.copy()
    second_orig = second.copy()

    combined = concat([first, second], axis=1)

    assert np.shares_memory(get_array(combined, "a"), first.values)
    assert np.shares_memory(get_array(combined, "b"), second.values)

    # writing column "a" detaches only that column
    combined.iloc[0, 0] = 100
    assert not np.shares_memory(get_array(combined, "a"), first.values)
    assert np.shares_memory(get_array(combined, "b"), second.values)

    # writing column "b" detaches the other one; the inputs stay unchanged
    combined.iloc[0, 1] = 1000
    assert not np.shares_memory(get_array(combined, "b"), second.values)
    tm.assert_series_equal(first, first_orig)
    tm.assert_series_equal(second, second_orig)
|
||||
|
||||
|
||||
def test_concat_frames_chained():
    """Nested concat still shares with, and is protected from, every input."""
    df1 = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    df2 = DataFrame({"c": [4, 5, 6]})
    df3 = DataFrame({"d": [4, 5, 6]})

    # the inner concat result is a temporary that is never bound to a name
    combined = concat([concat([df1, df2], axis=1), df3], axis=1)
    expected = combined.copy()

    assert np.shares_memory(get_array(combined, "a"), get_array(df1, "a"))
    assert np.shares_memory(get_array(combined, "c"), get_array(df2, "c"))
    assert np.shares_memory(get_array(combined, "d"), get_array(df3, "d"))

    df1.iloc[0, 0] = 100
    assert not np.shares_memory(get_array(combined, "a"), get_array(df1, "a"))

    tm.assert_frame_equal(combined, expected)
|
||||
|
||||
|
||||
def test_concat_series_chained():
    """Nested concat of Series still shares with, and is protected from, inputs."""
    ser1 = Series([1, 2, 3], name="a")
    ser2 = Series([4, 5, 6], name="c")
    ser3 = Series([4, 5, 6], name="d")

    # the inner concat result is a temporary that is never bound to a name
    combined = concat([concat([ser1, ser2], axis=1), ser3], axis=1)
    expected = combined.copy()

    assert np.shares_memory(get_array(combined, "a"), get_array(ser1, "a"))
    assert np.shares_memory(get_array(combined, "c"), get_array(ser2, "c"))
    assert np.shares_memory(get_array(combined, "d"), get_array(ser3, "d"))

    ser1.iloc[0] = 100
    assert not np.shares_memory(get_array(combined, "a"), get_array(ser1, "a"))

    tm.assert_frame_equal(combined, expected)
|
||||
|
||||
|
||||
def test_concat_series_updating_input():
    """Writing to a concatenated Series afterwards must not change the result."""
    first = Series([1, 2], name="a")
    second = Series([3, 4], name="b")
    expected = DataFrame({"a": [1, 2], "b": [3, 4]})

    combined = concat([first, second], axis=1)

    assert np.shares_memory(get_array(combined, "a"), get_array(first, "a"))
    assert np.shares_memory(get_array(combined, "b"), get_array(second, "b"))

    # writing to the first input detaches only column "a"
    first.iloc[0] = 100
    assert not np.shares_memory(get_array(combined, "a"), get_array(first, "a"))
    assert np.shares_memory(get_array(combined, "b"), get_array(second, "b"))
    tm.assert_frame_equal(combined, expected)

    # writing to the second input detaches column "b" as well
    second.iloc[0] = 1000
    assert not np.shares_memory(get_array(combined, "b"), get_array(second, "b"))
    tm.assert_frame_equal(combined, expected)
|
||||
|
||||
|
||||
def test_concat_mixed_series_frame():
    """Concat of a frame and a Series shares with both and is protected from both."""
    frame = DataFrame({"a": [1, 2, 3], "c": 1})
    series = Series([4, 5, 6], name="d")

    combined = concat([frame, series], axis=1)
    expected = combined.copy()

    assert np.shares_memory(get_array(combined, "a"), get_array(frame, "a"))
    assert np.shares_memory(get_array(combined, "c"), get_array(frame, "c"))
    assert np.shares_memory(get_array(combined, "d"), get_array(series, "d"))

    # writing to the Series input
    series.iloc[0] = 100
    assert not np.shares_memory(get_array(combined, "d"), get_array(series, "d"))

    # writing to the frame input
    frame.iloc[0, 0] = 100
    assert not np.shares_memory(get_array(combined, "a"), get_array(frame, "a"))
    tm.assert_frame_equal(combined, expected)
|
||||
|
||||
|
||||
def test_concat_copy_keyword():
    """Column-wise concat without a ``copy`` argument shares data with inputs."""
    ints = DataFrame({"a": [1, 2]})
    floats = DataFrame({"b": [1.5, 2.5]})

    combined = concat([ints, floats], axis=1)

    assert np.shares_memory(get_array(ints, "a"), get_array(combined, "a"))
    assert np.shares_memory(get_array(floats, "b"), get_array(combined, "b"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        lambda df1, df2, **kwargs: df1.merge(df2, **kwargs),
        lambda df1, df2, **kwargs: merge(df1, df2, **kwargs),
    ],
)
def test_merge_on_key(func):
    """Merging on an identical key column shares data lazily with both inputs."""
    df1 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "a": [1, 2, 3]})
    df2 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "b": [4, 5, 6]})
    df1_orig = df1.copy()
    df2_orig = df2.copy()

    merged = func(df1, df2, on="key")

    assert np.shares_memory(get_array(merged, "a"), get_array(df1, "a"))
    assert np.shares_memory(get_array(merged, "b"), get_array(df2, "b"))
    # the key column of the result shares with df1's key, not df2's
    assert np.shares_memory(get_array(merged, "key"), get_array(df1, "key"))
    assert not np.shares_memory(get_array(merged, "key"), get_array(df2, "key"))

    # writing column 1 ("a") detaches only that column
    merged.iloc[0, 1] = 0
    assert not np.shares_memory(get_array(merged, "a"), get_array(df1, "a"))
    assert np.shares_memory(get_array(merged, "b"), get_array(df2, "b"))

    # writing column 2 ("b") detaches it as well; the inputs stay unchanged
    merged.iloc[0, 2] = 0
    assert not np.shares_memory(get_array(merged, "b"), get_array(df2, "b"))
    tm.assert_frame_equal(df1, df1_orig)
    tm.assert_frame_equal(df2, df2_orig)
|
||||
|
||||
|
||||
def test_merge_on_index():
|
||||
df1 = DataFrame({"a": [1, 2, 3]})
|
||||
df2 = DataFrame({"b": [4, 5, 6]})
|
||||
df1_orig = df1.copy()
|
||||
df2_orig = df2.copy()
|
||||
|
||||
result = merge(df1, df2, left_index=True, right_index=True)
|
||||
|
||||
assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
|
||||
assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
|
||||
|
||||
result.iloc[0, 0] = 0
|
||||
assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
|
||||
assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
|
||||
|
||||
result.iloc[0, 1] = 0
|
||||
assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
|
||||
tm.assert_frame_equal(df1, df1_orig)
|
||||
tm.assert_frame_equal(df2, df2_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func, how",
|
||||
[
|
||||
(lambda df1, df2, **kwargs: merge(df2, df1, on="key", **kwargs), "right"),
|
||||
(lambda df1, df2, **kwargs: merge(df1, df2, on="key", **kwargs), "left"),
|
||||
],
|
||||
)
|
||||
def test_merge_on_key_enlarging_one(func, how):
|
||||
df1 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "a": [1, 2, 3]})
|
||||
df2 = DataFrame({"key": Series(["a", "b"], dtype=object), "b": [4, 5]})
|
||||
df1_orig = df1.copy()
|
||||
df2_orig = df2.copy()
|
||||
|
||||
result = func(df1, df2, how=how)
|
||||
|
||||
assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
|
||||
assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
|
||||
assert df2._mgr._has_no_reference(1)
|
||||
assert df2._mgr._has_no_reference(0)
|
||||
assert np.shares_memory(get_array(result, "key"), get_array(df1, "key")) is (
|
||||
how == "left"
|
||||
)
|
||||
assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key"))
|
||||
|
||||
if how == "left":
|
||||
result.iloc[0, 1] = 0
|
||||
else:
|
||||
result.iloc[0, 2] = 0
|
||||
assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
|
||||
tm.assert_frame_equal(df1, df1_orig)
|
||||
tm.assert_frame_equal(df2, df2_orig)
|
||||
|
||||
|
||||
def test_merge_copy_keyword():
|
||||
df = DataFrame({"a": [1, 2]})
|
||||
df2 = DataFrame({"b": [3, 4.5]})
|
||||
|
||||
result = df.merge(df2, left_index=True, right_index=True)
|
||||
|
||||
assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
|
||||
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
|
||||
|
||||
|
||||
def test_merge_upcasting_no_copy():
|
||||
left = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
||||
left_copy = left.copy()
|
||||
right = DataFrame({"a": [1, 2, 3], "c": [7, 8, 9]}, dtype=object)
|
||||
result = merge(left, right, on="a")
|
||||
assert np.shares_memory(get_array(result, "b"), get_array(left, "b"))
|
||||
assert not np.shares_memory(get_array(result, "a"), get_array(left, "a"))
|
||||
tm.assert_frame_equal(left, left_copy)
|
||||
|
||||
result = merge(right, left, on="a")
|
||||
assert np.shares_memory(get_array(result, "b"), get_array(left, "b"))
|
||||
assert not np.shares_memory(get_array(result, "a"), get_array(left, "a"))
|
||||
tm.assert_frame_equal(left, left_copy)
|
||||
|
||||
|
||||
def test_merge_indicator_no_deep_copy():
|
||||
left = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
||||
right = DataFrame({"a": [1, 2, 3], "c": [7, 8, 9]})
|
||||
result = merge(left, right, on="a", indicator=True)
|
||||
assert np.shares_memory(get_array(result, "b"), get_array(left, "b"))
|
||||
assert np.shares_memory(get_array(result, "c"), get_array(right, "c"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [object, "str"])
|
||||
def test_join_on_key(dtype):
|
||||
df_index = Index(["a", "b", "c"], name="key", dtype=dtype)
|
||||
|
||||
df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))
|
||||
df2 = DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True))
|
||||
|
||||
df1_orig = df1.copy()
|
||||
df2_orig = df2.copy()
|
||||
|
||||
result = df1.join(df2, on="key")
|
||||
|
||||
assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
|
||||
assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
|
||||
assert tm.shares_memory(get_array(result.index), get_array(df1.index))
|
||||
assert not np.shares_memory(get_array(result.index), get_array(df2.index))
|
||||
|
||||
result.iloc[0, 0] = 0
|
||||
assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
|
||||
assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
|
||||
|
||||
result.iloc[0, 1] = 0
|
||||
assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
|
||||
|
||||
tm.assert_frame_equal(df1, df1_orig)
|
||||
tm.assert_frame_equal(df2, df2_orig)
|
||||
|
||||
|
||||
def test_join_multiple_dataframes_on_key():
|
||||
df_index = Index(["a", "b", "c"], name="key", dtype=object)
|
||||
|
||||
df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))
|
||||
dfs_list = [
|
||||
DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True)),
|
||||
DataFrame({"c": [7, 8, 9]}, index=df_index.copy(deep=True)),
|
||||
]
|
||||
|
||||
df1_orig = df1.copy()
|
||||
dfs_list_orig = [df.copy() for df in dfs_list]
|
||||
|
||||
result = df1.join(dfs_list)
|
||||
|
||||
assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
|
||||
assert np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b"))
|
||||
assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c"))
|
||||
assert np.shares_memory(get_array(result.index), get_array(df1.index))
|
||||
assert not np.shares_memory(get_array(result.index), get_array(dfs_list[0].index))
|
||||
assert not np.shares_memory(get_array(result.index), get_array(dfs_list[1].index))
|
||||
|
||||
result.iloc[0, 0] = 0
|
||||
assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
|
||||
assert np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b"))
|
||||
assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c"))
|
||||
|
||||
result.iloc[0, 1] = 0
|
||||
assert not np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b"))
|
||||
assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c"))
|
||||
|
||||
result.iloc[0, 2] = 0
|
||||
assert not np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c"))
|
||||
|
||||
tm.assert_frame_equal(df1, df1_orig)
|
||||
for df, df_orig in zip(dfs_list, dfs_list_orig, strict=True):
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
902
venv/Lib/site-packages/pandas/tests/copy_view/test_indexing.py
Normal file
902
venv/Lib/site-packages/pandas/tests/copy_view/test_indexing.py
Normal file
@@ -0,0 +1,902 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_float_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
@pytest.fixture(params=["numpy", "nullable"])
def backend(request):
    # Fixture yielding ``(backend_name, frame_factory, series_factory)`` so a
    # test can build its inputs with either numpy or nullable dtypes.
    if request.param == "numpy":

        def make_dataframe(*args, **kwargs):
            return DataFrame(*args, **kwargs)

        def make_series(*args, **kwargs):
            return Series(*args, **kwargs)

    elif request.param == "nullable":

        def make_dataframe(*args, **kwargs):
            plain = DataFrame(*args, **kwargs)
            converted = plain.convert_dtypes()
            # convert_dtypes casts floats to ints when that is lossless;
            # restore a float dtype for columns that started out as float
            for name in plain.columns:
                was_float = is_float_dtype(plain[name].dtype)
                if was_float and not is_float_dtype(converted[name].dtype):
                    converted[name] = converted[name].astype("Float64")
            # hand out a copy so the frame starts fully self-owning
            return converted.copy()

        def make_series(*args, **kwargs):
            plain = Series(*args, **kwargs)
            return plain.convert_dtypes().copy()

    return request.param, make_dataframe, make_series
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Indexing operations taking subset + modifying the subset/parent
|
||||
|
||||
|
||||
def test_subset_column_selection(backend):
|
||||
# Case: taking a subset of the columns of a DataFrame
|
||||
# + afterwards modifying the subset
|
||||
_, DataFrame, _ = backend
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
|
||||
df_orig = df.copy()
|
||||
|
||||
subset = df[["a", "c"]]
|
||||
|
||||
assert subset.index is not df.index
|
||||
|
||||
# the subset shares memory ...
|
||||
assert np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
|
||||
# ... but uses CoW when being modified
|
||||
subset.iloc[0, 0] = 0
|
||||
|
||||
assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
|
||||
|
||||
expected = DataFrame({"a": [0, 2, 3], "c": [0.1, 0.2, 0.3]})
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_subset_column_selection_modify_parent(backend):
|
||||
# Case: taking a subset of the columns of a DataFrame
|
||||
# + afterwards modifying the parent
|
||||
_, DataFrame, _ = backend
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
|
||||
|
||||
subset = df[["a", "c"]]
|
||||
|
||||
# the subset shares memory ...
|
||||
assert np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
|
||||
# ... but parent uses CoW parent when it is modified
|
||||
df.iloc[0, 0] = 0
|
||||
|
||||
assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
|
||||
# different column/block still shares memory
|
||||
assert np.shares_memory(get_array(subset, "c"), get_array(df, "c"))
|
||||
|
||||
expected = DataFrame({"a": [1, 2, 3], "c": [0.1, 0.2, 0.3]})
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
|
||||
|
||||
def test_subset_row_slice(backend):
|
||||
# Case: taking a subset of the rows of a DataFrame using a slice
|
||||
# + afterwards modifying the subset
|
||||
_, DataFrame, _ = backend
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
|
||||
df_orig = df.copy()
|
||||
|
||||
subset = df[1:3]
|
||||
subset._mgr._verify_integrity()
|
||||
|
||||
assert subset.columns is not df.columns
|
||||
assert np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
|
||||
|
||||
subset.iloc[0, 0] = 0
|
||||
assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
|
||||
|
||||
subset._mgr._verify_integrity()
|
||||
|
||||
expected = DataFrame({"a": [0, 3], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3))
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
# original parent dataframe is not modified (CoW)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
|
||||
)
|
||||
def test_subset_column_slice(backend, dtype):
|
||||
# Case: taking a subset of the columns of a DataFrame using a slice
|
||||
# + afterwards modifying the subset
|
||||
dtype_backend, DataFrame, _ = backend
|
||||
df = DataFrame(
|
||||
{"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
|
||||
)
|
||||
df_orig = df.copy()
|
||||
|
||||
subset = df.iloc[:, 1:]
|
||||
subset._mgr._verify_integrity()
|
||||
|
||||
assert subset.index is not df.index
|
||||
assert np.shares_memory(get_array(subset, "b"), get_array(df, "b"))
|
||||
|
||||
subset.iloc[0, 0] = 0
|
||||
assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b"))
|
||||
|
||||
expected = DataFrame({"b": [0, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)})
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
# original parent dataframe is not modified (also not for BlockManager case,
|
||||
# except for single block)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"row_indexer",
|
||||
[slice(1, 2), np.array([False, True, True]), np.array([1, 2])],
|
||||
ids=["slice", "mask", "array"],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"column_indexer",
|
||||
[slice("b", "c"), np.array([False, True, True]), ["b", "c"]],
|
||||
ids=["slice", "mask", "array"],
|
||||
)
|
||||
def test_subset_loc_rows_columns(
|
||||
backend,
|
||||
dtype,
|
||||
row_indexer,
|
||||
column_indexer,
|
||||
):
|
||||
# Case: taking a subset of the rows+columns of a DataFrame using .loc
|
||||
# + afterwards modifying the subset
|
||||
# Generic test for several combinations of row/column indexers, not all
|
||||
# of those could actually return a view / need CoW (so this test is not
|
||||
# checking memory sharing, only ensuring subsequent mutation doesn't
|
||||
# affect the parent dataframe)
|
||||
dtype_backend, DataFrame, _ = backend
|
||||
df = DataFrame(
|
||||
{"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
|
||||
)
|
||||
df_orig = df.copy()
|
||||
|
||||
subset = df.loc[row_indexer, column_indexer]
|
||||
|
||||
assert subset.index is not df.index
|
||||
assert subset.columns is not df.columns
|
||||
|
||||
# modifying the subset never modifies the parent
|
||||
subset.iloc[0, 0] = 0
|
||||
|
||||
expected = DataFrame(
|
||||
{"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3)
|
||||
)
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"row_indexer",
|
||||
[slice(1, 3), np.array([False, True, True]), np.array([1, 2])],
|
||||
ids=["slice", "mask", "array"],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"column_indexer",
|
||||
[slice(1, 3), np.array([False, True, True]), [1, 2]],
|
||||
ids=["slice", "mask", "array"],
|
||||
)
|
||||
def test_subset_iloc_rows_columns(
|
||||
backend,
|
||||
dtype,
|
||||
row_indexer,
|
||||
column_indexer,
|
||||
):
|
||||
# Case: taking a subset of the rows+columns of a DataFrame using .iloc
|
||||
# + afterwards modifying the subset
|
||||
# Generic test for several combinations of row/column indexers, not all
|
||||
# of those could actually return a view / need CoW (so this test is not
|
||||
# checking memory sharing, only ensuring subsequent mutation doesn't
|
||||
# affect the parent dataframe)
|
||||
dtype_backend, DataFrame, _ = backend
|
||||
df = DataFrame(
|
||||
{"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
|
||||
)
|
||||
df_orig = df.copy()
|
||||
|
||||
subset = df.iloc[row_indexer, column_indexer]
|
||||
|
||||
assert subset.index is not df.index
|
||||
assert subset.columns is not df.columns
|
||||
|
||||
# modifying the subset never modifies the parent
|
||||
subset.iloc[0, 0] = 0
|
||||
|
||||
expected = DataFrame(
|
||||
{"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3)
|
||||
)
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer",
|
||||
[slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
|
||||
ids=["slice", "mask", "array"],
|
||||
)
|
||||
def test_subset_set_with_row_indexer(backend, indexer_si, indexer):
|
||||
# Case: setting values with a row indexer on a viewing subset
|
||||
# subset[indexer] = value and subset.iloc[indexer] = value
|
||||
_, DataFrame, _ = backend
|
||||
df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]})
|
||||
df_orig = df.copy()
|
||||
subset = df[1:4]
|
||||
|
||||
if (
|
||||
indexer_si is tm.setitem
|
||||
and isinstance(indexer, np.ndarray)
|
||||
and indexer.dtype == "int"
|
||||
):
|
||||
pytest.skip("setitem with labels selects on columns")
|
||||
|
||||
indexer_si(subset)[indexer] = 0
|
||||
|
||||
expected = DataFrame(
|
||||
{"a": [0, 0, 4], "b": [0, 0, 7], "c": [0.0, 0.0, 0.4]}, index=range(1, 4)
|
||||
)
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
# original parent dataframe is not modified (CoW)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_subset_set_with_mask(backend):
|
||||
# Case: setting values with a mask on a viewing subset: subset[mask] = value
|
||||
_, DataFrame, _ = backend
|
||||
df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]})
|
||||
df_orig = df.copy()
|
||||
subset = df[1:4]
|
||||
|
||||
mask = subset > 3
|
||||
|
||||
subset[mask] = 0
|
||||
|
||||
expected = DataFrame(
|
||||
{"a": [2, 3, 0], "b": [0, 0, 0], "c": [0.20, 0.3, 0.4]}, index=range(1, 4)
|
||||
)
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_subset_set_column(backend):
|
||||
# Case: setting a single column on a viewing subset -> subset[col] = value
|
||||
dtype_backend, DataFrame, _ = backend
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
|
||||
df_orig = df.copy()
|
||||
subset = df[1:3]
|
||||
|
||||
if dtype_backend == "numpy":
|
||||
arr = np.array([10, 11], dtype="int64")
|
||||
else:
|
||||
arr = pd.array([10, 11], dtype="Int64")
|
||||
|
||||
subset["a"] = arr
|
||||
subset._mgr._verify_integrity()
|
||||
expected = DataFrame(
|
||||
{"a": [10, 11], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)
|
||||
)
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
|
||||
)
|
||||
def test_subset_set_column_with_loc(backend, dtype):
|
||||
# Case: setting a single column with loc on a viewing subset
|
||||
# -> subset.loc[:, col] = value
|
||||
_, DataFrame, _ = backend
|
||||
df = DataFrame(
|
||||
{"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
|
||||
)
|
||||
df_orig = df.copy()
|
||||
subset = df[1:3]
|
||||
|
||||
subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
|
||||
|
||||
subset._mgr._verify_integrity()
|
||||
expected = DataFrame(
|
||||
{"a": [10, 11], "b": [5, 6], "c": np.array([8, 9], dtype=dtype)},
|
||||
index=range(1, 3),
|
||||
)
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
# original parent dataframe is not modified (CoW)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_subset_set_column_with_loc2(backend):
|
||||
# Case: setting a single column with loc on a viewing subset
|
||||
# -> subset.loc[:, col] = value
|
||||
# separate test for case of DataFrame of a single column -> takes a separate
|
||||
# code path
|
||||
_, DataFrame, _ = backend
|
||||
df = DataFrame({"a": [1, 2, 3]})
|
||||
df_orig = df.copy()
|
||||
subset = df[1:3]
|
||||
|
||||
subset.loc[:, "a"] = 0
|
||||
|
||||
subset._mgr._verify_integrity()
|
||||
expected = DataFrame({"a": [0, 0]}, index=range(1, 3))
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
# original parent dataframe is not modified (CoW)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
|
||||
)
|
||||
def test_subset_set_columns(backend, dtype):
|
||||
# Case: setting multiple columns on a viewing subset
|
||||
# -> subset[[col1, col2]] = value
|
||||
dtype_backend, DataFrame, _ = backend
|
||||
df = DataFrame(
|
||||
{"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
|
||||
)
|
||||
df_orig = df.copy()
|
||||
subset = df[1:3]
|
||||
|
||||
subset[["a", "c"]] = 0
|
||||
|
||||
subset._mgr._verify_integrity()
|
||||
# first and third column should certainly have no references anymore
|
||||
assert all(subset._mgr._has_no_reference(i) for i in [0, 2])
|
||||
expected = DataFrame({"a": [0, 0], "b": [5, 6], "c": [0, 0]}, index=range(1, 3))
|
||||
if dtype_backend == "nullable":
|
||||
# there is not yet a global option, so overriding a column by setting a scalar
|
||||
# defaults to numpy dtype even if original column was nullable
|
||||
expected["a"] = expected["a"].astype("int64")
|
||||
expected["c"] = expected["c"].astype("int64")
|
||||
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer",
|
||||
[slice("a", "b"), np.array([True, True, False]), ["a", "b"]],
|
||||
ids=["slice", "mask", "array"],
|
||||
)
|
||||
def test_subset_set_with_column_indexer(backend, indexer):
|
||||
# Case: setting multiple columns with a column indexer on a viewing subset
|
||||
# -> subset.loc[:, [col1, col2]] = value
|
||||
_, DataFrame, _ = backend
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]})
|
||||
df_orig = df.copy()
|
||||
subset = df[1:3]
|
||||
|
||||
subset.loc[:, indexer] = 0
|
||||
|
||||
subset._mgr._verify_integrity()
|
||||
expected = DataFrame({"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3))
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
lambda df: df[["a", "b"]][0:2],
|
||||
lambda df: df[0:2][["a", "b"]],
|
||||
lambda df: df[["a", "b"]].iloc[0:2],
|
||||
lambda df: df[["a", "b"]].loc[0:1],
|
||||
lambda df: df[0:2].iloc[:, 0:2],
|
||||
lambda df: df[0:2].loc[:, "a":"b"], # type: ignore[misc]
|
||||
],
|
||||
ids=[
|
||||
"row-getitem-slice",
|
||||
"column-getitem",
|
||||
"row-iloc-slice",
|
||||
"row-loc-slice",
|
||||
"column-iloc-slice",
|
||||
"column-loc-slice",
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
|
||||
)
|
||||
def test_subset_chained_getitem(
|
||||
request,
|
||||
backend,
|
||||
method,
|
||||
dtype,
|
||||
):
|
||||
# Case: creating a subset using multiple, chained getitem calls using views
|
||||
# still needs to guarantee proper CoW behaviour
|
||||
_, DataFrame, _ = backend
|
||||
df = DataFrame(
|
||||
{"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
|
||||
)
|
||||
df_orig = df.copy()
|
||||
|
||||
# modify subset -> don't modify parent
|
||||
subset = method(df)
|
||||
|
||||
subset.iloc[0, 0] = 0
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
# modify parent -> don't modify subset
|
||||
subset = method(df)
|
||||
df.iloc[0, 0] = 0
|
||||
expected = DataFrame({"a": [1, 2], "b": [4, 5]})
|
||||
tm.assert_frame_equal(subset, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
|
||||
)
|
||||
def test_subset_chained_getitem_column(backend, dtype):
|
||||
# Case: creating a subset using multiple, chained getitem calls using views
|
||||
# still needs to guarantee proper CoW behaviour
|
||||
dtype_backend, DataFrame, Series = backend
|
||||
df = DataFrame(
|
||||
{"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
|
||||
)
|
||||
df_orig = df.copy()
|
||||
|
||||
# modify subset -> don't modify parent
|
||||
subset = df[:]["a"][0:2]
|
||||
subset.iloc[0] = 0
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
# modify parent -> don't modify subset
|
||||
subset = df[:]["a"][0:2]
|
||||
df.iloc[0, 0] = 0
|
||||
expected = Series([1, 2], name="a")
|
||||
tm.assert_series_equal(subset, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
lambda s: s["a":"c"]["a":"b"], # type: ignore[misc]
|
||||
lambda s: s.iloc[0:3].iloc[0:2],
|
||||
lambda s: s.loc["a":"c"].loc["a":"b"], # type: ignore[misc]
|
||||
lambda s: s.loc["a":"c"] # type: ignore[misc]
|
||||
.iloc[0:3]
|
||||
.iloc[0:2]
|
||||
.loc["a":"b"] # type: ignore[misc]
|
||||
.iloc[0:1],
|
||||
],
|
||||
ids=["getitem", "iloc", "loc", "long-chain"],
|
||||
)
|
||||
def test_subset_chained_getitem_series(backend, method):
|
||||
# Case: creating a subset using multiple, chained getitem calls using views
|
||||
# still needs to guarantee proper CoW behaviour
|
||||
_, _, Series = backend
|
||||
s = Series([1, 2, 3], index=["a", "b", "c"])
|
||||
s_orig = s.copy()
|
||||
|
||||
# modify subset -> don't modify parent
|
||||
subset = method(s)
|
||||
subset.iloc[0] = 0
|
||||
tm.assert_series_equal(s, s_orig)
|
||||
|
||||
# modify parent -> don't modify subset
|
||||
subset = s.iloc[0:3].iloc[0:2]
|
||||
s.iloc[0] = 0
|
||||
expected = Series([1, 2], index=["a", "b"])
|
||||
tm.assert_series_equal(subset, expected)
|
||||
|
||||
|
||||
def test_subset_chained_single_block_row():
|
||||
# not parametrizing this for dtype backend, since this explicitly tests single block
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
|
||||
df_orig = df.copy()
|
||||
|
||||
# modify subset -> don't modify parent
|
||||
subset = df[:].iloc[0].iloc[0:2]
|
||||
subset.iloc[0] = 0
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
# modify parent -> don't modify subset
|
||||
subset = df[:].iloc[0].iloc[0:2]
|
||||
df.iloc[0, 0] = 0
|
||||
expected = Series([1, 4], index=["a", "b"], name=0)
|
||||
tm.assert_series_equal(subset, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
lambda df: df[:],
|
||||
lambda df: df.loc[:, :],
|
||||
lambda df: df.loc[:],
|
||||
lambda df: df.iloc[:, :],
|
||||
lambda df: df.iloc[:],
|
||||
],
|
||||
ids=["getitem", "loc", "loc-rows", "iloc", "iloc-rows"],
|
||||
)
|
||||
def test_null_slice(backend, method):
|
||||
# Case: also all variants of indexing with a null slice (:) should return
|
||||
# new objects to ensure we correctly use CoW for the results
|
||||
dtype_backend, DataFrame, _ = backend
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
|
||||
df_orig = df.copy()
|
||||
|
||||
df2 = method(df)
|
||||
|
||||
# we always return new objects (shallow copy), regardless of CoW or not
|
||||
assert df2 is not df
|
||||
assert df2.index is not df.index
|
||||
assert df2.columns is not df.columns
|
||||
|
||||
# and those trigger CoW when mutated
|
||||
df2.iloc[0, 0] = 0
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
lambda s: s[:],
|
||||
lambda s: s.loc[:],
|
||||
lambda s: s.iloc[:],
|
||||
],
|
||||
ids=["getitem", "loc", "iloc"],
|
||||
)
|
||||
def test_null_slice_series(backend, method):
|
||||
_, _, Series = backend
|
||||
s = Series([1, 2, 3], index=["a", "b", "c"])
|
||||
s_orig = s.copy()
|
||||
|
||||
s2 = method(s)
|
||||
|
||||
# we always return new objects, regardless of CoW or not
|
||||
assert s2 is not s
|
||||
assert s2.index is not s.index
|
||||
|
||||
# and those trigger CoW when mutated
|
||||
s2.iloc[0] = 0
|
||||
tm.assert_series_equal(s, s_orig)
|
||||
|
||||
|
||||
# TODO add more tests modifying the parent
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Series -- Indexing operations taking subset + modifying the subset/parent
|
||||
|
||||
|
||||
def test_series_getitem_slice(backend):
|
||||
# Case: taking a slice of a Series + afterwards modifying the subset
|
||||
_, _, Series = backend
|
||||
s = Series([1, 2, 3], index=["a", "b", "c"])
|
||||
s_orig = s.copy()
|
||||
|
||||
subset = s[:]
|
||||
assert np.shares_memory(get_array(subset), get_array(s))
|
||||
assert subset.index is not s.index
|
||||
|
||||
subset.iloc[0] = 0
|
||||
|
||||
assert not np.shares_memory(get_array(subset), get_array(s))
|
||||
|
||||
expected = Series([0, 2, 3], index=["a", "b", "c"])
|
||||
tm.assert_series_equal(subset, expected)
|
||||
|
||||
# original parent series is not modified (CoW)
|
||||
tm.assert_series_equal(s, s_orig)
|
||||
|
||||
|
||||
def test_series_getitem_ellipsis():
|
||||
# Case: taking a view of a Series using Ellipsis + afterwards modifying the subset
|
||||
s = Series([1, 2, 3])
|
||||
s_orig = s.copy()
|
||||
|
||||
subset = s[...]
|
||||
assert np.shares_memory(get_array(subset), get_array(s))
|
||||
assert subset.index is not s.index
|
||||
|
||||
subset.iloc[0] = 0
|
||||
|
||||
assert not np.shares_memory(get_array(subset), get_array(s))
|
||||
|
||||
expected = Series([0, 2, 3])
|
||||
tm.assert_series_equal(subset, expected)
|
||||
|
||||
# original parent series is not modified (CoW)
|
||||
tm.assert_series_equal(s, s_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer",
|
||||
[slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
|
||||
ids=["slice", "mask", "array"],
|
||||
)
|
||||
def test_series_subset_set_with_indexer(backend, indexer_si, indexer):
|
||||
# Case: setting values in a viewing Series with an indexer
|
||||
_, _, Series = backend
|
||||
s = Series([1, 2, 3], index=["a", "b", "c"])
|
||||
s_orig = s.copy()
|
||||
subset = s[:]
|
||||
|
||||
if (
|
||||
indexer_si is tm.setitem
|
||||
and isinstance(indexer, np.ndarray)
|
||||
and indexer.dtype.kind == "i"
|
||||
):
|
||||
# In 3.0 we treat integers as always-labels
|
||||
with pytest.raises(KeyError):
|
||||
indexer_si(subset)[indexer] = 0
|
||||
return
|
||||
|
||||
indexer_si(subset)[indexer] = 0
|
||||
expected = Series([0, 0, 3], index=["a", "b", "c"])
|
||||
tm.assert_series_equal(subset, expected)
|
||||
|
||||
tm.assert_series_equal(s, s_orig)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# del operator
|
||||
|
||||
|
||||
def test_del_frame(backend):
|
||||
# Case: deleting a column with `del` on a viewing child dataframe should
|
||||
# not modify parent + update the references
|
||||
dtype_backend, DataFrame, _ = backend
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
|
||||
df_orig = df.copy()
|
||||
df2 = df[:]
|
||||
|
||||
assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
|
||||
|
||||
del df2["b"]
|
||||
|
||||
assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
tm.assert_frame_equal(df2, df_orig[["a", "c"]])
|
||||
df2._mgr._verify_integrity()
|
||||
|
||||
df.loc[0, "b"] = 200
|
||||
assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
|
||||
df_orig = df.copy()
|
||||
|
||||
df2.loc[0, "a"] = 100
|
||||
# modifying child after deleting a column still doesn't update parent
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_del_series(backend):
|
||||
_, _, Series = backend
|
||||
s = Series([1, 2, 3], index=["a", "b", "c"])
|
||||
s_orig = s.copy()
|
||||
s2 = s[:]
|
||||
|
||||
assert np.shares_memory(get_array(s), get_array(s2))
|
||||
|
||||
del s2["a"]
|
||||
|
||||
assert not np.shares_memory(get_array(s), get_array(s2))
|
||||
tm.assert_series_equal(s, s_orig)
|
||||
tm.assert_series_equal(s2, s_orig[["b", "c"]])
|
||||
|
||||
# modifying s2 doesn't need copy on write (due to `del`, s2 is backed by new array)
|
||||
values = s2.values
|
||||
s2.loc["b"] = 100
|
||||
assert values[0] == 100
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Accessing column as Series
|
||||
|
||||
|
||||
def test_column_as_series(backend):
|
||||
# Case: selecting a single column now also uses Copy-on-Write
|
||||
dtype_backend, DataFrame, Series = backend
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
|
||||
df_orig = df.copy()
|
||||
|
||||
s = df["a"]
|
||||
|
||||
assert s.index is not df.index
|
||||
assert np.shares_memory(get_array(s, "a"), get_array(df, "a"))
|
||||
|
||||
s[0] = 0
|
||||
|
||||
expected = Series([0, 2, 3], name="a")
|
||||
tm.assert_series_equal(s, expected)
|
||||
# assert not np.shares_memory(s.values, get_array(df, "a"))
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
# ensure cached series on getitem is not the changed series
|
||||
tm.assert_series_equal(df["a"], df_orig["a"])
|
||||
|
||||
|
||||
def test_column_as_series_set_with_upcast(backend):
    """Setting an incompatible value on a selected column raises ``TypeError``.

    For the "nullable" backend the test additionally checks that neither the
    selected Series nor the parent frame changed.
    """
    dtype_backend, DataFrame, Series = backend
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    pristine = frame.copy()

    column = frame["a"]
    # Every backend is expected to reject the string with the same error.
    with pytest.raises(TypeError, match="Invalid value"):
        column[0] = "foo"

    if dtype_backend == "nullable":
        tm.assert_series_equal(column, Series([1, 2, 3], name="a"))
        tm.assert_frame_equal(frame, pristine)
        # A fresh getitem must still match the original column.
        tm.assert_series_equal(frame["a"], pristine["a"])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method",
    [
        lambda df: df["a"],
        lambda df: df.loc[:, "a"],
        lambda df: df.iloc[:, 0],
    ],
    ids=["getitem", "loc", "iloc"],
)
def test_column_as_series_no_item_cache(request, backend, method):
    """Selecting the same column twice yields two independent Series objects."""
    _, DataFrame, _ = backend
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    pristine = frame.copy()

    first = method(frame)
    second = method(frame)

    # Neither the Series nor their index objects may be shared.
    assert first is not second
    assert first.index is not frame.index
    assert first.index is not second.index

    first.iloc[0] = 0

    # Writing through one selection affects neither the other nor the frame.
    tm.assert_series_equal(second, pristine["a"])
    tm.assert_frame_equal(frame, pristine)
|
||||
|
||||
|
||||
# TODO add tests for other indexing methods on the Series
|
||||
|
||||
|
||||
def test_dataframe_add_column_from_series(backend):
    """Assigning a Series as a new column shares memory until one side is written."""
    _, DataFrame, Series = backend
    frame = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})

    source = Series([10, 11, 12])
    frame["new"] = source
    assert np.shares_memory(get_array(frame, "new"), get_array(source))

    # Writing to the Series must leave the frame's column as it was.
    source[0] = 0
    tm.assert_frame_equal(
        frame,
        DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "new": [10, 11, 12]}),
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [100, "a"])
@pytest.mark.parametrize(
    "indexer_func, indexer",
    [
        (tm.loc, (0, "a")),
        (tm.iloc, (0, 0)),
        (tm.loc, ([0], "a")),
        (tm.iloc, ([0], 0)),
        (tm.loc, (slice(None), "a")),
        (tm.iloc, (slice(None), 0)),
    ],
)
@pytest.mark.parametrize(
    "col", [[0.1, 0.2, 0.3], [7, 8, 9]], ids=["mixed-block", "single-block"]
)
def test_set_value_copy_only_necessary_column(indexer_func, indexer, val, col):
    """Setting into column "a" copies only that column, not its neighbours."""
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": col})
    pristine = frame.copy()
    alias = frame[:]

    if val != "a":
        indexer_func(frame)[indexer] = val
    else:
        # The string value is rejected for the integer column.
        with pytest.raises(TypeError, match="Invalid value"):
            indexer_func(frame)[indexer] = val

    # NOTE(review): the scraped source lost its indentation; these checks are
    # placed at function level based on the blank line before them - confirm.
    assert np.shares_memory(get_array(frame, "b"), get_array(alias, "b"))
    assert not np.shares_memory(get_array(frame, "a"), get_array(alias, "a"))
    tm.assert_frame_equal(alias, pristine)
|
||||
|
||||
|
||||
def test_series_midx_slice():
    """Selecting one outer MultiIndex level shares memory but writes stay local."""
    midx = pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]])
    parent = Series([1, 2, 3], index=midx)
    pristine = parent.copy()

    selected = parent[1]
    assert np.shares_memory(get_array(parent), get_array(selected))

    selected.iloc[0] = 100
    tm.assert_series_equal(parent, pristine)
|
||||
|
||||
|
||||
def test_getitem_midx_slice():
    """Selecting a top-level column group keeps a reference to the parent frame."""
    parent = DataFrame({("a", "x"): [1, 2], ("a", "y"): 1, ("b", "x"): 2})
    pristine = parent.copy()
    group = parent[("a",)]

    # The selection is tracked as a reference and shares the parent's data.
    assert not group._mgr._has_no_reference(0)
    assert np.shares_memory(get_array(parent, ("a", "x")), get_array(group, "x"))

    group.iloc[0, 0] = 100
    tm.assert_frame_equal(pristine, parent)
|
||||
|
||||
|
||||
def test_series_midx_tuples_slice():
    """Selecting by a tuple-valued outer level shares memory but writes stay local."""
    tuples = [((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]
    parent = Series([1, 2, 3], index=pd.MultiIndex.from_tuples(tuples))

    selected = parent[(1, 2)]
    assert np.shares_memory(get_array(parent), get_array(selected))

    selected.iloc[0] = 100

    # The parent must still hold its original values.
    untouched = Series(
        [1, 2, 3],
        index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]),
    )
    tm.assert_series_equal(parent, untouched)
|
||||
|
||||
|
||||
def test_midx_read_only_bool_indexer():
    """A boolean mask used inside ``IndexSlice`` selects correctly and is not mutated.

    GH#56635
    """

    def labels(prefix, count):
        # e.g. labels("A", 2) -> ["A0", "A1"]
        return [f"{prefix}{i}" for i in range(count)]

    rows = pd.MultiIndex.from_product(
        [labels("A", 4), labels("B", 2), labels("C", 4), labels("D", 2)]
    )
    columns = pd.MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], names=["lvl0", "lvl1"]
    )
    frame = DataFrame(1, index=rows, columns=columns).sort_index().sort_index(axis=1)

    # Every cell is 1, so the mask is all True and must match a plain slice.
    mask = frame[("a", "foo")] == 1
    mask_before = mask.copy()

    selected = frame.loc[pd.IndexSlice[mask, :, ["C1", "C3"]], :]
    reference = frame.loc[pd.IndexSlice[:, :, ["C1", "C3"]], :]

    tm.assert_frame_equal(selected, reference)
    tm.assert_series_equal(mask, mask_before)
|
||||
|
||||
|
||||
def test_loc_enlarging_with_dataframe():
    """Adding columns through ``.loc`` from another frame shares memory lazily."""
    target = DataFrame({"a": [1, 2, 3]})
    source = DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
    source_before = source.copy()

    target.loc[:, ["b", "c"]] = source

    # The new columns reference the source data rather than copying it.
    for name in ("b", "c"):
        assert np.shares_memory(get_array(target, name), get_array(source, name))
    assert not target._mgr._has_no_reference(1)

    # Writing to the target must not change the source frame.
    target.iloc[0, 1] = 100
    tm.assert_frame_equal(source, source_before)
|
||||
112
venv/Lib/site-packages/pandas/tests/copy_view/test_internals.py
Normal file
112
venv/Lib/site-packages/pandas/tests/copy_view/test_internals.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_consolidate():
    """Consolidating a viewing subset only drops references for the merged blocks."""
    # Adding "c" afterwards leaves the frame with three separate blocks.
    parent = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    parent["c"] = [4, 5, 6]

    # Full slice: every block of the subset refers back to a parent block.
    subset = parent[:]
    assert all(block.refs.has_reference() for block in subset._mgr.blocks)

    # Merge the two int64 blocks of the subset.
    subset._consolidate_inplace()

    # The float64 block is still a view, so it keeps its reference.
    assert subset._mgr.blocks[0].refs.has_reference()
    # get_array is used instead of df["b"].values to avoid caching df["b"].
    assert np.shares_memory(get_array(parent, "b"), get_array(subset, "b"))

    # The freshly consolidated int64 block references nothing else.
    assert not subset._mgr.blocks[1].refs.has_reference()

    # On the parent side only the float column is still linked.
    parent_blocks = parent._mgr.blocks
    assert not parent_blocks[0].refs.has_reference()
    assert parent_blocks[1].refs.has_reference()
    assert not parent_blocks[2].refs.has_reference()

    # Writing to the subset's float column still leaves the parent alone.
    subset.iloc[0, 1] = 0.0
    assert not parent._mgr.blocks[1].refs.has_reference()
    assert parent.loc[0, "b"] == 0.1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.intp, np.int8])
@pytest.mark.parametrize(
    "locs, arr",
    [
        ([0], np.array([-1, -2, -3])),
        ([1], np.array([-1, -2, -3])),
        ([5], np.array([-1, -2, -3])),
        ([0, 1], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
        ([0, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
        ([0, 1, 2], np.array([[-1, -2, -3], [-4, -5, -6], [-4, -5, -6]]).T),
        ([1, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
    ],
)
def test_iset_splits_blocks_inplace(locs, arr, dtype):
    """``iset(..., inplace=True)`` on a shallow copy leaves untouched columns shared.

    Nothing currently calls ``iset`` with more than one loc and
    ``inplace=True`` (that only happens with ``inplace=False``), but the
    multi-loc case is exercised here to make sure it works.
    """
    data = {
        "a": [1, 2, 3],
        "b": [4, 5, 6],
        "c": [7, 8, 9],
        "d": [10, 11, 12],
        "e": [13, 14, 15],
        "f": Series(["a", "b", "c"], dtype=object),
    }
    df = DataFrame(data)
    arr = arr.astype(dtype)
    df_orig = df.copy()
    df2 = df.copy(deep=False)  # Trigger a CoW (if enabled, otherwise makes copy)
    df2._mgr.iset(locs, arr, inplace=True)

    tm.assert_frame_equal(df, df_orig)
    # Columns outside ``locs`` must still share memory with the original.
    untouched = [name for pos, name in enumerate(df.columns) if pos not in locs]
    for name in untouched:
        assert np.shares_memory(get_array(df, name), get_array(df2, name))
|
||||
|
||||
|
||||
def test_exponential_backoff():
    """The dead-reference cleanup threshold grows and shrinks with live references.

    GH#55518
    """
    frame = DataFrame({"a": [1, 2, 3]})
    # Short-lived shallow copies: each is discarded immediately.
    for _ in range(490):
        frame.copy(deep=False)

    assert len(frame._mgr.blocks[0].refs.referenced_blocks) == 491

    frame = DataFrame({"a": [1, 2, 3]})
    # Keep these copies alive so their references cannot be cleared.
    alive = [frame.copy(deep=False) for _ in range(510)]

    for _ in range(20):
        frame.copy(deep=False)
    refs = frame._mgr.blocks[0].refs
    assert len(refs.referenced_blocks) == 531
    assert refs.clear_counter == 1000

    for _ in range(500):
        frame.copy(deep=False)

    # Don't reduce since we still have over 500 objects alive
    assert frame._mgr.blocks[0].refs.clear_counter == 1000

    # Release 210 of the retained copies.
    alive = alive[:300]
    for _ in range(500):
        frame.copy(deep=False)

    # Reduce since there are less than 500 objects alive
    assert frame._mgr.blocks[0].refs.clear_counter == 500
|
||||
@@ -0,0 +1,307 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
Interval,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
interval_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["pad", "nearest", "linear"])
def test_interpolate_no_op(method):
    """Interpolating a frame without missing values returns a lazily shared result."""
    frame = DataFrame({"a": [1, 2]})
    pristine = frame.copy()

    if method == "pad":
        # "pad" is rejected as an interpolation method.
        msg = f"Can not interpolate with method={method}"
        with pytest.raises(ValueError, match=msg):
            frame.interpolate(method=method)
        return

    result = frame.interpolate(method=method)
    # Nothing was filled, so the data is shared; the axes are new objects.
    assert np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    assert result.index is not frame.index
    assert result.columns is not frame.columns

    result.iloc[0, 0] = 100

    # The write separates the result from the input.
    assert not np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    tm.assert_frame_equal(frame, pristine)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["ffill", "bfill"])
def test_interp_fill_functions(func):
    """``ffill``/``bfill`` without missing values behave like a no-op interpolate."""
    frame = DataFrame({"a": [1, 2]})
    pristine = frame.copy()

    filler = getattr(frame, func)
    result = filler()

    # Nothing was filled, so the data is shared; the axes are new objects.
    assert np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    assert result.index is not frame.index
    assert result.columns is not frame.columns

    result.iloc[0, 0] = 100
    # The write separates the result from the input.
    assert not np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    tm.assert_frame_equal(frame, pristine)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["ffill", "bfill"])
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_triggers_copy(vals, func):
    """Filling an actual missing value yields a result with its own data."""
    frame = DataFrame({"a": vals})
    filled = getattr(frame, func)()

    assert not np.shares_memory(get_array(filled, "a"), get_array(frame, "a"))
    # The result must not be tracked as referencing the input.
    assert filled._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_no_reference_no_copy(vals):
    """Inplace interpolate on an unreferenced frame reuses the existing array."""
    frame = DataFrame({"a": vals})
    before = get_array(frame, "a")
    frame.interpolate(method="linear", inplace=True)

    # Same memory as before the call, and still no references on the block.
    assert np.shares_memory(before, get_array(frame, "a"))
    assert frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_with_refs(vals):
    """Inplace interpolate on a frame that has a live view must copy first.

    Parameters
    ----------
    vals : list
        Column values containing one missing entry (numeric or datetime).
    """
    # Build the frame from the parametrized values; the original hard-coded
    # the numeric case and silently ignored ``vals``.
    df = DataFrame({"a": vals})
    df_orig = df.copy()
    arr = get_array(df, "a")
    view = df[:]
    df.interpolate(method="linear", inplace=True)
    # Check that copy was triggered in interpolate and that we don't
    # have any references left
    assert not np.shares_memory(arr, get_array(df, "a"))
    tm.assert_frame_equal(df_orig, view)
    assert df._mgr._has_no_reference(0)
    assert view._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["ffill", "bfill"])
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
def test_interp_fill_functions_inplace(func, dtype):
    """Inplace ``ffill``/``bfill`` on a frame with a live view must copy first."""
    frame = DataFrame({"a": [1, np.nan, 2]}, dtype=dtype)
    pristine = frame.copy()
    before = get_array(frame, "a")
    alias = frame[:]

    getattr(frame, func)(inplace=True)

    # The fill went to new memory, the view kept the old values, and
    # neither side references the other any more.
    assert not np.shares_memory(before, get_array(frame, "a"))
    tm.assert_frame_equal(pristine, alias)
    assert frame._mgr._has_no_reference(0)
    assert alias._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_interpolate_cannot_with_object_dtype():
    """``interpolate`` raises ``TypeError`` when a column has object dtype."""
    frame = DataFrame({"a": ["a", np.nan, "c"], "b": 1})
    # Force object dtype explicitly for column "a".
    frame["a"] = frame["a"].astype(object)

    with pytest.raises(
        TypeError, match="DataFrame cannot interpolate with object dtype"
    ):
        frame.interpolate()
|
||||
|
||||
|
||||
def test_interpolate_object_convert_no_op():
    """An object column assigned via ``astype`` holds no references and keeps its array."""
    frame = DataFrame({"a": ["a", "b", "c"], "b": 1})
    frame["a"] = frame["a"].astype(object)
    before = get_array(frame, "a")

    # NOTE(review): no interpolate call is made in this test; the original
    # comment ("Now CoW makes a copy, it should not!") looks stale - confirm.
    assert frame._mgr._has_no_reference(0)
    assert np.shares_memory(before, get_array(frame, "a"))
|
||||
|
||||
|
||||
def test_interpolate_object_convert_copies():
    """A rejected inplace ``interpolate(method="pad")`` leaves the frame's array alone."""
    frame = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    before = get_array(frame, "a")

    with pytest.raises(ValueError, match="Can not interpolate with method=pad"):
        frame.interpolate(method="pad", inplace=True)

    # Still unreferenced and still backed by the same memory.
    assert frame._mgr._has_no_reference(0)
    assert np.shares_memory(before, get_array(frame, "a"))
|
||||
|
||||
|
||||
def test_interpolate_downcast_reference_triggers_copy():
    """A rejected inplace ``interpolate`` leaves both the frame and its view untouched.

    ``method="pad"`` makes ``interpolate`` raise ``ValueError``. The checks
    below run after the ``pytest.raises`` block so that they actually
    execute.
    """
    df = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    df_orig = df.copy()
    arr_a = get_array(df, "a")
    view = df[:]

    msg = "Can not interpolate with method=pad"
    with pytest.raises(ValueError, match=msg):
        df.interpolate(method="pad", inplace=True)

    # The original assertions sat inside the ``pytest.raises`` block after
    # the raising call and therefore never ran. Because the call fails, the
    # frame must be unchanged: still linked to the view and still backed by
    # the same array.
    assert not df._mgr._has_no_reference(0)
    assert np.shares_memory(arr_a, get_array(df, "a"))
    tm.assert_frame_equal(df, df_orig)

    tm.assert_frame_equal(df_orig, view)
|
||||
|
||||
|
||||
def test_fillna():
    """``fillna`` shares the column it did not change and protects the input."""
    frame = DataFrame({"a": [1.5, np.nan], "b": 1})
    pristine = frame.copy()

    filled = frame.fillna(5.5)
    # Column "b" had nothing to fill, so it is shared; the axes are new objects.
    assert np.shares_memory(get_array(frame, "b"), get_array(filled, "b"))
    assert filled.index is not frame.index
    assert filled.columns is not frame.columns

    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(pristine, frame)
|
||||
|
||||
|
||||
def test_fillna_dict():
    """``fillna`` with a per-column dict only detaches the column it fills."""
    frame = DataFrame({"a": [1.5, np.nan], "b": 1})
    pristine = frame.copy()

    filled = frame.fillna({"a": 100.5})
    # "b" is not in the dict and stays shared; "a" was filled into new memory.
    assert np.shares_memory(get_array(frame, "b"), get_array(filled, "b"))
    assert not np.shares_memory(get_array(frame, "a"), get_array(filled, "a"))

    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(pristine, frame)
|
||||
|
||||
|
||||
def test_fillna_inplace():
    """Inplace ``fillna`` on an unreferenced frame reuses the existing arrays."""
    frame = DataFrame({"a": [1.5, np.nan], "b": 1})
    before = {name: get_array(frame, name) for name in ("a", "b")}

    frame.fillna(5.5, inplace=True)

    # Both columns still live in the memory they had before the call.
    assert np.shares_memory(get_array(frame, "a"), before["a"])
    assert np.shares_memory(get_array(frame, "b"), before["b"])
    assert frame._mgr._has_no_reference(0)
    assert frame._mgr._has_no_reference(1)
|
||||
|
||||
|
||||
def test_fillna_inplace_reference():
    """Inplace ``fillna`` with a live view copies only the column that is filled."""
    frame = DataFrame({"a": [1.5, np.nan], "b": 1})
    pristine = frame.copy()
    before_a = get_array(frame, "a")
    before_b = get_array(frame, "b")
    alias = frame[:]

    frame.fillna(5.5, inplace=True)

    # "a" was filled into new memory; "b" had nothing to fill and is reused.
    assert not np.shares_memory(get_array(frame, "a"), before_a)
    assert np.shares_memory(get_array(frame, "b"), before_b)
    assert alias._mgr._has_no_reference(0)
    assert frame._mgr._has_no_reference(0)

    # The view kept the old values, the frame has the filled ones.
    tm.assert_frame_equal(alias, pristine)
    tm.assert_frame_equal(frame, DataFrame({"a": [1.5, 5.5], "b": 1}))
|
||||
|
||||
|
||||
def test_fillna_interval_inplace_reference():
    """Inplace ``fillna`` on an interval Series with a live view must copy."""
    # Set dtype explicitly to avoid implicit cast when setting nan
    intervals = interval_range(start=0, end=5)
    ser = Series(intervals, name="a", dtype="interval[float64, right]")
    ser.iloc[1] = np.nan

    pristine = ser.copy()
    alias = ser[:]
    ser.fillna(value=Interval(left=0, right=5), inplace=True)

    # Compare the left-bound arrays of both interval arrays.
    filled_left = get_array(ser, "a").left.values
    alias_left = get_array(alias, "a").left.values
    assert not np.shares_memory(filled_left, alias_left)
    tm.assert_series_equal(alias, pristine)
|
||||
|
||||
|
||||
def test_fillna_series_empty_arg():
    """``Series.fillna({})`` shares data with the input but stays protected."""
    source = Series([1, np.nan, 2])
    pristine = source.copy()

    filled = source.fillna({})
    assert np.shares_memory(get_array(source), get_array(filled))

    # Writing to the input must not leak into the returned Series.
    source.iloc[0] = 100.5
    tm.assert_series_equal(pristine, filled)
|
||||
|
||||
|
||||
def test_fillna_series_empty_arg_inplace():
    """Inplace ``Series.fillna({})`` keeps the same array and adds no references."""
    source = Series([1, np.nan, 2])
    before = get_array(source)

    source.fillna({}, inplace=True)

    assert np.shares_memory(get_array(source), before)
    assert source._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_fillna_ea_noop_shares_memory(any_numeric_ea_and_arrow_dtype):
    """``fillna`` on extension dtypes shares the column that had nothing to fill."""
    frame = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
    pristine = frame.copy()
    filled = frame.fillna(100)

    # "a" contained NA and was filled into new memory.
    assert not np.shares_memory(get_array(frame, "a"), get_array(filled, "a"))

    # "b" had nothing to fill: shared and tracked as a reference.
    assert np.shares_memory(get_array(frame, "b"), get_array(filled, "b"))
    assert not filled._mgr._has_no_reference(1)
    tm.assert_frame_equal(pristine, frame)

    # Writing to "b" of the result separates the two frames.
    filled.iloc[0, 1] = 100
    assert not np.shares_memory(get_array(frame, "b"), get_array(filled, "b"))
    assert filled._mgr._has_no_reference(1)
    assert frame._mgr._has_no_reference(1)
    tm.assert_frame_equal(pristine, frame)
|
||||
|
||||
|
||||
def test_fillna_inplace_ea_noop_shares_memory(any_numeric_ea_and_arrow_dtype):
    """Inplace ``fillna`` on extension dtypes with a live view copies only the filled column."""
    frame = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
    pristine = frame.copy()
    alias = frame[:]
    frame.fillna(100, inplace=True)

    # "a" contained NA and was filled into new memory.
    assert not np.shares_memory(get_array(frame, "a"), get_array(alias, "a"))

    # "b" had nothing to fill: still shared, still referenced on both sides.
    assert np.shares_memory(get_array(frame, "b"), get_array(alias, "b"))
    assert not frame._mgr._has_no_reference(1)
    assert not alias._mgr._has_no_reference(1)

    # A later write to the frame must not reach the view.
    frame.iloc[0, 1] = 100
    tm.assert_frame_equal(pristine, alias)
|
||||
|
||||
|
||||
def test_fillna_chained_assignment():
    """Inplace ``fillna`` on a temporary selection is flagged and changes nothing."""
    frame = DataFrame({"a": [1, np.nan, 2], "b": 1})
    pristine = frame.copy()

    # Selection of a single column as a Series.
    with tm.raises_chained_assignment_error():
        frame["a"].fillna(100, inplace=True)
    tm.assert_frame_equal(frame, pristine)

    # Selection of a column list as a DataFrame.
    with tm.raises_chained_assignment_error():
        frame[["a"]].fillna(100, inplace=True)
    tm.assert_frame_equal(frame, pristine)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["interpolate", "ffill", "bfill"])
def test_interpolate_chained_assignment(func):
    """Inplace fill methods on a temporary selection are flagged and change nothing."""
    frame = DataFrame({"a": [1, np.nan, 2], "b": 1})
    pristine = frame.copy()

    # Selection of a single column as a Series.
    with tm.raises_chained_assignment_error():
        getattr(frame["a"], func)(inplace=True)
    tm.assert_frame_equal(frame, pristine)

    # Selection of a column list as a DataFrame.
    with tm.raises_chained_assignment_error():
        getattr(frame[["a"]], func)(inplace=True)
    tm.assert_frame_equal(frame, pristine)
|
||||
1601
venv/Lib/site-packages/pandas/tests/copy_view/test_methods.py
Normal file
1601
venv/Lib/site-packages/pandas/tests/copy_view/test_methods.py
Normal file
File diff suppressed because it is too large
Load Diff
356
venv/Lib/site-packages/pandas/tests/copy_view/test_replace.py
Normal file
356
venv/Lib/site-packages/pandas/tests/copy_view/test_replace.py
Normal file
@@ -0,0 +1,356 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "replace_kwargs",
    [
        {"to_replace": {"a": 1, "b": 4}, "value": -1},
        # Test CoW splits blocks to avoid copying unchanged columns
        {"to_replace": {"a": 1}, "value": -1},
        {"to_replace": {"b": 4}, "value": -1},
        {"to_replace": {"b": {4: 1}}},
        # TODO: Add these in a further optimization
        # We would need to see which columns got replaced in the mask
        # which could be expensive
        # {"to_replace": {"b": 1}},
        # 1
    ],
)
def test_replace(replace_kwargs):
    """``replace`` shares untouched columns with the input and protects the input."""
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    pristine = frame.copy()

    replaced = frame.replace(**replace_kwargs)

    # "b" is only expected to be shared when its values did not change.
    if (replaced["b"] == frame["b"]).all():
        assert np.shares_memory(get_array(replaced, "b"), get_array(frame, "b"))
    # "c" is never a replacement target in the parametrized cases.
    assert tm.shares_memory(get_array(replaced, "c"), get_array(frame, "c"))

    # Writing to the shared column of the result separates it from the input.
    replaced.loc[0, "c"] = -1
    assert not np.shares_memory(get_array(replaced, "c"), get_array(frame, "c"))

    if "a" in replace_kwargs["to_replace"]:
        # After the write, "a" of the result is still backed by the array
        # grabbed beforehand, i.e. the write happened in place.
        replaced_a = get_array(replaced, "a")
        replaced.loc[0, "a"] = 100
        assert np.shares_memory(get_array(replaced, "a"), replaced_a)
    tm.assert_frame_equal(frame, pristine)
|
||||
|
||||
|
||||
def test_replace_regex_inplace_refs():
    """Inplace regex ``replace`` with a live view must write to new memory."""
    frame = DataFrame({"a": ["aaa", "bbb"]})
    pristine = frame.copy()
    alias = frame[:]
    before = get_array(frame, "a")

    frame.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)

    assert not np.shares_memory(before, get_array(frame, "a"))
    assert frame._mgr._has_no_reference(0)
    tm.assert_frame_equal(alias, pristine)
|
||||
|
||||
|
||||
def test_replace_regex_inplace():
    """Regex ``replace``: inplace reuses memory, non-inplace returns separate data."""
    frame = DataFrame({"a": ["aaa", "bbb"]})
    before = get_array(frame, "a")

    # No other object refers to the frame, so the replacement is in place.
    frame.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)
    assert frame._mgr._has_no_reference(0)
    assert tm.shares_memory(before, get_array(frame, "a"))

    # A non-inplace replacement that matches leaves the input unchanged.
    pristine = frame.copy()
    replaced = frame.replace(to_replace=r"^b.*$", value="new", regex=True)
    tm.assert_frame_equal(pristine, frame)
    assert not tm.shares_memory(get_array(replaced, "a"), get_array(frame, "a"))
|
||||
|
||||
|
||||
def test_replace_regex_inplace_no_op():
    """Regex ``replace`` on an integer column changes nothing and copies nothing."""
    frame = DataFrame({"a": [1, 2]})
    before = get_array(frame, "a")

    frame.replace(to_replace=r"^a.$", value="new", inplace=True, regex=True)
    assert frame._mgr._has_no_reference(0)
    assert np.shares_memory(before, get_array(frame, "a"))

    # The non-inplace variant returns a result that shares the data.
    pristine = frame.copy()
    replaced = frame.replace(to_replace=r"^x.$", value="new", regex=True)
    tm.assert_frame_equal(pristine, frame)
    assert np.shares_memory(get_array(replaced, "a"), get_array(frame, "a"))
|
||||
|
||||
|
||||
def test_replace_mask_all_false_second_block():
    """``replace`` shares the integer columns when only a float column matches."""
    frame = DataFrame({"a": [1.5, 2, 3], "b": 100.5, "c": 1, "d": 2})
    pristine = frame.copy()

    replaced = frame.replace(to_replace=1.5, value=55.5)

    # TODO: Block splitting would allow us to avoid copying b
    assert np.shares_memory(get_array(frame, "c"), get_array(replaced, "c"))
    assert not np.shares_memory(get_array(frame, "a"), get_array(replaced, "a"))

    replaced.loc[0, "c"] = 1
    tm.assert_frame_equal(frame, pristine)  # Original is unchanged

    # Only the written column "c" is separated; "d" stays shared.
    assert not np.shares_memory(get_array(frame, "c"), get_array(replaced, "c"))
    assert np.shares_memory(get_array(frame, "d"), get_array(replaced, "d"))
|
||||
|
||||
|
||||
def test_replace_coerce_single_column():
    """Replacing a float with a string only detaches the column that matched."""
    frame = DataFrame({"a": [1.5, 2, 3], "b": 100.5})
    pristine = frame.copy()

    replaced = frame.replace(to_replace=1.5, value="a")
    # "b" has no match and stays shared; "a" got new data.
    assert np.shares_memory(get_array(frame, "b"), get_array(replaced, "b"))
    assert not np.shares_memory(get_array(frame, "a"), get_array(replaced, "a"))

    replaced.loc[0, "b"] = 0.5
    tm.assert_frame_equal(frame, pristine)  # Original is unchanged
    assert not np.shares_memory(get_array(frame, "b"), get_array(replaced, "b"))
|
||||
|
||||
|
||||
def test_replace_to_replace_wrong_dtype():
    """Replacing a string that float columns cannot contain shares every column."""
    frame = DataFrame({"a": [1.5, 2, 3], "b": 100.5})
    pristine = frame.copy()

    replaced = frame.replace(to_replace="xxx", value=1.5)

    # Nothing matched, so both columns are shared with the input.
    assert np.shares_memory(get_array(frame, "b"), get_array(replaced, "b"))
    assert np.shares_memory(get_array(frame, "a"), get_array(replaced, "a"))

    replaced.loc[0, "b"] = 0.5
    tm.assert_frame_equal(frame, pristine)  # Original is unchanged
    assert not np.shares_memory(get_array(frame, "b"), get_array(replaced, "b"))
|
||||
|
||||
|
||||
def test_replace_list_categorical():
    """List ``replace`` on a categorical column: inplace reuses the codes array."""
    frame = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
    before = get_array(frame, "a")

    frame.replace(["c"], value="a", inplace=True)
    assert np.shares_memory(before.codes, get_array(frame, "a").codes)
    assert frame._mgr._has_no_reference(0)

    pristine = frame.copy()
    # The result is discarded on purpose; only the input is checked below.
    frame.replace(["b"], value="a")
    renamed = frame.apply(lambda col: col.cat.rename_categories({"b": "d"}))
    assert not np.shares_memory(before.codes, get_array(renamed, "a").codes)

    tm.assert_frame_equal(frame, pristine)
|
||||
|
||||
|
||||
def test_replace_list_inplace_refs_categorical():
    """Inplace list ``replace`` on a categorical column must not change a live view."""
    frame = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
    alias = frame[:]
    pristine = frame.copy()

    frame.replace(["c"], value="a", inplace=True)

    tm.assert_frame_equal(pristine, alias)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", [1.5, [1.5], []])
def test_replace_inplace(to_replace):
    """Inplace ``replace`` on an unreferenced frame reuses the existing array.

    Parameters
    ----------
    to_replace : float or list
        Scalar, one-element list, or empty list passed as ``to_replace``.
    """
    df = DataFrame({"a": [1.5, 2, 3]})
    arr_a = get_array(df, "a")
    # Pass the parametrized value; the original hard-coded ``1.5`` so the
    # list and empty-list cases were never exercised.
    df.replace(to_replace=to_replace, value=15.5, inplace=True)

    assert np.shares_memory(get_array(df, "a"), arr_a)
    assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", [1.5, [1.5]])
def test_replace_inplace_reference(to_replace):
    """Inplace ``replace`` that matches must copy when a view is alive."""
    frame = DataFrame({"a": [1.5, 2, 3]})
    before = get_array(frame, "a")
    alias = frame[:]

    frame.replace(to_replace=to_replace, value=15.5, inplace=True)

    # The frame moved to new memory and neither side references the other.
    assert not np.shares_memory(get_array(frame, "a"), before)
    assert frame._mgr._has_no_reference(0)
    assert alias._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", ["a", 100.5])
def test_replace_inplace_reference_no_op(to_replace):
    """Inplace ``replace`` that matches nothing keeps the view linked."""
    frame = DataFrame({"a": [1.5, 2, 3]})
    before = get_array(frame, "a")
    alias = frame[:]

    frame.replace(to_replace=to_replace, value=15.5, inplace=True)

    # Same memory as before, and both objects still reference each other.
    assert np.shares_memory(get_array(frame, "a"), before)
    assert not frame._mgr._has_no_reference(0)
    assert not alias._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", [1, [1]])
def test_replace_categorical_inplace_reference(to_replace):
    """Inplace ``replace`` on a categorical column with a live view must copy."""
    frame = DataFrame({"a": Categorical([1, 2, 3])})
    pristine = frame.copy()
    before = get_array(frame, "a")
    alias = frame[:]

    frame.replace(to_replace=to_replace, value=1, inplace=True)

    # New codes array, no references on either side, view unchanged.
    assert not np.shares_memory(get_array(frame, "a").codes, before.codes)
    assert frame._mgr._has_no_reference(0)
    assert alias._mgr._has_no_reference(0)
    tm.assert_frame_equal(alias, pristine)
|
||||
|
||||
|
||||
def test_replace_categorical_inplace():
    """Inplace ``replace`` on an unreferenced categorical column reuses its codes."""
    frame = DataFrame({"a": Categorical([1, 2, 3])})
    before = get_array(frame, "a")

    # Replacing 1 with 1 leaves the values as they are.
    frame.replace(to_replace=1, value=1, inplace=True)

    assert np.shares_memory(get_array(frame, "a").codes, before.codes)
    assert frame._mgr._has_no_reference(0)

    tm.assert_frame_equal(frame, DataFrame({"a": Categorical([1, 2, 3])}))
|
||||
|
||||
|
||||
def test_replace_categorical():
    """Non-inplace ``replace`` on a categorical column returns independent data."""
    frame = DataFrame({"a": Categorical([1, 2, 3])})
    pristine = frame.copy()
    replaced = frame.replace(to_replace=1, value=1)

    # Input and result are not linked and do not share their codes.
    assert frame._mgr._has_no_reference(0)
    assert replaced._mgr._has_no_reference(0)
    assert not np.shares_memory(
        get_array(frame, "a").codes, get_array(replaced, "a").codes
    )
    tm.assert_frame_equal(frame, pristine)

    # A later write to the result keeps using the result's codes array.
    codes_before = get_array(replaced, "a").codes
    replaced.iloc[0, 0] = 2.0
    assert np.shares_memory(get_array(replaced, "a").codes, codes_before)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["where", "mask"])
def test_masking_inplace(method):
    """Inplace ``where``/``mask`` with a live view must write to new memory."""
    frame = DataFrame({"a": [1.5, 2, 3]})
    pristine = frame.copy()
    before = get_array(frame, "a")
    alias = frame[:]

    # Rebind the parameter to the bound method, as the original test does.
    method = getattr(frame, method)
    method(frame["a"] > 1.6, -1, inplace=True)

    assert not np.shares_memory(get_array(frame, "a"), before)
    assert frame._mgr._has_no_reference(0)
    assert alias._mgr._has_no_reference(0)
    tm.assert_frame_equal(alias, pristine)
|
||||
|
||||
|
||||
def test_replace_empty_list():
    """``replace([], [])`` returns a result that shares data and is tracked."""
    frame = DataFrame({"a": [1, 2]})

    replaced = frame.replace([], [])
    assert np.shares_memory(get_array(replaced, "a"), get_array(frame, "a"))
    assert not frame._mgr._has_no_reference(0)

    before = get_array(frame, "a")
    # Second call: the result is discarded, the frame keeps its memory.
    frame.replace([], [])
    assert np.shares_memory(get_array(frame, "a"), before)
    assert not frame._mgr._has_no_reference(0)
    assert not replaced._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ["d", None])
def test_replace_object_list_inplace(value):
    """Inplace list ``replace`` on an unreferenced object column reuses its array."""
    frame = DataFrame({"a": ["a", "b", "c"]}, dtype=object)
    before = get_array(frame, "a")

    frame.replace(["c"], value, inplace=True)

    assert np.shares_memory(before, get_array(frame, "a"))
    assert frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_replace_list_multiple_elements_inplace():
    """Inplace ``replace`` of several values at once reuses the existing array."""
    frame = DataFrame({"a": [1, 2, 3]})
    before = get_array(frame, "a")

    frame.replace([1, 2], 4, inplace=True)

    assert np.shares_memory(before, get_array(frame, "a"))
    assert frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_replace_list_none():
    """Check memory sharing of ``replace(list, value=None)`` with and without a hit."""
    df = DataFrame({"a": ["a", "b", "c"]})

    df_orig = df.copy()
    # "b" is present, so the result differs from ``df``.
    df2 = df.replace(["b"], value=None)
    tm.assert_frame_equal(df, df_orig)

    assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))

    # replace multiple values that don't actually replace anything with None
    # https://github.com/pandas-dev/pandas/issues/59770
    df3 = df.replace(["d", "e", "f"], value=None)
    tm.assert_frame_equal(df3, df_orig)
    # Uses the ``tm.shares_memory`` helper here, unlike the ``np.shares_memory``
    # check above.
    assert tm.shares_memory(get_array(df, "a"), get_array(df3, "a"))
|
||||
|
||||
|
||||
def test_replace_list_none_inplace_refs():
    """Check in-place list-to-``None`` replace while a slice of the frame is alive."""
    df = DataFrame({"a": ["a", "b", "c"]})
    arr = get_array(df, "a")
    df_orig = df.copy()
    # Keep a full slice of ``df`` alive across the in-place replace.
    view = df[:]
    df.replace(["a"], value=None, inplace=True)
    assert df._mgr._has_no_reference(0)
    # ``df`` no longer uses the buffer captured before the replace.
    assert not np.shares_memory(arr, get_array(df, "a"))
    # The slice taken earlier still equals the original values.
    tm.assert_frame_equal(df_orig, view)
|
||||
|
||||
|
||||
def test_replace_columnwise_no_op_inplace():
    """Check an in-place column-wise replace that matches nothing."""
    df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    view = df[:]
    df_orig = df.copy()
    # 10 does not occur in column "a", so no value is actually replaced.
    df.replace({"a": 10}, 100, inplace=True)
    assert np.shares_memory(get_array(view, "a"), get_array(df, "a"))
    # A later write to ``df`` must not be visible through ``view``.
    df.iloc[0, 0] = 100
    tm.assert_frame_equal(view, df_orig)
|
||||
|
||||
|
||||
def test_replace_columnwise_no_op():
    """Check a column-wise replace that matches nothing and returns a new frame."""
    df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    df_orig = df.copy()
    # 10 does not occur in column "a", so no value is actually replaced.
    df2 = df.replace({"a": 10}, 100)
    assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
    # Writing to the result must leave the source frame unchanged.
    df2.iloc[0, 0] = 100
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_replace_chained_assignment():
    """Check in-place ``replace`` on a selection taken directly from a frame.

    Both calls run under ``tm.raises_chained_assignment_error()`` and the
    parent frame is required to stay equal to its original copy.
    """
    df = DataFrame({"a": [1, np.nan, 2], "b": 1})
    df_orig = df.copy()
    # Single-column selection (``df["a"]``).
    with tm.raises_chained_assignment_error():
        df["a"].replace(1, 100, inplace=True)
    tm.assert_frame_equal(df, df_orig)

    # List selection (``df[["a"]]``).
    with tm.raises_chained_assignment_error():
        df[["a"]].replace(1, 100, inplace=True)
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_replace_listlike():
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
|
||||
df_orig = df.copy()
|
||||
|
||||
result = df.replace([200, 201], [11, 11])
|
||||
assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
|
||||
|
||||
result.iloc[0, 0] = 100
|
||||
tm.assert_frame_equal(df, df)
|
||||
|
||||
result = df.replace([200, 2], [10, 10])
|
||||
assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_replace_listlike_inplace():
    """Check in-place list-to-list ``replace`` with and without a live slice."""
    df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    arr = get_array(df, "a")
    # No other object was derived from ``df`` yet: the buffer is expected to
    # be reused.
    df.replace([200, 2], [10, 11], inplace=True)
    assert np.shares_memory(get_array(df, "a"), arr)

    # With a slice of ``df`` alive, the next in-place replace must give ``df``
    # a new buffer and leave the slice unchanged.
    view = df[:]
    df_orig = df.copy()
    df.replace([200, 3], [10, 11], inplace=True)
    assert not np.shares_memory(get_array(df, "a"), arr)
    tm.assert_frame_equal(view, df_orig)
|
||||
142
venv/Lib/site-packages/pandas/tests/copy_view/test_setitem.py
Normal file
142
venv/Lib/site-packages/pandas/tests/copy_view/test_setitem.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy/view behaviour for the values that are set in a DataFrame
|
||||
|
||||
|
||||
def test_set_column_with_array():
    """Check that assigning a NumPy array as a new column copies the array."""
    # Case: setting an array as a new column (df[col] = arr) copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    arr = np.array([1, 2, 3], dtype="int64")

    df["c"] = arr

    # the array data is copied
    assert not np.shares_memory(get_array(df, "c"), arr)
    # and thus modifying the array does not modify the DataFrame
    arr[0] = 0
    tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
|
||||
|
||||
|
||||
def test_set_column_with_series():
    """Check that a Series assigned as a new column shares memory until written to."""
    # Case: setting a series as a new column (df[col] = s) copies that data
    # (with delayed copy with CoW)
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    ser = Series([1, 2, 3])

    df["c"] = ser

    # Right after the assignment both objects use the same buffer.
    assert np.shares_memory(get_array(df, "c"), get_array(ser))

    # and modifying the series does not modify the DataFrame
    ser.iloc[0] = 0
    assert ser.iloc[0] == 0
    tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
|
||||
|
||||
|
||||
def test_set_column_with_index():
    """Check that assigning an ``Index`` or ``RangeIndex`` as a column copies it."""
    # Case: setting an index as a new column (df[col] = idx) copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    idx = Index([1, 2, 3])

    df["c"] = idx

    # the index data is copied
    assert not np.shares_memory(get_array(df, "c"), idx.values)

    # Same check for a RangeIndex; its ``.values`` array is captured before
    # the assignment and compared afterwards.
    idx = RangeIndex(1, 4)
    arr = idx.values

    df["d"] = idx

    assert not np.shares_memory(get_array(df, "d"), arr)
|
||||
|
||||
|
||||
def test_set_columns_with_dataframe():
    """Check that a DataFrame assigned as new columns shares memory until written to."""
    # Case: setting a DataFrame as new columns copies that data
    # (with delayed copy with CoW)
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    df2 = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})

    df[["c", "d"]] = df2

    # Right after the assignment column "c" uses the same buffer in both frames.
    assert np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
    # and modifying the set DataFrame does not modify the original DataFrame
    df2.iloc[0, 0] = 0
    tm.assert_series_equal(df["c"], Series([7, 8, 9], name="c"))
|
||||
|
||||
|
||||
def test_setitem_series_no_copy():
    """Check that a Series added as a new column is not copied at assignment time."""
    # Case: setting a Series as column into a DataFrame can delay copying that data
    df = DataFrame({"a": [1, 2, 3]})
    rhs = Series([4, 5, 6])
    rhs_orig = rhs.copy()

    # adding a new column
    df["b"] = rhs
    assert np.shares_memory(get_array(rhs), get_array(df, "b"))

    # Writing into the new column (position 1) must leave ``rhs`` unchanged.
    df.iloc[0, 1] = 100
    tm.assert_series_equal(rhs, rhs_orig)
|
||||
|
||||
|
||||
def test_setitem_series_no_copy_single_block():
    """Check that overwriting a column with a Series is not copied at assignment time."""
    # Overwriting an existing column that is a single block
    df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    rhs = Series([4, 5, 6])
    rhs_orig = rhs.copy()

    df["a"] = rhs
    assert np.shares_memory(get_array(rhs), get_array(df, "a"))

    # Writing into the overwritten column must leave ``rhs`` unchanged.
    df.iloc[0, 0] = 100
    tm.assert_series_equal(rhs, rhs_orig)
|
||||
|
||||
|
||||
def test_setitem_series_no_copy_split_block():
    """Check overwriting one of two same-dtype columns with a Series without a copy."""
    # Overwriting an existing column that is part of a larger block
    df = DataFrame({"a": [1, 2, 3], "b": 1})
    rhs = Series([4, 5, 6])
    rhs_orig = rhs.copy()

    df["b"] = rhs
    assert np.shares_memory(get_array(rhs), get_array(df, "b"))

    # Writing into the overwritten column must leave ``rhs`` unchanged.
    df.iloc[0, 1] = 100
    tm.assert_series_equal(rhs, rhs_orig)
|
||||
|
||||
|
||||
def test_setitem_series_column_midx_broadcasting():
    """Check that a Series assigned to several MultiIndex columns does not share memory."""
    # Setting a Series to multiple columns will repeat the data
    # (currently copying the data eagerly)
    df = DataFrame(
        [[1, 2, 3], [3, 4, 5]],
        columns=MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 3]]),
    )
    rhs = Series([10, 11])
    # "a" is the first level of two columns, so both receive the values.
    df["a"] = rhs
    assert not np.shares_memory(get_array(rhs), df._get_column_array(0))
    assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_set_column_with_inplace_operator():
    """Exercise ``+=`` on a frame column and on a Series taken from a frame."""
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    # this should not raise any warning
    with tm.assert_produces_warning(None):
        df["a"] += 1

    # when it is not in a chain, then it should produce a warning
    # NOTE(review): nothing below asserts that a warning is emitted; these
    # statements only have to run without raising. Confirm whether the
    # comment above still describes the intended behaviour.
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    ser = df["a"]
    ser += 1
|
||||
14
venv/Lib/site-packages/pandas/tests/copy_view/test_util.py
Normal file
14
venv/Lib/site-packages/pandas/tests/copy_view/test_util.py
Normal file
@@ -0,0 +1,14 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import DataFrame
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_get_array_numpy():
    """Two ``get_array`` calls on a NumPy-backed column must share memory."""
    frame = DataFrame({"a": [1, 2, 3]})
    first = get_array(frame, "a")
    second = get_array(frame, "a")
    assert np.shares_memory(first, second)
|
||||
|
||||
|
||||
def test_get_array_masked():
    """Two ``get_array`` calls on an ``Int64`` column must share memory."""
    frame = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
    first = get_array(frame, "a")
    second = get_array(frame, "a")
    assert np.shares_memory(first, second)
|
||||
30
venv/Lib/site-packages/pandas/tests/copy_view/util.py
Normal file
30
venv/Lib/site-packages/pandas/tests/copy_view/util.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
from pandas.core.arrays import BaseMaskedArray
|
||||
|
||||
|
||||
def get_array(obj, col=None):
    """
    Return the array backing an Index, a Series, or one DataFrame column.

    This stands in for ``df[col].values`` while avoiding the regular getitem
    path, since that path triggers reference tracking / CoW, which may be the
    very thing a test wants to observe from another operation.

    An Index, or a Series whose name equals ``col`` (or any Series when ``col``
    is None), is read through ``_values``. Anything else is treated as a
    frame and ``col`` must resolve to a single integer position.

    A Categorical is returned as is, a masked array yields its ``_data``,
    and any other array yields its ``_ndarray`` attribute when it has one,
    else the array itself.
    """
    series_match = isinstance(obj, Series) and (col is None or obj.name == col)
    if isinstance(obj, Index) or series_match:
        values = obj._values
    else:
        assert col is not None
        position = obj.columns.get_loc(col)
        assert isinstance(position, int)
        values = obj._get_column_array(position)

    if isinstance(values, Categorical):
        return values
    if isinstance(values, BaseMaskedArray):
        return values._data
    return getattr(values, "_ndarray", values)
|
||||
Reference in New Issue
Block a user