Skip to content

Commit 4d23ab7

Browse files
committed
ENH: Resolve Future Warnings from pandas v2.1.0
closes #713
1 parent faed66f commit 4d23ab7

File tree

9 files changed

+66
-39
lines changed

9 files changed

+66
-39
lines changed

plotnine/coords/coord.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -167,10 +167,18 @@ def munch(
167167
) -> pd.DataFrame:
168168
ranges = self.backtransform_range(panel_params)
169169

170-
data.loc[data["x"] == -np.inf, "x"] = ranges.x[0]
171-
data.loc[data["x"] == np.inf, "x"] = ranges.x[1]
172-
data.loc[data["y"] == -np.inf, "y"] = ranges.y[0]
173-
data.loc[data["y"] == np.inf, "y"] = ranges.y[1]
170+
x_neginf = np.isneginf(data["x"])
171+
x_posinf = np.isposinf(data["x"])
172+
y_neginf = np.isneginf(data["y"])
173+
y_posinf = np.isposinf(data["y"])
174+
if x_neginf.any():
175+
data.loc[x_neginf, "x"] = ranges.x[0]
176+
if x_posinf.any():
177+
data.loc[x_posinf, "x"] = ranges.x[1]
178+
if y_neginf.any():
179+
data.loc[y_neginf, "y"] = ranges.y[0]
180+
if y_posinf.any():
181+
data.loc[y_posinf, "y"] = ranges.y[1]
174182

175183
dist = self.distance(data["x"], data["y"], panel_params)
176184
bool_idx = (

plotnine/geoms/geom.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -286,7 +286,7 @@ def draw_layer(
286286
includes the stacking order of the layer in
287287
the plot (*zorder*)
288288
"""
289-
for pid, pdata in data.groupby("PANEL"):
289+
for pid, pdata in data.groupby("PANEL", observed=True):
290290
if len(pdata) == 0:
291291
continue
292292
ploc = pdata["PANEL"].iat[0] - 1

plotnine/mapping/evaluation.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,10 @@ def reorder(x, y, fun=np.median, ascending=True):
160160
if len(x) != len(y):
161161
raise ValueError(f"Lengths are not equal. {len(x)=}, {len(x)=}")
162162
summary = (
163-
pd.Series(y).groupby(x).apply(fun).sort_values(ascending=ascending)
163+
pd.Series(y)
164+
.groupby(x, observed=True)
165+
.apply(fun)
166+
.sort_values(ascending=ascending)
164167
)
165168
cats = summary.index.to_list()
166169
return pd.Categorical(x, categories=cats)

plotnine/stats/binning.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,13 +14,13 @@
1414
from plotnine.typing import FloatArray, TupleFloat2
1515

1616

17-
__all__ = [
17+
__all__ = (
1818
"freedman_diaconis_bins",
1919
"breaks_from_bins",
2020
"breaks_from_binwidth",
2121
"assign_bins",
2222
"fuzzybreaks",
23-
]
23+
)
2424

2525

2626
def freedman_diaconis_bins(a):
@@ -37,7 +37,7 @@ def freedman_diaconis_bins(a):
3737
if h == 0:
3838
bins = np.ceil(np.sqrt(a.size))
3939
else:
40-
bins = np.ceil((np.nanmax(a) - np.nanmin(a)) / h)
40+
bins = np.ceil((np.nanmax(a) - np.nanmin(a)) / h) # type: ignore
4141

4242
return int(bins)
4343

@@ -168,10 +168,10 @@ def assign_bins(x, breaks, weight=None, pad=False, closed="right"):
168168
# - the bins to which each x is assigned
169169
# - the weight of each x value
170170
# Then create a weighted frequency table
171-
df = pd.DataFrame({"bin_idx": bin_idx, "weight": weight})
172-
wftable = df.pivot_table("weight", index=["bin_idx"], aggfunc=np.sum)[
173-
"weight"
174-
]
171+
bins_long = pd.DataFrame({"bin_idx": bin_idx, "weight": weight})
172+
wftable = bins_long.pivot_table(
173+
"weight", index=["bin_idx"], aggfunc="sum"
174+
)["weight"]
175175

176176
# Empty bins get no value in the computed frequency table.
177177
# We need to add the zeros and since frequency table is a
@@ -279,7 +279,7 @@ def fuzzybreaks(
279279
binwidth = (srange[1] - srange[0]) / bins
280280

281281
if boundary is None or np.isnan(boundary):
282-
boundary = round_any(srange[0], binwidth, np.floor) # pyright: ignore
282+
boundary = round_any(srange[0], binwidth, np.floor)
283283

284284
if recompute_bins:
285285
bins = int(np.ceil((srange[1] - boundary) / binwidth))

plotnine/stats/density.py

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@
99
"""
1010

1111
import numpy as np
12-
import pandas.api.types as pdtypes
12+
13+
from ..utils import array_kind
1314

1415

1516
def kde_scipy(data, grid, **kwargs):
@@ -214,13 +215,10 @@ def get_var_type(col):
214215
The origin of the character codes is
215216
:class:`statsmodels.nonparametric.kernel_density.KDEMultivariate`.
216217
"""
217-
if pdtypes.is_numeric_dtype(col):
218-
# continuous
218+
if array_kind.continuous(col):
219219
return "c"
220-
elif pdtypes.is_categorical_dtype(col):
221-
# ordered or unordered
222-
return "o" if col.cat.ordered else "u"
220+
elif array_kind.discrete(col):
221+
return "o" if array_kind.ordinal else "u"
223222
else:
224-
# unordered if unsure, e.g string columns that
225-
# are not categorical
223+
# unordered if unsure
226224
return "u"

plotnine/stats/stat_bin_2d.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -104,13 +104,13 @@ def compute_group(cls, data, scales, **params):
104104
xbins = pd.cut(
105105
x,
106106
bins=xbreaks, # pyright: ignore
107-
labels=False, # pyright: ignore
107+
labels=False,
108108
right=True,
109109
)
110110
ybins = pd.cut(
111111
y,
112112
bins=ybreaks, # pyright: ignore
113-
labels=False, # pyright: ignore
113+
labels=False,
114114
right=True,
115115
)
116116

@@ -123,15 +123,15 @@ def compute_group(cls, data, scales, **params):
123123
ybreaks[0] -= np.diff(np.diff(ybreaks))[0]
124124
xbreaks[0] -= np.diff(np.diff(xbreaks))[0]
125125

126-
df = pd.DataFrame(
126+
bins_grid_long = pd.DataFrame(
127127
{
128128
"xbins": xbins,
129129
"ybins": ybins,
130130
"weight": weight,
131131
}
132132
)
133-
table = df.pivot_table(
134-
"weight", index=["xbins", "ybins"], aggfunc=np.sum
133+
table = bins_grid_long.pivot_table(
134+
"weight", index=["xbins", "ybins"], aggfunc="sum"
135135
)["weight"]
136136

137137
# create rectangles

plotnine/stats/stat_boxplot.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
import numpy as np
22
import pandas as pd
3-
import pandas.api.types as pdtypes
43

54
from ..doctools import document
65
from ..utils import resolution
@@ -97,7 +96,7 @@ def compute_group(cls, data, scales, **params):
9796
else:
9897
width = params["width"]
9998

100-
if pdtypes.is_categorical_dtype(data["x"]):
99+
if isinstance(data["x"].dtype, pd.CategoricalDtype):
101100
x = data["x"].iloc[0]
102101
else:
103102
x = np.mean([data["x"].min(), data["x"].max()])

plotnine/stats/stat_count.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -65,9 +65,11 @@ def compute_group(cls, data, scales, **params):
6565

6666
weight = data.get("weight", [1] * len(x)) # pyright: ignore
6767
width = params["width"]
68-
df = pd.DataFrame({"weight": weight, "x": x})
68+
xdata_long = pd.DataFrame({"x": x, "weight": weight})
6969
# weighted frequency count
70-
count = df.pivot_table("weight", index=["x"], aggfunc=np.sum)["weight"]
70+
count = xdata_long.pivot_table("weight", index=["x"], aggfunc="sum")[
71+
"weight"
72+
]
7173
x = count.index
7274
count = count.to_numpy()
7375
return pd.DataFrame(

plotnine/utils.py

Lines changed: 25 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414

1515
import numpy as np
1616
import pandas as pd
17-
import pandas.api.types as pdtypes
1817

1918
# missing in type stubs
2019
from pandas.core.groupby import DataFrameGroupBy # type: ignore
@@ -207,14 +206,14 @@ def add_margins(
207206
categories = {}
208207
for v in itertools.chain(*vars):
209208
col = df[v]
210-
if not pdtypes.is_categorical_dtype(df[v]):
209+
if not isinstance(df[v].dtype, pd.CategoricalDtype):
211210
col = pd.Categorical(df[v])
212211
categories[v] = col.categories
213212
if "(all)" not in categories[v]:
214213
categories[v] = categories[v].insert(len(categories[v]), "(all)")
215214

216215
for v in merged.columns.intersection(list(categories.keys())):
217-
merged[v] = merged[v].astype(pdtypes.CategoricalDtype(categories[v]))
216+
merged[v] = merged[v].astype(pd.CategoricalDtype(categories[v]))
218217

219218
return merged
220219

@@ -286,9 +285,7 @@ def _id_var(x: pd.Series[Any], drop: bool = False) -> list[int]:
286285
if len(x) == 0:
287286
return []
288287

289-
categorical = pdtypes.is_categorical_dtype(x)
290-
291-
if categorical:
288+
if array_kind.categorical(x):
292289
if drop:
293290
x = x.cat.remove_unused_categories()
294291
lst = list(x.cat.codes + 1)
@@ -593,7 +590,7 @@ def groupby_apply(
593590
axis = 0
594591

595592
lst = []
596-
for _, d in df.groupby(cols):
593+
for _, d in df.groupby(cols, observed=True):
597594
# function fn should be free to modify dataframe d, therefore
598595
# do not mark d as a slice of df i.e no SettingWithCopyWarning
599596
lst.append(func(d, *args, **kwargs))
@@ -1180,10 +1177,30 @@ def ordinal(arr):
11801177
out : bool
11811178
Whether array `arr` is an ordered categorical
11821179
"""
1183-
if pdtypes.is_categorical_dtype(arr):
1180+
if isinstance(arr.dtype, pd.CategoricalDtype):
11841181
return arr.cat.ordered
11851182
return False
11861183

1184+
@staticmethod
1185+
def categorical(arr):
1186+
"""
1187+
Return True if array is a categorical
1188+
1189+
Parameters
1190+
----------
1191+
arr : list-like
1192+
List
1193+
1194+
Returns
1195+
-------
1196+
bool
1197+
Whether array `arr` is a categorical
1198+
"""
1199+
if not hasattr(arr, "dtype"):
1200+
return False
1201+
1202+
return isinstance(arr.dtype, pd.CategoricalDtype)
1203+
11871204

11881205
def log(x, base=None):
11891206
"""

0 commit comments

Comments (0)