Source code for actuarialpy.weighted
r"""Explicit-weight aggregation for quantities that cannot be summed.
Additive amounts (claims, premium, exposure) roll up by summation, and
ratios of them roll up as ratios of sums -- that is
:func:`actuarialpy.summarize_experience`'s contract. Quantities that are
*already* rates or ratios at the row level (rate actions, trend
assumptions, persistency) cannot be summed and must be averaged with an
explicit weight:
.. math::
\bar{x}_w = \frac{\sum_i w_i x_i}{\sum_i w_i}.
The weight is a **required** argument everywhere in this module. An
unweighted mean of rate actions silently equal-weights a small risk with a
large one; forcing the caller to name the weight (premium, exposure, ...)
makes that choice visible and reviewable.
"""
from __future__ import annotations
from collections.abc import Iterable
from typing import Any
import numpy as np
import pandas as pd
from actuarialpy.columns import as_list, validate_columns
[docs]
def weighted_mean(values: Any, weights: Any, *, skipna: bool = False) -> float:
    """Return the weighted mean of ``values`` under explicit ``weights``.

    Parameters
    ----------
    values : array-like
        Row-level rates or ratios to average.
    weights : array-like
        One weight per value (same shape as ``values``). Every weight must
        be finite and non-negative, and the weights that end up being used
        must add to a positive total.
    skipna : bool
        If True, value/weight pairs whose value is NaN are discarded before
        averaging. If False (the default) a NaN value makes the result NaN,
        so missing data is visible rather than quietly reducing the base.

    Returns
    -------
    float
        ``sum(w * x) / sum(w)`` over the retained pairs.

    Raises
    ------
    ValueError
        If the shapes differ, a weight is non-finite or negative, or the
        retained weights do not sum to a positive total.
    """
    data = np.asarray(values, dtype=float)
    wts = np.asarray(weights, dtype=float)

    if data.shape != wts.shape:
        raise ValueError(
            f"values and weights must have the same shape, got {data.shape} and {wts.shape}"
        )

    # Inputs of any rank are treated as one flat list of (value, weight) pairs.
    data = data.reshape(-1)
    wts = wts.reshape(-1)

    # Weights are validated on the full input, before any NaN pairs are dropped.
    if not np.isfinite(wts).all():
        raise ValueError("weights must be finite")
    if (wts < 0).any():
        raise ValueError("weights must be non-negative")

    if skipna:
        present = np.logical_not(np.isnan(data))
        data = data[present]
        wts = wts[present]

    denominator = wts.sum()
    # Written as "not > 0" so an empty or all-zero weight set is rejected.
    if not denominator > 0:
        raise ValueError("weights must sum to a positive total")

    numerator = (data * wts).sum()
    return float(numerator / denominator)
[docs]
def weighted_summary(
    df: pd.DataFrame,
    *,
    value_cols: str | Iterable[str],
    weight_col: str,
    groupby: str | Iterable[str] | None = None,
    skipna: bool = False,
) -> pd.DataFrame:
    """Grouped weighted means of one or more value columns.

    Each value column ``x`` produces ``x_weighted`` =
    :math:`\\sum wx / \\sum w` per group; the weight total is reported as
    ``{weight_col}_total`` so the base of every average is visible.

    Typical use: premium-weighted rate actions by cohort, exposure-weighted
    persistency by segment.

    Parameters
    ----------
    df : pandas.DataFrame
        Row-level data holding the value, weight and grouping columns.
    value_cols : str or iterable of str
        Columns to average. A name listed more than once is averaged once.
    weight_col : str
        Column of finite, non-negative weights.
    groupby : str, iterable of str, or None
        Grouping columns. When ``as_list(groupby)`` is empty (presumably the
        ``None`` default -- confirm against ``as_list``) the whole frame is
        treated as a single group and the result has one row.
    skipna : bool
        When True, a row whose value is NaN contributes neither value nor
        weight to that column's average. Default False: any NaN value in a
        group makes that group's average NaN.

    Returns
    -------
    pandas.DataFrame
        One row per group: the grouping columns, one ``{col}_weighted``
        column per value column, and ``{weight_col}_total``.

    Raises
    ------
    ValueError
        If a weight is non-finite or negative, or if the weights usable for
        some value column sum to zero (or less) in at least one group.

    Notes
    -----
    ``{weight_col}_total`` sums the weight over *all* rows of the group,
    including rows dropped from an average because of ``skipna=True``.
    """
    values = as_list(value_cols)
    groups = as_list(groupby)
    requested = groups + values + [weight_col]
    validate_columns(df, requested)

    # Select every label exactly once. Selecting a repeated label (the same
    # value column twice, or the weight column also listed as a value) would
    # yield duplicate column labels, after which ``work[col]`` is a DataFrame
    # rather than a Series and the helper-column arithmetic below breaks.
    work = df[list(dict.fromkeys(requested))].copy()
    unique_values = list(dict.fromkeys(values))

    w = work[weight_col].to_numpy(dtype=float)
    if not np.all(np.isfinite(w)):
        raise ValueError(f"{weight_col!r} must be finite")
    if np.any(w < 0):
        raise ValueError(f"{weight_col!r} must be non-negative")

    for col in unique_values:
        missing = work[col].isna()
        # NaN values are zero-filled so the numerator sum stays finite; the
        # NaN is re-imposed (skipna=False) or its weight removed (skipna=True).
        work[f"_wx_{col}"] = work[col].fillna(0.0) * work[weight_col]
        if skipna:
            work[f"_w_{col}"] = work[weight_col].where(~missing, 0.0)
        else:
            work[f"_w_{col}"] = work[weight_col]
            # Summed alongside everything else, this counts NaN rows per
            # group, so no second group-by is needed to flag affected groups.
            work[f"_na_{col}"] = missing.astype(float)

    if groups:
        agg = work.groupby(groups, dropna=False, as_index=False).sum(numeric_only=True)
        out = agg[groups].copy()
    else:
        agg = pd.DataFrame({name: [work[name].sum()] for name in work.columns})
        out = pd.DataFrame(index=[0])

    for col in unique_values:
        weight_sum = agg[f"_w_{col}"]
        if (weight_sum <= 0).any():
            raise ValueError(
                f"weights sum to zero for at least one group when averaging {col!r}"
            )
        mean = agg[f"_wx_{col}"] / weight_sum
        if not skipna:
            # Any NaN value in a group propagates to that group's average.
            mean = mean.mask(agg[f"_na_{col}"] > 0)
        out[f"{col}_weighted"] = mean

    out[f"{weight_col}_total"] = agg[weight_col].to_numpy()
    return out.reset_index(drop=True)