Source code for actuarialpy.weighted

r"""Explicit-weight aggregation for quantities that cannot be summed.

Additive amounts (claims, premium, exposure) roll up by summation, and
ratios of them roll up as ratios of sums -- that is
:func:`actuarialpy.summarize_experience`'s contract. Quantities that are
*already* rates or ratios at the row level (rate actions, trend
assumptions, persistency) cannot be summed and must be averaged with an
explicit weight:

.. math::

    \bar{x}_w = \frac{\sum_i w_i x_i}{\sum_i w_i}.

The weight is a **required** argument everywhere in this module. An
unweighted mean of rate actions silently equal-weights a small risk with a
large one; forcing the caller to name the weight (premium, exposure, ...)
makes that choice visible and reviewable.
"""

from __future__ import annotations

from collections.abc import Iterable
from typing import Any

import numpy as np
import pandas as pd

from actuarialpy.columns import as_list, validate_columns


def weighted_mean(values: Any, weights: Any, *, skipna: bool = False) -> float:
    """Return the explicitly weighted average ``sum(w * x) / sum(w)``.

    Parameters
    ----------
    values : array-like
        Row-level rates or ratios being averaged.
    weights : array-like
        One weight per value (same shape as ``values``). Every weight must
        be finite and non-negative, and the weights left after any NaN
        filtering must add up to a positive number.
    skipna : bool
        If True, entries whose value is NaN are removed, together with
        their weights, before averaging. The default, False, lets a NaN
        value flow through to the result so that missing data is visible
        rather than hidden behind a quietly smaller base.

    Returns
    -------
    float
        The weighted mean as a built-in ``float``.

    Raises
    ------
    ValueError
        If the shapes differ, if any weight is non-finite or negative, or
        if the (remaining) weights do not sum to a positive total.
    """
    v = np.asarray(values, dtype=float)
    w = np.asarray(weights, dtype=float)
    if v.shape != w.shape:
        raise ValueError(
            f"values and weights must have the same shape, got {v.shape} and {w.shape}"
        )

    # Scalars and n-dimensional inputs are flattened; the pairing of value
    # and weight is preserved because both arrays share one shape.
    if v.ndim != 1:
        v, w = v.ravel(), w.ravel()

    # Weights are validated before any NaN filtering, so a bad weight is
    # rejected even when its value would have been skipped.
    if not np.isfinite(w).all():
        raise ValueError("weights must be finite")
    if (w < 0).any():
        raise ValueError("weights must be non-negative")

    if skipna:
        present = np.logical_not(np.isnan(v))
        v = v[present]
        w = w[present]

    denominator = w.sum()
    if not denominator > 0:
        raise ValueError("weights must sum to a positive total")

    numerator = (v * w).sum()
    return float(numerator / denominator)


def weighted_summary(
    df: pd.DataFrame,
    *,
    value_cols: str | Iterable[str],
    weight_col: str,
    groupby: str | Iterable[str] | None = None,
    skipna: bool = False,
) -> pd.DataFrame:
    """Grouped weighted means of one or more value columns.

    Each value column ``x`` produces ``x_weighted`` =
    :math:`\\sum wx / \\sum w` per group; the weight total is reported as
    ``{weight_col}_total`` so the base of every average is visible.

    Typical use: premium-weighted rate actions by cohort, exposure-weighted
    persistency by segment.

    Parameters
    ----------
    df : pandas.DataFrame
        Row-level data. It is not modified; the work happens on a copy of
        the needed columns.
    value_cols : str or iterable of str
        Column(s) holding the rates or ratios to average.
    weight_col : str
        Column holding the weights. Every weight must be finite and
        non-negative.
    groupby : str, iterable of str, or None
        Column(s) to group by. When no group columns are given the result
        is a single row covering the whole frame.
    skipna : bool
        When True, rows whose value is NaN are left out of both the
        numerator and the denominator, independently for each value
        column. Default False: a group containing any NaN value reports
        NaN for that column.

    Returns
    -------
    pandas.DataFrame
        One row per group that actually occurs in ``df`` (rows with a
        missing group key form their own group), holding the group
        columns, one ``{col}_weighted`` column per value column, and
        ``{weight_col}_total``. The total is the sum of all weights in the
        group, including rows whose value was skipped as NaN.

    Raises
    ------
    ValueError
        If any weight is non-finite or negative, or if the effective
        weight total of any group is not positive for some value column
        (with ``skipna=True`` this includes groups whose values are all
        NaN). ``validate_columns`` presumably also raises when a requested
        column is absent -- confirm against its definition.
    """
    values = as_list(value_cols)
    groups = as_list(groupby)
    validate_columns(df, groups + values + [weight_col])

    # Narrow copy: helper columns added below never touch the caller's frame.
    work = df[groups + values + [weight_col]].copy()

    w = work[weight_col].to_numpy(dtype=float)
    if not np.all(np.isfinite(w)):
        raise ValueError(f"{weight_col!r} must be finite")
    if np.any(w < 0):
        raise ValueError(f"{weight_col!r} must be non-negative")

    for col in values:
        missing = work[col].isna()
        # NaN values add 0 to the numerator; whether their weight stays in
        # the denominator is decided by ``skipna``.
        work[f"_wx_{col}"] = work[col].fillna(0.0) * work[weight_col]
        excluded = missing if skipna else pd.Series(False, index=work.index)
        work[f"_w_{col}"] = work[weight_col].where(~excluded, 0.0)
        if not skipna:
            # Per-row NaN flag, summed in the same aggregation as the
            # weights so the NaN marking lines up with the output rows by
            # construction rather than via a second groupby.
            work[f"_na_{col}"] = missing.astype("int64")

    if groups:
        # observed=True: a categorical group column with unused categories
        # must not produce empty groups, whose zero weight total would
        # trip the positive-total check below on otherwise valid input.
        agg = work.groupby(
            groups, dropna=False, observed=True, as_index=False
        ).sum(numeric_only=True)
    else:
        agg = pd.DataFrame({col: [work[col].sum()] for col in work.columns})

    out = agg[groups].copy() if groups else pd.DataFrame(index=[0])
    for col in values:
        weight_sum = agg[f"_w_{col}"]
        if (weight_sum <= 0).any():
            raise ValueError(
                f"weights sum to zero for at least one group when averaging {col!r}"
            )
        out[f"{col}_weighted"] = agg[f"_wx_{col}"] / weight_sum
        if not skipna:
            # Surface missing data: any NaN value in a group makes that
            # group's average NaN instead of silently treating it as 0.
            has_nan = agg[f"_na_{col}"].to_numpy() > 0
            out.loc[has_nan, f"{col}_weighted"] = np.nan

    out[f"{weight_col}_total"] = agg[weight_col].to_numpy()
    return out.reset_index(drop=True)