Source code for actuarialpy.rolling

"""Rolling-period actuarial summaries."""

from __future__ import annotations

from collections.abc import Iterable

import pandas as pd

from actuarialpy.columns import as_list, per_exposure_name, validate_columns
from actuarialpy.experience import summarize_experience
from actuarialpy.metrics import loss_ratio, per_exposure


def rolling_summary(
    df: pd.DataFrame,
    *,
    date_col: str,
    window: int = 12,
    groupby: str | Iterable[str] | None = None,
    expense_cols: str | Iterable[str],
    revenue_cols: str | Iterable[str],
    exposure_cols: str | Iterable[str] | None = None,
    min_periods: int | None = None,
    drop_incomplete: bool = True,
    ratio_col: str = "loss_ratio",
) -> pd.DataFrame:
    """Calculate rolling sums and ratios by period and optional grouping.

    The data is first summarized per ``groupby`` + ``date_col`` via
    ``summarize_experience``; rolling sums are then taken over the rows of
    that summary within each group, ordered by date.

    The output includes ``period_start`` and ``period_end``. By default only
    complete rolling windows are returned; for a 12-month window, the first
    output row appears after 12 months of data are available.

    Parameters
    ----------
    df
        Input experience data.
    date_col
        Column identifying the period of each row. Values are parsed with
        ``pandas.to_datetime`` and windows are ordered by the parsed dates.
    window
        Number of summary rows (periods) in each rolling window. The window
        is counted in rows, not calendar time, so gaps in the dates are not
        detected.
    groupby
        Optional column(s) within which the rolling windows are computed
        independently.
    expense_cols, revenue_cols
        Column(s) passed to ``summarize_experience``; the summary is expected
        to expose them as ``total_expense`` and ``total_revenue``.
    exposure_cols
        Optional exposure column(s). Each one is rolled alongside the totals
        and gets expense/revenue per-exposure columns, named by
        ``per_exposure_name``.
    min_periods
        Minimum observations required for a rolling sum to be non-null.
        Defaults to ``window``.
    drop_incomplete
        If true, keep only rows whose window holds at least ``window``
        observations and omit the ``months_available`` column. If false, all
        rows are returned together with ``months_available``.
    ratio_col
        Name of the output column holding ``loss_ratio`` of the rolled
        expense and revenue totals.

    Returns
    -------
    pandas.DataFrame
        One row per group and window end, with the group columns,
        ``period_start``, ``period_end``, the rolled amount columns,
        ``ratio_col`` and any per-exposure columns. An empty, column-less
        frame is returned when no group produces any rows.

    Raises
    ------
    ValueError
        If ``window`` is not positive.

    Notes
    -----
    ``validate_columns`` is called on every referenced column before any
    work is done; presumably it raises when a column is missing (confirm
    against ``actuarialpy.columns``).
    """
    if window <= 0:
        raise ValueError("window must be positive")

    groups = as_list(groupby)
    expenses = as_list(expense_cols)
    revenues = as_list(revenue_cols)
    exposures = as_list(exposure_cols)
    validate_columns(df, groups + [date_col] + expenses + revenues + exposures)
    min_periods = window if min_periods is None else min_periods

    # Collapse to one row per group/date first; the rolling window then runs
    # over those per-period totals. This sort fixes the group output order.
    base = summarize_experience(
        df,
        groupby=groups + [date_col],
        expense_cols=expenses,
        revenue_cols=revenues,
        exposure_cols=exposures,
        ratio_col="period_ratio",
    ).sort_values(groups + [date_col])

    amount_cols = ["total_expense", "total_revenue"] + exposures
    pieces = []
    # Without grouping columns the whole summary is treated as a single group.
    iterator = base.groupby(groups, dropna=False, sort=False) if groups else [((), base)]
    for _, part in iterator:
        part = part.copy()
        # Parse dates *before* ordering the rows: sorting the raw values would
        # order non-ISO strings ("10/2023" < "2/2023") lexicographically and
        # silently build windows over the wrong periods.
        part[date_col] = pd.to_datetime(part[date_col])
        part = part.sort_values(date_col).reset_index(drop=True)
        dates = part[date_col]

        rolled = part[amount_cols].rolling(window=window, min_periods=min_periods).sum()
        # Counted with min_periods=1 so partial windows report their true
        # size regardless of the min_periods used for the sums.
        months_available = (
            part["total_expense"].rolling(window=window, min_periods=1).count().astype(int)
        )

        out = part[groups].copy() if groups else pd.DataFrame(index=part.index)
        # A window ending at row i starts window - 1 rows earlier, clamped to
        # the first row of the group for the leading partial windows.
        start_positions = [max(0, i - window + 1) for i in range(len(part))]
        out["period_start"] = dates.iloc[start_positions].tolist()
        out["period_end"] = dates
        out["months_available"] = months_available.values
        for col in amount_cols:
            out[col] = rolled[col].values

        out[ratio_col] = loss_ratio(out["total_expense"], out["total_revenue"])
        for exposure in exposures:
            expense_per = per_exposure_name("total_expense", exposure)
            revenue_per = per_exposure_name("total_revenue", exposure)
            out[expense_per] = per_exposure(out["total_expense"], out[exposure])
            out[revenue_per] = per_exposure(out["total_revenue"], out[exposure])

        if drop_incomplete:
            out = out[out["months_available"] >= window].copy()
        pieces.append(out)

    if not pieces:
        return pd.DataFrame()

    result = pd.concat(pieces, ignore_index=True)
    if drop_incomplete:
        # Every surviving row has a full window, so the count carries no
        # information.
        result = result.drop(columns=["months_available"])
    return result