r"""Underwriting income statement: the two-tier margin view.
Underwriting results are reported in two tiers, generically across lines of
business. **Gross margin** is revenue less loss (claim / benefit) expense and
excludes operating expense -- which is also why operating expense never
enters a loss ratio. **Gain / (loss)** is gross margin less operating
expense: the underwriting result.

.. math::

    \text{total revenue} &= \textstyle\sum \text{revenue components
        (premium, refunds, recasts, ...)} \\
    \text{total loss} &= \textstyle\sum \text{loss components
        (claims by category, benefits, ...)} \\
    \text{gross margin} &= \text{total revenue} - \text{total loss} \\
    \text{gain / (loss)} &= \text{gross margin} - \text{total expense}

The three ratios mirror :func:`actuarialpy.loss_ratio`,
:func:`actuarialpy.expense_ratio`, and :func:`actuarialpy.combined_ratio`.
Component labels and ratio names are the caller's vocabulary: the library
only sums the components, and domain naming (a health shop's ``mlr``, a life
shop's ``benefit_ratio``) comes from the ``profile`` / ``labels`` options on
the output views -- never from the calculation itself.
Ratio conventions differ across shops -- and often across metrics on the same
exhibit -- so denominators here are **explicit parameters**, never
assumptions:

* ``loss_ratio``: total loss / *loss-ratio denominator*. Default
  ``"total_revenue"`` (revenue net of refunds and other offsets).
* ``expense_ratio``: total expense / *expense-ratio denominator*. Default
  ``"premium"``: the gross premium component named by ``premium_label``,
  before refunds.
* ``gain_ratio``: gain / *gain denominator*. Default ``"total_revenue"``.

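With mixed denominators the identity ``gain ratio = 1 - combined ratio``
holds only approximately. It is exact only when every denominator is total
revenue (or the premium component equals total revenue, i.e. there are no
offsets): on a common base ``D`` the gap is ``total revenue / D - 1``.
:meth:`UnderwritingSummary.reconciliation` reports the difference so the
convention drift is visible instead of silent.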
These are management / pricing metrics. Regulated ratio calculations (for
example, a rebate loss ratio prescribed by statute) have their own numerator
and denominator adjustments and are out of scope for this module.
"""
from __future__ import annotations
from collections.abc import Iterable, Mapping
from dataclasses import dataclass, field
from typing import Any
import pandas as pd
from actuarialpy.columns import as_list, per_exposure_name, sum_columns, validate_columns
from actuarialpy.metrics import per_exposure, safe_divide
from actuarialpy.profiles import apply_profile_labels, get_profile_defaults
# The only denominator keywords the ratio options accept.
_DENOMINATORS = ("total_revenue", "premium")


def _validate_denominator(value: str, name: str) -> str:
    """Return ``value`` unchanged when it is a supported denominator keyword.

    Parameters
    ----------
    value : str
        The denominator keyword supplied by the caller.
    name : str
        Name of the option being checked; used only in the error message.

    Raises
    ------
    ValueError
        If ``value`` is not one of ``_DENOMINATORS``.
    """
    if value in _DENOMINATORS:
        return value
    raise ValueError(f"{name} must be one of {_DENOMINATORS}, got {value!r}")
def _ratio_rename(profile: str | None, labels: Mapping[str, str] | None) -> dict[str, str]:
    """Build the output rename map: profile ratio name first, then ``labels``.

    The profile contributes at most one entry, mapping ``"loss_ratio"`` to the
    ``"ratio_col"`` value returned by ``get_profile_defaults(profile)``; no
    entry is added when that value is missing or already ``"loss_ratio"``.
    Entries from ``labels`` are merged afterwards, so a ``"loss_ratio"`` key in
    ``labels`` replaces the profile's name.
    """
    profile_name = get_profile_defaults(profile).get("ratio_col")
    from_profile: dict[str, str] = {}
    if not (profile_name is None or profile_name == "loss_ratio"):
        from_profile["loss_ratio"] = profile_name
    # Later keys win, which gives explicit labels precedence over the profile.
    return {**from_profile, **dict(labels or {})}
@dataclass
class UnderwritingSummary:
    """Two-tier underwriting income statement for a single entity or period.

    Tier one is gross margin (total revenue less total loss); tier two is
    gain / (loss) (gross margin less total operating expense).

    Parameters
    ----------
    revenue : Mapping[str, float]
        Labeled revenue components (e.g. ``{"premium": ..., "refund": ...}``).
        Offsets such as refunds should be signed (negative). The library
        never interprets the labels; it only sums them.
    losses : Mapping[str, float]
        Labeled loss components -- claim or benefit expense by whatever
        categories the caller uses.
    expenses : Mapping[str, float] | float
        Operating expense, itemized or as a single amount. Default 0.
    exposure : float, optional
        Exposure units (member months, policy months, earned exposures, ...)
        for per-exposure figures. Required only when a ``*_per_exposure``
        property is accessed.
    premium_label : str
        Which revenue component is the gross premium, used when a
        denominator is ``"premium"``. Default ``"premium"``.
    loss_ratio_denominator, expense_ratio_denominator, gain_denominator : str
        ``"total_revenue"`` or ``"premium"``. Defaults follow the common
        exhibit convention: loss and gain ratios over total revenue, expense
        ratio over gross premium.

    Examples
    --------
    >>> uw = UnderwritingSummary(
    ...     revenue={"premium": 1_200_000.0, "refund": -4_000.0},
    ...     losses={"claims": 1_090_000.0},
    ...     expenses=110_000.0,
    ...     exposure=3_000.0,
    ... )
    >>> round(uw.gross_margin, 0)
    106000.0
    >>> round(uw.gain, 0)
    -4000.0
    """

    revenue: Mapping[str, float]
    losses: Mapping[str, float]
    expenses: Mapping[str, float] | float = 0.0
    exposure: float | None = None
    premium_label: str = "premium"
    loss_ratio_denominator: str = "total_revenue"
    expense_ratio_denominator: str = "premium"
    gain_denominator: str = "total_revenue"
    # Itemized view of ``expenses``. It is a snapshot built once in
    # ``__post_init__``; reassigning ``expenses`` afterwards does not refresh it.
    _expense_items: Mapping[str, float] = field(init=False, repr=False)

    def __post_init__(self) -> None:
        """Validate the inputs and normalize ``expenses`` into labeled items.

        Raises
        ------
        ValueError
            If ``revenue`` or ``losses`` is empty, a denominator option is not
            a supported keyword, a ``"premium"`` denominator is requested but
            ``premium_label`` is not a revenue component, or ``exposure`` is
            provided but is not positive.
        """
        if not self.revenue:
            raise ValueError("revenue must contain at least one component")
        if not self.losses:
            raise ValueError("losses must contain at least one component")
        if isinstance(self.expenses, Mapping):
            self._expense_items = dict(self.expenses)
        else:
            # A scalar expense becomes a single line labeled "expense".
            self._expense_items = {"expense": float(self.expenses)}
        for name in (
            "loss_ratio_denominator",
            "expense_ratio_denominator",
            "gain_denominator",
        ):
            _validate_denominator(getattr(self, name), name)
        uses_premium = "premium" in (
            self.loss_ratio_denominator,
            self.expense_ratio_denominator,
            self.gain_denominator,
        )
        if uses_premium and self.premium_label not in self.revenue:
            raise ValueError(
                f"premium_label {self.premium_label!r} is not a revenue "
                f"component; available: {sorted(self.revenue)}"
            )
        # ``not x > 0`` (rather than ``x <= 0``) also rejects NaN.
        if self.exposure is not None and not self.exposure > 0:
            raise ValueError(
                f"exposure must be positive when provided, got {self.exposure!r}"
            )

    @classmethod
    def from_per_exposure(
        cls,
        *,
        revenue_per_exposure: Mapping[str, float],
        loss_per_exposure: Mapping[str, float],
        expense_per_exposure: Mapping[str, float] | float = 0.0,
        exposure: float,
        **kwargs: Any,
    ) -> "UnderwritingSummary":
        """Build a summary from per-exposure components and total exposure.

        Forecast exhibits are usually stated per exposure unit (PMPM in a
        health shop, per policy month in life); this converts each component
        to amounts by multiplying by ``exposure`` so totals, per-exposure
        figures, and ratios all come from one set of inputs.

        Parameters
        ----------
        revenue_per_exposure, loss_per_exposure : Mapping[str, float]
            Labeled per-exposure components.
        expense_per_exposure : Mapping[str, float] | float
            Per-exposure operating expense, itemized or a single amount.
        exposure : float
            Total exposure units; must be positive.
        **kwargs
            Forwarded unchanged to the constructor (``premium_label`` and the
            denominator options).

        Raises
        ------
        ValueError
            If ``exposure`` is not positive, or the constructor rejects the
            scaled inputs.
        """
        if not exposure > 0:
            raise ValueError(f"exposure must be positive, got {exposure!r}")
        units = float(exposure)
        if isinstance(expense_per_exposure, Mapping):
            expenses: Mapping[str, float] | float = {
                k: v * units for k, v in expense_per_exposure.items()
            }
        else:
            expenses = float(expense_per_exposure) * units
        return cls(
            revenue={k: v * units for k, v in revenue_per_exposure.items()},
            losses={k: v * units for k, v in loss_per_exposure.items()},
            expenses=expenses,
            exposure=units,
            **kwargs,
        )

    # ----- totals ----- #

    @property
    def total_revenue(self) -> float:
        """Sum of all revenue components."""
        return float(sum(self.revenue.values()))

    @property
    def total_loss(self) -> float:
        """Sum of all loss components."""
        return float(sum(self.losses.values()))

    @property
    def total_expense(self) -> float:
        """Sum of the expense items captured at construction."""
        return float(sum(self._expense_items.values()))

    @property
    def gross_margin(self) -> float:
        """Tier one: total revenue less loss expense (operating expense excluded)."""
        return self.total_revenue - self.total_loss

    @property
    def gain(self) -> float:
        """Tier two: gross margin less operating expense."""
        return self.gross_margin - self.total_expense

    # ----- ratios (explicit denominators) ----- #

    def _denominator(self, which: str) -> float:
        """Resolve a denominator keyword to its amount.

        ``"total_revenue"`` gives :attr:`total_revenue`; any other value is
        treated as ``"premium"`` and gives the ``premium_label`` revenue
        component (the keyword is validated in ``__post_init__``).
        """
        if which == "total_revenue":
            return self.total_revenue
        return float(self.revenue[self.premium_label])

    @property
    def loss_ratio(self) -> float:
        """Loss expense over the ``loss_ratio_denominator``."""
        return float(
            safe_divide(self.total_loss, self._denominator(self.loss_ratio_denominator))
        )

    @property
    def expense_ratio(self) -> float:
        """Operating expense over the ``expense_ratio_denominator``."""
        return float(
            safe_divide(
                self.total_expense, self._denominator(self.expense_ratio_denominator)
            )
        )

    @property
    def combined_ratio(self) -> float:
        """Loss ratio plus expense ratio, each on its own denominator."""
        return self.loss_ratio + self.expense_ratio

    @property
    def gross_margin_ratio(self) -> float:
        """Gross margin over the ``loss_ratio_denominator``.

        This is the complement of :attr:`loss_ratio` (``1 - loss_ratio``) only
        when that denominator is total revenue; on a ``"premium"`` base the
        two sum to ``total_revenue / premium`` instead of 1.
        """
        return float(
            safe_divide(
                self.gross_margin, self._denominator(self.loss_ratio_denominator)
            )
        )

    @property
    def gain_ratio(self) -> float:
        """Gain / (loss) over the ``gain_denominator``."""
        return float(
            safe_divide(self.gain, self._denominator(self.gain_denominator))
        )

    def reconciliation(self) -> float:
        """``gain_ratio - (1 - combined_ratio)``: the denominator-convention gap.

        When all three ratios share one nonzero base ``D`` the value reduces
        to ``total_revenue / D - 1``, so it is zero only when every
        denominator is total revenue (or the premium component equals total
        revenue). Otherwise it is the size of the drift introduced by the
        chosen bases. Useful as an exhibit footnote or a data-quality check.
        """
        return self.gain_ratio - (1.0 - self.combined_ratio)

    # ----- per exposure ----- #

    def _require_exposure(self) -> float:
        """Return ``exposure`` as a float, raising ``ValueError`` if it is unset."""
        if self.exposure is None:
            raise ValueError(
                "exposure is required for per-exposure figures; pass it to "
                "the constructor or use from_per_exposure(...)"
            )
        return float(self.exposure)

    @property
    def revenue_per_exposure(self) -> float:
        """Total revenue divided by exposure."""
        return self.total_revenue / self._require_exposure()

    @property
    def loss_per_exposure(self) -> float:
        """Total loss divided by exposure."""
        return self.total_loss / self._require_exposure()

    @property
    def expense_per_exposure(self) -> float:
        """Total expense divided by exposure."""
        return self.total_expense / self._require_exposure()

    @property
    def gross_margin_per_exposure(self) -> float:
        """Gross margin divided by exposure."""
        return self.gross_margin / self._require_exposure()

    @property
    def gain_per_exposure(self) -> float:
        """Gain / (loss) divided by exposure."""
        return self.gain / self._require_exposure()

    # ----- views ----- #

    def to_frame(
        self,
        *,
        profile: str | None = None,
        labels: Mapping[str, str] | None = None,
    ) -> pd.DataFrame:
        """One tidy row of every total and ratio (per-exposure when given).

        ``profile`` renames only the loss-ratio column to the domain's ratio
        name (``"health"`` -> ``mlr``, ``"life"`` -> ``benefit_ratio``);
        ``labels`` renames any output column. Both renames are applied in a
        single pass keyed on the unrenamed column names. Calculations are
        unaffected.
        """
        row: dict[str, float] = {
            "total_revenue": self.total_revenue,
            "total_loss": self.total_loss,
            "total_expense": self.total_expense,
            "gross_margin": self.gross_margin,
            "gain": self.gain,
            "loss_ratio": self.loss_ratio,
            "expense_ratio": self.expense_ratio,
            "combined_ratio": self.combined_ratio,
            "gross_margin_ratio": self.gross_margin_ratio,
            "gain_ratio": self.gain_ratio,
        }
        if self.exposure is not None:
            row["exposure"] = float(self.exposure)
            row["revenue_per_exposure"] = self.revenue_per_exposure
            row["loss_per_exposure"] = self.loss_per_exposure
            row["expense_per_exposure"] = self.expense_per_exposure
            row["gross_margin_per_exposure"] = self.gross_margin_per_exposure
            row["gain_per_exposure"] = self.gain_per_exposure
        frame = pd.DataFrame([row])
        return frame.rename(columns=_ratio_rename(profile, labels))

    def statement(
        self,
        *,
        profile: str | None = None,
        labels: Mapping[str, str] | None = None,
    ) -> pd.Series:
        """Exhibit-shaped Series: components, subtotals, tiers, then ratios.

        Lines appear in statement order: revenue components and their total,
        loss components with total and loss ratio, gross margin, expense
        items with total and expense ratio, then gain and gain ratio.

        Every component keeps its own line. If a label is reused across tiers
        (for example an ``"other"`` revenue item and an ``"other"`` loss item)
        or matches a subtotal name, the index contains that label more than
        once rather than one line silently overwriting the other.
        """
        # Ordered (label, value) pairs rather than a dict: a dict would
        # collapse repeated labels and the exhibit would no longer foot.
        entries: list[tuple[str, float]] = []
        entries.extend((label, float(value)) for label, value in self.revenue.items())
        entries.append(("total_revenue", self.total_revenue))
        entries.extend((label, float(value)) for label, value in self.losses.items())
        entries.append(("total_loss", self.total_loss))
        entries.append(("loss_ratio", self.loss_ratio))
        entries.append(("gross_margin", self.gross_margin))
        entries.extend(
            (label, float(value)) for label, value in self._expense_items.items()
        )
        entries.append(("total_expense", self.total_expense))
        entries.append(("expense_ratio", self.expense_ratio))
        entries.append(("gain", self.gain))
        entries.append(("gain_ratio", self.gain_ratio))
        series = pd.Series(
            [value for _, value in entries],
            index=[label for label, _ in entries],
            name="statement",
        )
        return series.rename(index=_ratio_rename(profile, labels))
def underwriting_summary(
    df: pd.DataFrame,
    *,
    groupby: str | Iterable[str] | None = None,
    revenue_cols: str | Iterable[str],
    loss_cols: str | Iterable[str],
    expense_cols: str | Iterable[str],
    exposure_col: str | None = None,
    premium_col: str | None = None,
    loss_ratio_denominator: str = "total_revenue",
    expense_ratio_denominator: str = "premium",
    gain_denominator: str = "total_revenue",
    profile: str | None = None,
    labels: dict[str, str] | None = None,
) -> pd.DataFrame:
    """Grouped two-tier underwriting summary from a tidy table.

    Component columns are **summed first** and every ratio is computed on the
    aggregated totals (ratio of sums, never an average of row-level ratios) --
    the same contract as :func:`actuarialpy.summarize_experience`.

    Parameters
    ----------
    df : pd.DataFrame
        One row per entity / period at whatever grain is being rolled up.
    groupby : str | Iterable[str], optional
        Grouping columns; omit for a single all-rows summary.
    revenue_cols, loss_cols, expense_cols : str | Iterable[str]
        Component columns for each tier. Revenue offsets (refunds) should be
        signed.
    exposure_col : str, optional
        Exposure column; adds ``{amount}_per_{exposure_col}`` output columns.
        Domain-style names (a health shop's ``_pmpm``) are applied via
        ``labels``, never inferred from the column name.
    premium_col : str, optional
        Gross premium column, required when any denominator is
        ``"premium"``.
    loss_ratio_denominator, expense_ratio_denominator, gain_denominator : str
        ``"total_revenue"`` or ``"premium"``; see the module docstring for
        the convention discussion.
    profile : str, optional
        Renames only the loss-ratio column to the domain's ratio name
        (``"health"`` -> ``mlr``, ``"life"`` -> ``benefit_ratio``).
    labels : dict, optional
        Explicit output column renames, applied after ``profile``.

    Returns
    -------
    pd.DataFrame
        Group keys, component sums, ``total_revenue``, ``total_loss``,
        ``total_expense``, ``gross_margin``, ``gain``, the three ratios plus
        ``gross_margin_ratio`` and ``gain_ratio``, and per-exposure columns
        when ``exposure_col`` is given.

    Raises
    ------
    ValueError
        If a denominator option is not a supported keyword, or any
        denominator is ``"premium"`` while ``premium_col`` is ``None``.
    """
    group_keys = as_list(groupby)
    revenue_names = as_list(revenue_cols)
    loss_names = as_list(loss_cols)
    expense_names = as_list(expense_cols)

    # --- option checks -------------------------------------------------- #
    denominator_choices = {
        "loss_ratio_denominator": loss_ratio_denominator,
        "expense_ratio_denominator": expense_ratio_denominator,
        "gain_denominator": gain_denominator,
    }
    for option, choice in denominator_choices.items():
        _validate_denominator(choice, option)
    if premium_col is None and "premium" in denominator_choices.values():
        raise ValueError(
            'premium_col is required when any denominator is "premium"'
        )

    # --- columns to aggregate (first occurrence wins, order preserved) -- #
    components = list(dict.fromkeys(revenue_names + loss_names + expense_names))
    if premium_col is not None:
        validate_columns(df, [premium_col])
        if premium_col not in components:
            components.append(premium_col)
    exposure_names = [] if exposure_col is None else [exposure_col]
    validate_columns(df, group_keys + components + exposure_names)

    # --- sum first ------------------------------------------------------ #
    if not group_keys:
        summary = pd.DataFrame(
            {col: [df[col].sum()] for col in components + exposure_names}
        )
    else:
        selected = df[group_keys + components + exposure_names]
        grouped = selected.groupby(group_keys, dropna=False, as_index=False)
        summary = grouped.sum(numeric_only=True)

    # --- tiers ---------------------------------------------------------- #
    for total_name, names in (
        ("total_revenue", revenue_names),
        ("total_loss", loss_names),
        ("total_expense", expense_names),
    ):
        summary[total_name] = sum_columns(summary, names)
    summary["gross_margin"] = summary["total_revenue"] - summary["total_loss"]
    summary["gain"] = summary["gross_margin"] - summary["total_expense"]

    # --- ratios on the aggregated totals -------------------------------- #
    base_column = {"total_revenue": "total_revenue", "premium": premium_col}

    def base_for(choice: str) -> pd.Series:
        # Looked up at call time so an unused premium base is never touched.
        return summary[base_column[choice]]

    summary["loss_ratio"] = safe_divide(
        summary["total_loss"], base_for(loss_ratio_denominator)
    )
    summary["expense_ratio"] = safe_divide(
        summary["total_expense"], base_for(expense_ratio_denominator)
    )
    summary["combined_ratio"] = summary["loss_ratio"] + summary["expense_ratio"]
    summary["gross_margin_ratio"] = safe_divide(
        summary["gross_margin"], base_for(loss_ratio_denominator)
    )
    summary["gain_ratio"] = safe_divide(summary["gain"], base_for(gain_denominator))

    # --- output layout -------------------------------------------------- #
    column_order = [
        *group_keys,
        # Keep only components that survived the aggregation step.
        *(col for col in components if col in summary.columns),
        *exposure_names,
        "total_revenue",
        "total_loss",
        "total_expense",
        "gross_margin",
        "gain",
        "loss_ratio",
        "expense_ratio",
        "combined_ratio",
        "gross_margin_ratio",
        "gain_ratio",
    ]
    if exposure_col is not None:
        per_unit_bases = {
            "total_revenue": "revenue",
            "total_loss": "loss",
            "total_expense": "expense",
            "gross_margin": "gross_margin",
            "gain": "gain",
        }
        for amount, base in per_unit_bases.items():
            per_unit_name = per_exposure_name(base, exposure_col)
            summary[per_unit_name] = per_exposure(
                summary[amount], summary[exposure_col]
            )
            column_order.append(per_unit_name)

    # De-duplicate while preserving order before selecting the final layout.
    summary = summary[list(dict.fromkeys(column_order))]
    renamed = summary.rename(columns=_ratio_rename(profile, None))
    return apply_profile_labels(renamed, labels=labels)