# Source code for actuarialpy.underwriting

r"""Underwriting income statement: the two-tier margin view.

Underwriting results are reported in two tiers, generically across lines of
business. **Gross margin** is revenue less loss (claim / benefit) expense and
excludes operating expense -- which is also why operating expense never
enters a loss ratio. **Gain / (loss)** is gross margin less operating
expense: the underwriting result.

.. math::

    \text{total revenue} &= \textstyle\sum \text{revenue components
        (premium, refunds, recasts, ...)} \\
    \text{total loss}    &= \textstyle\sum \text{loss components
        (claims by category, benefits, ...)} \\
    \text{gross margin}  &= \text{total revenue} - \text{total loss} \\
    \text{gain / (loss)} &= \text{gross margin} - \text{total expense}

The three ratios mirror :func:`actuarialpy.loss_ratio`,
:func:`actuarialpy.expense_ratio`, and :func:`actuarialpy.combined_ratio`.
Component labels and ratio names are the caller's vocabulary: the library
only sums the components, and domain naming (a health shop's ``mlr``, a life
shop's ``benefit_ratio``) comes from the ``profile`` / ``labels`` options on
the output views -- never from the calculation itself.

Ratio conventions differ across shops -- and often across metrics on the same
exhibit -- so denominators here are **explicit parameters**, never
assumptions:

* ``loss_ratio``: total loss / *loss-ratio denominator*. Default
  ``"total_revenue"`` (revenue net of refunds and other offsets).
* ``expense_ratio``: total expense / *expense-ratio denominator*. Default
  ``"premium"``: the gross premium component named by ``premium_label``,
  before refunds.
* ``gain_ratio``: gain / *gain denominator*. Default ``"total_revenue"``.

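With mixed denominators the identity ``gain ratio = 1 - combined ratio``
holds only approximately; it is exact only when every denominator is total
revenue. (Over a single shared denominator ``D`` the two sides differ by
``(total revenue - D) / D`` -- for gross premium, the refunds and other
offsets as a share of premium.) :meth:`UnderwritingSummary.reconciliation`
reports the difference so the convention drift is visible instead of silent.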

These are management / pricing metrics. Regulated ratio calculations (for
example, a rebate loss ratio prescribed by statute) have their own numerator
and denominator adjustments and are out of scope for this module.
"""

from __future__ import annotations

from collections.abc import Iterable, Mapping
from dataclasses import dataclass, field
from typing import Any

import pandas as pd

from actuarialpy.columns import as_list, per_exposure_name, sum_columns, validate_columns
from actuarialpy.metrics import per_exposure, safe_divide
from actuarialpy.profiles import apply_profile_labels, get_profile_defaults

_DENOMINATORS = ("total_revenue", "premium")

def _validate_denominator(value: str, name: str) -> str:
    if value not in _DENOMINATORS:
        raise ValueError(
            f"{name} must be one of {_DENOMINATORS}, got {value!r}"
        )
    return value


def _ratio_rename(profile: str | None, labels: Mapping[str, str] | None) -> dict[str, str]:
    """Build the output rename map for a view.

    The profile's ``ratio_col`` (when it is set and differs from
    ``"loss_ratio"``) becomes the new name of the ``loss_ratio`` entry;
    caller ``labels`` are layered on top and win on any shared key.
    """
    domain_ratio = get_profile_defaults(profile).get("ratio_col")
    from_profile: dict[str, str] = {}
    if not (domain_ratio is None or domain_ratio == "loss_ratio"):
        from_profile["loss_ratio"] = domain_ratio
    overrides = dict(labels or {})
    return {**from_profile, **overrides}



# NOTE: a "[docs]" link from the rendered source listing stood here; it is not code.
@dataclass
class UnderwritingSummary:
    """Two-tier underwriting income statement for a single entity or period.

    Tier one is gross margin (total revenue less total loss); tier two is
    gain / (loss) (gross margin less total operating expense). Every total
    is a plain sum of the caller's labeled components.

    Parameters
    ----------
    revenue : Mapping[str, float]
        Labeled revenue components (e.g. ``{"premium": ..., "refund": ...}``).
        Offsets such as refunds should be signed (negative). The library
        never interprets the labels; it only sums them.
    losses : Mapping[str, float]
        Labeled loss components -- claim or benefit expense by whatever
        categories the caller uses.
    expenses : Mapping[str, float] | float
        Operating expense, itemized or as a single amount. Default 0.
    exposure : float, optional
        Exposure units (member months, policy months, earned exposures, ...)
        for per-exposure figures. Required only when a ``*_per_exposure``
        property is accessed.
    premium_label : str
        Which revenue component is the gross premium, used when a
        denominator is ``"premium"``. Default ``"premium"``.
    loss_ratio_denominator, expense_ratio_denominator, gain_denominator : str
        ``"total_revenue"`` or ``"premium"``. Defaults follow the common
        exhibit convention: loss and gain ratios over total revenue, expense
        ratio over gross premium.

    Raises
    ------
    ValueError
        If ``revenue`` or ``losses`` is empty; if a denominator option is
        not a supported name; if any denominator is ``"premium"`` but
        ``premium_label`` is not a key of ``revenue``; or if ``exposure`` is
        provided and is not greater than zero.

    Examples
    --------
    >>> uw = UnderwritingSummary(
    ...     revenue={"premium": 1_200_000.0, "refund": -4_000.0},
    ...     losses={"claims": 1_090_000.0},
    ...     expenses=110_000.0,
    ...     exposure=3_000.0,
    ... )
    >>> round(uw.gross_margin, 0)
    106000.0
    >>> round(uw.gain, 0)
    -4000.0
    """

    revenue: Mapping[str, float]
    losses: Mapping[str, float]
    expenses: Mapping[str, float] | float = 0.0
    exposure: float | None = None
    premium_label: str = "premium"
    loss_ratio_denominator: str = "total_revenue"
    expense_ratio_denominator: str = "premium"
    gain_denominator: str = "total_revenue"
    # Normalized expense components, filled in by ``__post_init__``: a copy of
    # the ``expenses`` mapping, or ``{"expense": amount}`` for a single number.
    _expense_items: Mapping[str, float] = field(init=False, repr=False)

    def __post_init__(self) -> None:
        """Validate the inputs and normalize ``expenses`` into labeled items."""
        if not self.revenue:
            raise ValueError("revenue must contain at least one component")
        if not self.losses:
            raise ValueError("losses must contain at least one component")
        if isinstance(self.expenses, Mapping):
            self._expense_items = dict(self.expenses)
        else:
            self._expense_items = {"expense": float(self.expenses)}
        for name in (
            "loss_ratio_denominator",
            "expense_ratio_denominator",
            "gain_denominator",
        ):
            _validate_denominator(getattr(self, name), name)
        # The premium component is only needed when some ratio is quoted
        # over gross premium, so it is only required in that case.
        uses_premium = "premium" in (
            self.loss_ratio_denominator,
            self.expense_ratio_denominator,
            self.gain_denominator,
        )
        if uses_premium and self.premium_label not in self.revenue:
            raise ValueError(
                f"premium_label {self.premium_label!r} is not a revenue "
                f"component; available: {sorted(self.revenue)}"
            )
        # ``not x > 0`` (rather than ``x <= 0``) also rejects NaN.
        if self.exposure is not None and not self.exposure > 0:
            raise ValueError(
                f"exposure must be positive when provided, got {self.exposure!r}"
            )

    @classmethod
    def from_per_exposure(
        cls,
        *,
        revenue_per_exposure: Mapping[str, float],
        loss_per_exposure: Mapping[str, float],
        expense_per_exposure: Mapping[str, float] | float = 0.0,
        exposure: float,
        **kwargs: Any,
    ) -> "UnderwritingSummary":
        """Build a summary from per-exposure components and total exposure.

        Forecast exhibits are usually stated per exposure unit (PMPM in a
        health shop, per policy month in life); this converts each component
        to amounts by ``exposure`` so totals, per-exposure figures, and
        ratios all come from one set of inputs.

        Parameters
        ----------
        revenue_per_exposure, loss_per_exposure : Mapping[str, float]
            Labeled components stated per exposure unit.
        expense_per_exposure : Mapping[str, float] | float
            Operating expense per exposure unit, itemized or as a single
            amount. Default 0.
        exposure : float
            Total exposure units; every component is multiplied by it.
        **kwargs
            Forwarded unchanged to the constructor (``premium_label`` and
            the denominator options).

        Raises
        ------
        ValueError
            If ``exposure`` is not greater than zero, or for any reason the
            constructor rejects the scaled inputs.
        """
        if not exposure > 0:
            raise ValueError(f"exposure must be positive, got {exposure!r}")
        units = float(exposure)
        if isinstance(expense_per_exposure, Mapping):
            expenses: Mapping[str, float] | float = {
                k: v * units for k, v in expense_per_exposure.items()
            }
        else:
            expenses = float(expense_per_exposure) * units
        return cls(
            revenue={k: v * units for k, v in revenue_per_exposure.items()},
            losses={k: v * units for k, v in loss_per_exposure.items()},
            expenses=expenses,
            exposure=units,
            **kwargs,
        )

    # ----- totals ----- #
    @property
    def total_revenue(self) -> float:
        """Sum of all revenue components (signed offsets included)."""
        return float(sum(self.revenue.values()))

    @property
    def total_loss(self) -> float:
        """Sum of all loss components."""
        return float(sum(self.losses.values()))

    @property
    def total_expense(self) -> float:
        """Sum of the normalized operating-expense items."""
        return float(sum(self._expense_items.values()))

    @property
    def gross_margin(self) -> float:
        """Tier one: total revenue less loss expense (operating expense excluded)."""
        return self.total_revenue - self.total_loss

    @property
    def gain(self) -> float:
        """Tier two: gross margin less operating expense."""
        return self.gross_margin - self.total_expense

    # ----- ratios (explicit denominators) ----- #
    def _denominator(self, which: str) -> float:
        """Resolve a denominator name to its amount.

        ``"total_revenue"`` maps to :attr:`total_revenue`; any other value
        (in practice ``"premium"``, since names are validated at
        construction) maps to the ``premium_label`` revenue component.
        """
        if which == "total_revenue":
            return self.total_revenue
        return float(self.revenue[self.premium_label])

    @property
    def loss_ratio(self) -> float:
        """Loss expense over the ``loss_ratio_denominator``."""
        return float(
            safe_divide(self.total_loss, self._denominator(self.loss_ratio_denominator))
        )

    @property
    def expense_ratio(self) -> float:
        """Operating expense over the ``expense_ratio_denominator``."""
        return float(
            safe_divide(
                self.total_expense, self._denominator(self.expense_ratio_denominator)
            )
        )

    @property
    def combined_ratio(self) -> float:
        """Loss ratio plus expense ratio, each on its own denominator."""
        return self.loss_ratio + self.expense_ratio

    @property
    def gross_margin_ratio(self) -> float:
        """Gross margin over the ``loss_ratio_denominator``.

        This equals ``1 - loss_ratio`` only when that denominator is total
        revenue; over gross premium the two differ by
        ``(total_revenue - premium) / premium``.
        """
        return float(
            safe_divide(
                self.gross_margin, self._denominator(self.loss_ratio_denominator)
            )
        )

    @property
    def gain_ratio(self) -> float:
        """Gain / (loss) over the ``gain_denominator``."""
        return float(
            safe_divide(self.gain, self._denominator(self.gain_denominator))
        )

    def reconciliation(self) -> float:
        """``gain_ratio - (1 - combined_ratio)``: the denominator-convention gap.

        Zero (up to floating-point rounding) when the loss, expense, and
        gain ratios are all quoted over total revenue. With any other
        choice -- mixed bases, or gross premium throughout when premium
        differs from total revenue -- it is the size of the drift between
        the quoted gain ratio and one minus the combined ratio. Useful as an
        exhibit footnote or a data-quality check.
        """
        return self.gain_ratio - (1.0 - self.combined_ratio)

    # ----- per exposure ----- #
    def _require_exposure(self) -> float:
        """Return ``exposure`` as a float, raising ``ValueError`` if it is unset."""
        if self.exposure is None:
            raise ValueError(
                "exposure is required for per-exposure figures; pass it to "
                "the constructor or use from_per_exposure(...)"
            )
        return float(self.exposure)

    @property
    def revenue_per_exposure(self) -> float:
        """Total revenue divided by exposure."""
        return self.total_revenue / self._require_exposure()

    @property
    def loss_per_exposure(self) -> float:
        """Total loss divided by exposure."""
        return self.total_loss / self._require_exposure()

    @property
    def expense_per_exposure(self) -> float:
        """Total operating expense divided by exposure."""
        return self.total_expense / self._require_exposure()

    @property
    def gross_margin_per_exposure(self) -> float:
        """Gross margin divided by exposure."""
        return self.gross_margin / self._require_exposure()

    @property
    def gain_per_exposure(self) -> float:
        """Gain / (loss) divided by exposure."""
        return self.gain / self._require_exposure()

    # ----- views ----- #
    def to_frame(
        self,
        *,
        profile: str | None = None,
        labels: Mapping[str, str] | None = None,
    ) -> pd.DataFrame:
        """One tidy row of every total and ratio (per-exposure when given).

        ``profile`` renames only the loss-ratio column to the domain's ratio
        name (``"health"`` -> ``mlr``, ``"life"`` -> ``benefit_ratio``);
        ``labels`` renames any output column. Calculations are unaffected.
        The ``exposure`` and ``*_per_exposure`` columns are present only
        when ``exposure`` was provided.
        """
        row: dict[str, float] = {
            "total_revenue": self.total_revenue,
            "total_loss": self.total_loss,
            "total_expense": self.total_expense,
            "gross_margin": self.gross_margin,
            "gain": self.gain,
            "loss_ratio": self.loss_ratio,
            "expense_ratio": self.expense_ratio,
            "combined_ratio": self.combined_ratio,
            "gross_margin_ratio": self.gross_margin_ratio,
            "gain_ratio": self.gain_ratio,
        }
        if self.exposure is not None:
            row["exposure"] = float(self.exposure)
            row["revenue_per_exposure"] = self.revenue_per_exposure
            row["loss_per_exposure"] = self.loss_per_exposure
            row["expense_per_exposure"] = self.expense_per_exposure
            row["gross_margin_per_exposure"] = self.gross_margin_per_exposure
            row["gain_per_exposure"] = self.gain_per_exposure
        frame = pd.DataFrame([row])
        return frame.rename(columns=_ratio_rename(profile, labels))

    def statement(
        self,
        *,
        profile: str | None = None,
        labels: Mapping[str, str] | None = None,
    ) -> pd.Series:
        """Exhibit-shaped Series: components, subtotals, tiers, then ratios.

        Lines appear in statement order: revenue components and
        ``total_revenue``; loss components, ``total_loss`` and
        ``loss_ratio``; ``gross_margin``; expense items, ``total_expense``
        and ``expense_ratio``; then ``gain`` and ``gain_ratio``.
        ``profile`` and ``labels`` rename index entries exactly as in
        :meth:`to_frame`.

        All lines share one label space. A label reused across revenue,
        loss, and expense components -- or equal to a subtotal / ratio name
        such as ``total_revenue`` -- overwrites the earlier line, so keep
        component labels distinct.
        """
        lines: dict[str, float] = {}
        for label, value in self.revenue.items():
            lines[label] = float(value)
        lines["total_revenue"] = self.total_revenue
        for label, value in self.losses.items():
            lines[label] = float(value)
        lines["total_loss"] = self.total_loss
        lines["loss_ratio"] = self.loss_ratio
        lines["gross_margin"] = self.gross_margin
        for label, value in self._expense_items.items():
            lines[label] = float(value)
        lines["total_expense"] = self.total_expense
        lines["expense_ratio"] = self.expense_ratio
        lines["gain"] = self.gain
        lines["gain_ratio"] = self.gain_ratio
        series = pd.Series(lines, name="statement")
        return series.rename(index=_ratio_rename(profile, labels))





# NOTE: a "[docs]" link from the rendered source listing stood here; it is not code.
def underwriting_summary(
    df: pd.DataFrame,
    *,
    groupby: str | Iterable[str] | None = None,
    revenue_cols: str | Iterable[str],
    loss_cols: str | Iterable[str],
    expense_cols: str | Iterable[str],
    exposure_col: str | None = None,
    premium_col: str | None = None,
    loss_ratio_denominator: str = "total_revenue",
    expense_ratio_denominator: str = "premium",
    gain_denominator: str = "total_revenue",
    profile: str | None = None,
    labels: dict[str, str] | None = None,
) -> pd.DataFrame:
    """Grouped two-tier underwriting summary from a tidy table.

    Component columns are summed per group first; totals, margins and every
    ratio are then derived from those sums (a ratio of sums, never an
    average of row-level ratios).

    Parameters
    ----------
    df : pd.DataFrame
        One row per entity / period at whatever grain is being rolled up.
    groupby : str | Iterable[str], optional
        Grouping columns; omit for a single all-rows summary.
    revenue_cols, loss_cols, expense_cols : str | Iterable[str]
        Component columns for each tier. Revenue offsets (refunds) should be
        signed.
    exposure_col : str, optional
        Exposure column. When given, per-exposure columns are added for
        revenue, loss, expense, gross margin and gain; their names come
        from ``per_exposure_name``. Domain-style names (a health shop's
        ``_pmpm``) are applied via ``labels``.
    premium_col : str, optional
        Gross premium column, required when any denominator is
        ``"premium"``. It is summed and returned alongside the components
        even when it is not listed in ``revenue_cols``.
    loss_ratio_denominator, expense_ratio_denominator, gain_denominator : str
        ``"total_revenue"`` or ``"premium"``.
    profile : str, optional
        Renames only the loss-ratio column to the domain's ratio name
        (``"health"`` -> ``mlr``, ``"life"`` -> ``benefit_ratio``).
    labels : dict, optional
        Explicit output column renames, applied after ``profile``.

    Returns
    -------
    pd.DataFrame
        Group keys, component sums, the exposure sum (when requested),
        ``total_revenue``, ``total_loss``, ``total_expense``,
        ``gross_margin``, ``gain``, ``loss_ratio``, ``expense_ratio``,
        ``combined_ratio``, ``gross_margin_ratio``, ``gain_ratio``, and the
        per-exposure columns when ``exposure_col`` is given.

    Raises
    ------
    ValueError
        If a denominator option is not a supported name, or any denominator
        is ``"premium"`` while ``premium_col`` is ``None``.
    """
    group_keys = as_list(groupby)
    revenue_names = as_list(revenue_cols)
    loss_names = as_list(loss_cols)
    expense_names = as_list(expense_cols)

    # Check the three denominator options in a fixed order before touching df.
    chosen = {
        "loss_ratio_denominator": loss_ratio_denominator,
        "expense_ratio_denominator": expense_ratio_denominator,
        "gain_denominator": gain_denominator,
    }
    for option, choice in chosen.items():
        _validate_denominator(choice, option)
    if premium_col is None and "premium" in chosen.values():
        raise ValueError(
            'premium_col is required when any denominator is "premium"'
        )

    # Every amount column to aggregate, de-duplicated in first-seen order.
    components = list(dict.fromkeys(revenue_names + loss_names + expense_names))
    if premium_col is not None:
        validate_columns(df, [premium_col])
        if premium_col not in components:
            components.append(premium_col)
    exposure_names = [] if exposure_col is None else [exposure_col]
    validate_columns(df, group_keys + components + exposure_names)

    # Aggregate first: one row per group, or one row for the whole table.
    if not group_keys:
        summary = pd.DataFrame(
            {col: [df[col].sum()] for col in components + exposure_names}
        )
    else:
        selected = df[group_keys + components + exposure_names]
        grouped = selected.groupby(group_keys, dropna=False, as_index=False)
        summary = grouped.sum(numeric_only=True)

    summary["total_revenue"] = sum_columns(summary, revenue_names)
    summary["total_loss"] = sum_columns(summary, loss_names)
    summary["total_expense"] = sum_columns(summary, expense_names)
    summary["gross_margin"] = summary["total_revenue"] - summary["total_loss"]
    summary["gain"] = summary["gross_margin"] - summary["total_expense"]

    def _base(choice: str) -> pd.Series:
        # Already validated, so anything other than "total_revenue" is "premium".
        column = "total_revenue" if choice == "total_revenue" else premium_col
        return summary[column]

    summary["loss_ratio"] = safe_divide(
        summary["total_loss"], _base(loss_ratio_denominator)
    )
    summary["expense_ratio"] = safe_divide(
        summary["total_expense"], _base(expense_ratio_denominator)
    )
    summary["combined_ratio"] = summary["loss_ratio"] + summary["expense_ratio"]
    summary["gross_margin_ratio"] = safe_divide(
        summary["gross_margin"], _base(loss_ratio_denominator)
    )
    summary["gain_ratio"] = safe_divide(summary["gain"], _base(gain_denominator))

    derived = [
        "total_revenue",
        "total_loss",
        "total_expense",
        "gross_margin",
        "gain",
        "loss_ratio",
        "expense_ratio",
        "combined_ratio",
        "gross_margin_ratio",
        "gain_ratio",
    ]
    # Keep only the component columns that survived the aggregation.
    surviving = [c for c in components if c in summary.columns]
    ordered = group_keys + surviving + exposure_names + derived

    if exposure_col is not None:
        # Name stem passed to per_exposure_name -> the total it is derived from.
        per_unit_sources = {
            "revenue": "total_revenue",
            "loss": "total_loss",
            "expense": "total_expense",
            "gross_margin": "gross_margin",
            "gain": "gain",
        }
        for stem, total in per_unit_sources.items():
            column = per_exposure_name(stem, exposure_col)
            summary[column] = per_exposure(summary[total], summary[exposure_col])
            ordered.append(column)

    # De-duplicate while preserving order, then apply the output renames.
    summary = summary[list(dict.fromkeys(ordered))]
    renamed = summary.rename(columns=_ratio_rename(profile, None))
    return apply_profile_labels(renamed, labels=labels)