Source code for actuarialpy.lifecycle

"""Policy/membership lifecycle primitives.

These derive *status* and *tenure* from effective and termination dates rather
than requiring a precomputed status label, and clip exposure to the window an
entity was actually in force during a period (the general "earned exposure"
idea). They are line-of-business agnostic: an entity may be a group, a policy,
a member, or a contract; dates may be policy effective/expiration or membership
enroll/disenroll.

Scope note: this module *derives the distinction and provides the levers*
(status, tenure, in-force windowing, earned exposure). It deliberately does not
encode differential treatment of cohorts (e.g. excluding first-year business
from a renewal blend, or weighting run-out). Those are pricing-methodology
choices that belong to the caller.
"""

from __future__ import annotations

import numpy as np
import pandas as pd

from actuarialpy.columns import validate_columns

# Canonical status values written by ``derive_status``. Its ``labels``
# argument can remap any of them to caller-specific strings.
STATUS_ACTIVE = "active"
STATUS_FIRST_YEAR = "first_year"
STATUS_TERMED = "termed"


def _to_dt(values) -> pd.Series:
    """Coerce ``values`` to pandas datetimes via :func:`pandas.to_datetime`."""
    converted = pd.to_datetime(values)
    return converted


def _months_between_series(start: pd.Series, end: pd.Series) -> pd.Series:
    """Count calendar-month boundaries from ``start`` to ``end``, element-wise.

    Only the year and month components are compared; the day of month is
    ignored. The result is negative where ``end`` falls in an earlier month
    than ``start``.
    """
    year_gap = end.dt.year - start.dt.year
    month_gap = end.dt.month - start.dt.month
    return year_gap * 12 + month_gap



[docs]
def add_tenure(
    df: pd.DataFrame,
    effective_col: str,
    as_of,
    *,
    tenure_col: str = "tenure_months",
    one_based: bool = False,
    copy: bool = True,
) -> pd.DataFrame:
    """Attach a whole-month tenure column measured from effective date to ``as_of``.

    Tenure is the number of calendar-month boundaries between each row's
    effective date and the single reference date ``as_of``; the day of month
    is ignored. Pass ``one_based=True`` to shift the count up by one, so an
    entity effective in the as-of month reports 1 instead of 0 ("months of
    experience" convention). The frame is copied first unless ``copy=False``,
    in which case ``df`` itself receives the new column.
    """
    validate_columns(df, [effective_col])
    out = df if not copy else df.copy()

    effective = _to_dt(out[effective_col])
    reference = pd.to_datetime(as_of)

    years_elapsed = reference.year - effective.dt.year
    months_elapsed = reference.month - effective.dt.month
    months = years_elapsed * 12 + months_elapsed
    if one_based:
        months = months + 1

    out[tenure_col] = months
    return out




[docs]
def derive_status(
    df: pd.DataFrame,
    *,
    effective_col: str,
    as_of,
    termination_col: str | None = None,
    first_year_months: int = 12,
    status_col: str = "status",
    labels: dict[str, str] | None = None,
    copy: bool = True,
) -> pd.DataFrame:
    """Label each row active, first-year, or termed as of a reference date.

    Rules are applied in this order of precedence:

    - **termed**: ``termination_col`` is given, the row has a termination date,
      and that date is on or before ``as_of``.
    - **first_year**: not termed, and the month count from the effective date
      to ``as_of`` is below ``first_year_months``. The threshold is a parameter
      because shops differ on what "first year" means.
    - **active**: everything else.

    ``labels`` may remap any of the three canonical values (keys are the
    canonical strings, e.g. ``{"first_year": "First Year Account"}``); values
    without an entry are left as-is. The frame is copied first unless
    ``copy=False``.
    """
    required = [effective_col]
    if termination_col:
        required.append(termination_col)
    validate_columns(df, required)
    out = df if not copy else df.copy()

    effective = _to_dt(out[effective_col])
    reference = pd.to_datetime(as_of)
    years_elapsed = reference.year - effective.dt.year
    months_elapsed = reference.month - effective.dt.month
    tenure = years_elapsed * 12 + months_elapsed

    if not termination_col:
        # No termination information at all: nothing can be termed.
        termed = pd.Series(False, index=out.index)
    else:
        terminated_on = _to_dt(out[termination_col])
        termed = terminated_on.notna() & (terminated_on <= reference)

    first_year = (~termed) & (tenure < first_year_months)

    # Resolve the non-termed label first, then let "termed" override it.
    not_termed_label = np.where(first_year, STATUS_FIRST_YEAR, STATUS_ACTIVE)
    resolved = np.where(termed, STATUS_TERMED, not_termed_label)
    status = pd.Series(resolved, index=out.index)

    if labels:

        def relabel(value):
            return labels.get(value, value)

        status = status.map(relabel)

    out[status_col] = status
    return out




[docs]
def is_in_force(
    df: pd.DataFrame,
    *,
    effective_col: str,
    period_start,
    period_end,
    termination_col: str | None = None,
) -> pd.Series:
    """Flag rows that were in force at some point in ``[period_start, period_end]``.

    A row qualifies when its effective date is on or before ``period_end``
    and, if ``termination_col`` is supplied, its termination date is either
    missing (still in force) or on or after ``period_start``. Returns a
    boolean Series aligned to ``df``.
    """
    required = [effective_col]
    if termination_col:
        required.append(termination_col)
    validate_columns(df, required)

    effective = _to_dt(df[effective_col])
    window_start = pd.to_datetime(period_start)
    window_end = pd.to_datetime(period_end)

    started = effective <= window_end
    if not termination_col:
        return started

    terminated_on = _to_dt(df[termination_col])
    still_covered = terminated_on.isna() | (terminated_on >= window_start)
    return started & still_covered




[docs]
def add_months_in_force(
    df: pd.DataFrame,
    *,
    effective_col: str,
    period_start,
    period_end,
    termination_col: str | None = None,
    out_col: str = "months_in_force",
    copy: bool = True,
) -> pd.DataFrame:
    """Add whole months of overlap between each entity's in-force window and a period.

    The in-force window is ``[effective, termination]`` (a missing termination
    means the period end). The window is clipped to ``[period_start, period_end]``
    and the result is floored at 0. Month counting is inclusive of both endpoint
    months, so full coverage of an N-month period returns N.

    Rows whose clipped window is empty (terminated before ``period_start``,
    effective after ``period_end``, or terminated before the effective date)
    get 0 even when both ends land in the same calendar month. Rows with a
    missing effective date get a missing (NaN) month count.

    The frame is copied first unless ``copy=False``, in which case ``df``
    itself receives ``out_col``.
    """
    cols = [effective_col] + ([termination_col] if termination_col else [])
    validate_columns(df, cols)
    result = df.copy() if copy else df

    start = pd.to_datetime(period_start)
    end = pd.to_datetime(period_end)

    eff = _to_dt(result[effective_col])
    if termination_col:
        # Open-ended coverage is treated as running through the period end.
        term = _to_dt(result[termination_col]).fillna(end)
    else:
        term = pd.Series(end, index=result.index)

    eff_clipped = eff.clip(lower=start)
    term_clipped = term.clip(upper=end)
    months = _months_between_series(eff_clipped, term_clipped) + 1

    # The inclusive "+1" would report one month for an empty window whose two
    # ends share a calendar month (e.g. terminated on the 5th, period starting
    # on the 15th). Force those to 0. Comparisons against NaT are False, so
    # rows with a missing effective date keep their NaN.
    no_overlap = eff_clipped > term_clipped
    result[out_col] = months.mask(no_overlap, 0).clip(lower=0)
    return result




[docs]
def earned_exposure(
    df: pd.DataFrame,
    exposure_col: str,
    *,
    effective_col: str,
    period_start,
    period_end,
    termination_col: str | None = None,
    period_months: int | None = None,
    out_col: str | None = None,
    copy: bool = True,
) -> pd.DataFrame:
    """Prorate a full-period exposure by the fraction of the period in force.

    ``earned = exposure * months_in_force / period_months``. Use this when each
    row carries a full-period exposure (e.g. annualized) that must be reduced for
    mid-period entry or termination. If your data is already monthly, filtering
    to in-force months with :func:`is_in_force` is usually simpler.

    ``months_in_force`` comes from :func:`add_months_in_force`. When
    ``period_months`` is omitted it is the inclusive month count from
    ``period_start`` to ``period_end``. The result is written to ``out_col``
    (default ``"earned_<exposure_col>"``). The frame is copied first unless
    ``copy=False``, in which case ``df`` itself receives the new column and is
    returned; no scratch column is left behind on it.

    Raises ``ValueError`` if ``period_months`` (given or derived) is not
    positive, since the proration would otherwise be infinite or negative.
    """
    # dict.fromkeys de-duplicates while keeping order, so selecting these
    # columns below never yields a repeated column label.
    date_cols = list(
        dict.fromkeys([effective_col] + ([termination_col] if termination_col else []))
    )
    validate_columns(df, [exposure_col, *date_cols])

    if period_months is None:
        start = pd.to_datetime(period_start)
        end = pd.to_datetime(period_end)
        period_months = (end.year - start.year) * 12 + (end.month - start.month) + 1
    if period_months <= 0:
        raise ValueError(
            f"period_months must be positive, got {period_months}; "
            "check that period_end is not before period_start"
        )

    # Compute months in force on a slim copy holding only the date columns, so
    # the scratch column never touches the caller's frame (which matters when
    # copy=False) and cannot collide with a caller column of the same name.
    months = add_months_in_force(
        df[date_cols],
        effective_col=effective_col,
        termination_col=termination_col,
        period_start=period_start,
        period_end=period_end,
        out_col="_months_in_force_tmp",
        copy=True,
    )["_months_in_force_tmp"]

    result = df.copy() if copy else df
    name = out_col or f"earned_{exposure_col}"
    result[name] = result[exposure_col] * (months / period_months)
    return result