Source code for actuarialpy.components

"""Component/category summaries and driver analysis."""

from __future__ import annotations

from collections.abc import Iterable

import pandas as pd

from actuarialpy.columns import as_list, per_exposure_name, validate_columns
from actuarialpy.metrics import per_exposure, safe_divide
from actuarialpy.trend import _comparison_masks


def summarize_components(
    df: pd.DataFrame,
    *,
    groupby: str | Iterable[str] | None = None,
    component_cols: str | Iterable[str],
    exposure_col: str | None = None,
    total_col: str = "total_expense",
    include_shares: bool = True,
) -> pd.DataFrame:
    """Summarize component/category amounts, per-exposure values, and shares.

    Args:
        df: Input data holding the component columns (and, when given, the
            grouping and exposure columns).
        groupby: Column name(s) to aggregate by. When empty, the whole frame
            is collapsed into a single summary row.
        component_cols: Column name(s) of the component amounts to sum.
        exposure_col: Optional exposure column. When supplied it is summed
            alongside the components and a per-exposure column is added for
            every component and for the total.
        total_col: Name of the output column holding the row-wise sum of the
            component columns.
        include_shares: When true, add a ``<component>_share`` column for
            every component (component divided by the total).

    Returns:
        A summary frame with the grouping columns (if any), the summed
        component and exposure columns, ``total_col``, and the optional
        per-exposure and share columns.
    """
    group_keys = as_list(groupby)
    component_names = as_list(component_cols)
    exposure_part = [exposure_col] if exposure_col else []

    validate_columns(df, group_keys + component_names + exposure_part)

    value_cols = component_names + exposure_part
    if not group_keys:
        # No grouping requested: collapse everything into one row of sums.
        result = pd.DataFrame({name: [df[name].sum()] for name in value_cols})
    else:
        subset = df[group_keys + value_cols]
        grouped = subset.groupby(group_keys, dropna=False, as_index=False)
        result = grouped.sum(numeric_only=True)

    result[total_col] = result[component_names].sum(axis=1)

    if exposure_col:
        # Components first, then the total, so column order stays stable.
        for name in [*component_names, total_col]:
            rate_col = per_exposure_name(name, exposure_col)
            result[rate_col] = per_exposure(result[name], result[exposure_col])

    if include_shares:
        for name in component_names:
            result[f"{name}_share"] = safe_divide(result[name], result[total_col])

    return result
def component_driver_analysis(
    df: pd.DataFrame,
    *,
    period_col: str | None = None,
    prior_period=None,
    current_period=None,
    date_col: str | None = None,
    prior_start=None,
    prior_end=None,
    current_start=None,
    current_end=None,
    prior_filter=None,
    current_filter=None,
    component_cols: str | Iterable[str],
    exposure_col: str | None = None,
    groupby: str | Iterable[str] | None = None,
) -> pd.DataFrame:
    """Explain component drivers of change between two periods.

    The primary comparison is based on component totals, or component amount
    per exposure when ``exposure_col`` is supplied. The API matches
    ``trend_summary`` and supports period-column, date-range, or
    explicit-filter comparisons.

    Args:
        df: Input data containing the component columns and any grouping,
            exposure, period, or date columns referenced below.
        period_col: Column identifying the period, used together with
            ``prior_period`` and ``current_period``.
        prior_period: Value selecting the prior period.
        current_period: Value selecting the current period.
        date_col: Date column, used together with the start/end bounds.
        prior_start: Start of the prior date range.
        prior_end: End of the prior date range.
        current_start: Start of the current date range.
        current_end: End of the current date range.
        prior_filter: Explicit row selector for the prior period.
        current_filter: Explicit row selector for the current period.
        component_cols: Column name(s) of the components to compare.
        exposure_col: Optional exposure column; when given, values are
            compared per exposure instead of as raw totals.
        groupby: Optional column name(s) to break the analysis out by.

    Returns:
        A long-format frame with one row per group and component. Columns
        are the grouping columns, the period labels (``prior_period`` /
        ``current_period`` in period mode; ``prior_start``, ``prior_end``,
        ``current_start``, ``current_end`` in date mode; none otherwise),
        then ``component``, ``prior``, ``current``, ``change``, ``trend``
        and ``contribution_to_change``. When no rows are produced the frame
        is empty but still carries these columns.
    """
    groups = as_list(groupby)
    components = as_list(component_cols)
    required = groups + components + ([exposure_col] if exposure_col else [])
    if period_col is not None:
        required.append(period_col)
    if date_col is not None:
        required.append(date_col)
    validate_columns(df, required)

    prior_filter, current_filter, mode = _comparison_masks(
        df,
        period_col=period_col,
        prior_period=prior_period,
        current_period=current_period,
        date_col=date_col,
        prior_start=prior_start,
        prior_end=prior_end,
        current_start=current_start,
        current_end=current_end,
        prior_filter=prior_filter,
        current_filter=current_filter,
    )

    prior_sum = summarize_components(
        df.loc[prior_filter],
        groupby=groups,
        component_cols=components,
        exposure_col=exposure_col,
        include_shares=False,
    )
    current_sum = summarize_components(
        df.loc[current_filter],
        groupby=groups,
        component_cols=components,
        exposure_col=exposure_col,
        include_shares=False,
    )

    if groups:
        # Outer join keeps groups that appear in only one of the periods.
        merged = prior_sum.merge(
            current_sum, on=groups, how="outer", suffixes=("_prior", "_current")
        )
    else:
        merged = pd.concat(
            [prior_sum.add_suffix("_prior"), current_sum.add_suffix("_current")],
            axis=1,
        )

    # The period labels are identical for every output row, so build them
    # once instead of re-parsing the dates for each row and component.
    if mode == "period":
        period_data = {"prior_period": prior_period, "current_period": current_period}
    elif mode == "date":
        period_data = {
            "prior_start": pd.to_datetime(prior_start),
            "prior_end": pd.to_datetime(prior_end),
            "current_start": pd.to_datetime(current_start),
            "current_end": pd.to_datetime(current_end),
        }
    else:
        period_data = {}

    # Column (without suffix) that carries each component's compared value.
    metric_names = [
        (comp, per_exposure_name(comp, exposure_col) if exposure_col else comp)
        for comp in components
    ]

    rows = []
    for _, row in merged.iterrows():
        key_data = {g: row[g] for g in groups}

        # First pass: read each component once and accumulate the total
        # change, which is the denominator of every contribution below.
        values = []
        total_change = 0
        for comp, metric in metric_names:
            prior_val = row.get(f"{metric}_prior", 0)
            current_val = row.get(f"{metric}_current", 0)
            # A group missing from one period shows up as NaN after the
            # outer merge; treat that as a zero amount.
            prior_val = 0 if pd.isna(prior_val) else prior_val
            current_val = 0 if pd.isna(current_val) else current_val
            values.append((comp, prior_val, current_val))
            total_change += current_val - prior_val

        # Second pass: emit one output row per component.
        for comp, prior_val, current_val in values:
            change = current_val - prior_val
            rows.append(
                {
                    **key_data,
                    **period_data,
                    "component": comp,
                    "prior": prior_val,
                    "current": current_val,
                    "change": change,
                    "trend": safe_divide(current_val, prior_val) - 1,
                    "contribution_to_change": safe_divide(change, total_change),
                }
            )

    if not rows:
        # Keep the output schema stable even when nothing matched, so
        # callers can still select columns from the (empty) result.
        columns = list(
            dict.fromkeys(
                [
                    *groups,
                    *period_data,
                    "component",
                    "prior",
                    "current",
                    "change",
                    "trend",
                    "contribution_to_change",
                ]
            )
        )
        return pd.DataFrame(columns=columns)

    return pd.DataFrame(rows)
def component_trend(*args, **kwargs) -> pd.DataFrame:
    """Alias for ``component_driver_analysis``.

    Every positional and keyword argument is forwarded unchanged and the
    resulting frame is returned as-is. ``component_driver_analysis`` is the
    preferred name because the function explains the drivers of total
    component change, not just a component-specific trend.
    """
    analysis = component_driver_analysis(*args, **kwargs)
    return analysis