From f2972286fe21e9f1c132e568ccc9f8d6b1c8288b Mon Sep 17 00:00:00 2001 From: Ethan Kang Date: Sat, 16 May 2026 01:20:24 -0700 Subject: [PATCH 1/2] docs: add workflow doctest examples --- chainladder/workflow/gridsearch.py | 75 +++++++++++++++++++++++++++++- chainladder/workflow/voting.py | 35 ++++++++++++++ 2 files changed, 109 insertions(+), 1 deletion(-) diff --git a/chainladder/workflow/gridsearch.py b/chainladder/workflow/gridsearch.py index 227a84fb..28d74cda 100644 --- a/chainladder/workflow/gridsearch.py +++ b/chainladder/workflow/gridsearch.py @@ -54,6 +54,42 @@ class GridSearch(BaseEstimator): results_: DataFrame A DataFrame with each param_grid key as a column and the ``scoring`` score as the last column + + Examples + -------- + Each row of ``results_`` is one ``ParameterGrid`` draw; changing + ``param_grid`` changes how many fits run and the reported scores. + + .. testsetup:: + + import chainladder as cl + import numpy as np + + .. testcode:: + + clrd = cl.load_sample("clrd") + medmal = clrd.groupby("LOB").sum().loc["medmal"]["CumPaidLoss"] + prem = clrd.groupby("LOB").sum().loc["medmal"]["EarnedPremDIR"].latest_diagonal + pipe = cl.Pipeline( + [("dev", cl.Development()), ("benk", cl.Benktander())] + ) + param_grid = {"benk__n_iters": [1, 4]} + scoring = { + "IBNR": lambda m: float(np.nansum(m.named_steps.benk.ibnr_.values)) + } + grid = cl.GridSearch( + pipe, param_grid, scoring=scoring, n_jobs=1 + ).fit(medmal, benk__sample_weight=prem) + print(len(grid.results_)) + print(int(round(grid.results_["IBNR"].iloc[0], 0))) + print(int(round(grid.results_["IBNR"].iloc[1], 0))) + + .. testoutput:: + + 2 + 1624377 + 1442665 + """ def __init__(self, estimator, param_grid, scoring, verbose=0, @@ -139,7 +175,44 @@ class Pipeline(PipelineSL, EstimatorIO): ---------- named_steps: bunch object, a dictionary with attribute access Read-only attribute to access any step parameter by user given name. 
- Keys are step names and values are steps parameters.""" + Keys are step names and values are step parameters. + + Examples + -------- + Hyper-parameters are set with the ``step__param`` naming convention from + scikit-learn. Here ``Development`` averaging changes aggregate IBNR from + the same ``Chainladder`` final step. + + .. testsetup:: + + import chainladder as cl + import numpy as np + + .. testcode:: + + tri = cl.load_sample("raa") + pipe = cl.Pipeline( + [ + ("dev", cl.Development(average="simple")), + ("cl", cl.Chainladder()), + ] + ) + ib_simple = int( + round(float(np.nansum(pipe.fit_predict(tri).ibnr_.values)), 0) + ) + pipe.set_params(dev__average="volume") + ib_volume = int( + round(float(np.nansum(pipe.fit_predict(tri).ibnr_.values)), 0) + ) + print(ib_simple) + print(ib_volume) + + .. testoutput:: + + 93643 + 52135 + + """ def fit(self, X, y=None, sample_weight=None, **fit_params): if sample_weight: diff --git a/chainladder/workflow/voting.py b/chainladder/workflow/voting.py index b1bd8723..0035be99 100644 --- a/chainladder/workflow/voting.py +++ b/chainladder/workflow/voting.py @@ -239,6 +239,41 @@ class VotingChainladder(_BaseChainladderVoting, MethodBase): 1988 23106.943030 1989 20004.502125 1990 21605.832631 + + ``weights`` and ``default_weighting`` change how sub-model ultimates are + blended; skewing weights toward ``Chainladder`` pulls the ensemble away + from ``BornhuetterFerguson`` on late accident years. + + .. 
testcode:: + + import numpy as np + + raa = cl.load_sample("raa") + cl_ult = cl.Chainladder().fit(raa).ultimate_ + apriori = cl_ult * 0 + (float(cl_ult.sum()) / 10) + estimators = [ + ("bcl", cl.Chainladder()), + ("bf", cl.BornhuetterFerguson(apriori=1.0)), + ] + even = cl.VotingChainladder( + estimators=estimators, + weights=None, + default_weighting=(0.5, 0.5), + ).fit(raa, sample_weight=apriori) + w = np.ones((1, 1, raa.shape[2], 2)) + w[..., 0] = 0.9 + w[..., 1] = 0.1 + skewed = cl.VotingChainladder(estimators=estimators, weights=w).fit( + raa, sample_weight=apriori + ) + print(round(float(even.ultimate_.values[0, 0, -1, 0]), 2)) + print(round(float(skewed.ultimate_.values[0, 0, -1, 0]), 2)) + + .. testoutput:: + + 19694.23 + 18660.8 + """ @_deprecate_positional_args From b3dd40f1705cd68dfc887d47852269e1c59f6994 Mon Sep 17 00:00:00 2001 From: Ethan Kang Date: Sat, 16 May 2026 15:52:05 -0700 Subject: [PATCH 2/2] docs: address workflow review feedback --- chainladder/workflow/gridsearch.py | 41 +++++++++++++----------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/chainladder/workflow/gridsearch.py b/chainladder/workflow/gridsearch.py index 28d74cda..a602abf0 100644 --- a/chainladder/workflow/gridsearch.py +++ b/chainladder/workflow/gridsearch.py @@ -57,38 +57,36 @@ class GridSearch(BaseEstimator): Examples -------- - Each row of ``results_`` is one ``ParameterGrid`` draw; changing - ``param_grid`` changes how many fits run and the reported scores. + Use ``GridSearch`` when you want to compare modeling choices with the + same scoring rule. Here the grid compares simple and volume averages by + reading the fitted development ``sigma_`` from each candidate pipeline. .. testsetup:: import chainladder as cl - import numpy as np - .. 
testcode:: clrd = cl.load_sample("clrd") medmal = clrd.groupby("LOB").sum().loc["medmal"]["CumPaidLoss"] - prem = clrd.groupby("LOB").sum().loc["medmal"]["EarnedPremDIR"].latest_diagonal pipe = cl.Pipeline( - [("dev", cl.Development()), ("benk", cl.Benktander())] + [("dev", cl.Development()), ("cl", cl.Chainladder())] ) - param_grid = {"benk__n_iters": [1, 4]} + param_grid = {"dev__average": ["simple", "volume"]} scoring = { - "IBNR": lambda m: float(np.nansum(m.named_steps.benk.ibnr_.values)) + "sigma": lambda m: float(m.named_steps.dev.sigma_.values.sum()) } grid = cl.GridSearch( pipe, param_grid, scoring=scoring, n_jobs=1 - ).fit(medmal, benk__sample_weight=prem) + ).fit(medmal) print(len(grid.results_)) - print(int(round(grid.results_["IBNR"].iloc[0], 0))) - print(int(round(grid.results_["IBNR"].iloc[1], 0))) + print(round(grid.results_["sigma"].iloc[0], 3)) + print(round(grid.results_["sigma"].iloc[1], 3)) .. testoutput:: 2 - 1624377 - 1442665 + 1.422 + 206.183 """ @@ -179,15 +177,14 @@ class Pipeline(PipelineSL, EstimatorIO): Examples -------- - Hyper-parameters are set with the ``step__param`` naming convention from - scikit-learn. Here ``Development`` averaging changes aggregate IBNR from - the same ``Chainladder`` final step. + Use ``Pipeline`` when the same triangle should pass through several + estimators as one workflow. The ``step__param`` naming convention lets you + change one step, here ``Development.average``, without rebuilding the + whole pipeline. .. testsetup:: import chainladder as cl - import numpy as np - .. 
testcode:: tri = cl.load_sample("raa") @@ -197,13 +194,9 @@ class Pipeline(PipelineSL, EstimatorIO): ("cl", cl.Chainladder()), ] ) - ib_simple = int( - round(float(np.nansum(pipe.fit_predict(tri).ibnr_.values)), 0) - ) + ib_simple = int(round(float(pipe.fit_predict(tri).ibnr_.sum()), 0)) pipe.set_params(dev__average="volume") - ib_volume = int( - round(float(np.nansum(pipe.fit_predict(tri).ibnr_.values)), 0) - ) + ib_volume = int(round(float(pipe.fit_predict(tri).ibnr_.sum()), 0)) print(ib_simple) print(ib_volume)