From 9175ae74d4db73ab28456b1c90ec09aefce6fe8c Mon Sep 17 00:00:00 2001
From: Ethan Kang <ethan.yskang@gmail.com>
Date: Sat, 16 May 2026 01:21:03 -0700
Subject: [PATCH 1/3] docs: add utility doctest examples

---
 chainladder/utils/utility_functions.py | 193 ++++++++++++++++++++++++-
 1 file changed, 187 insertions(+), 6 deletions(-)

diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py
index 06630484..324f6ce3 100644
--- a/chainladder/utils/utility_functions.py
+++ b/chainladder/utils/utility_functions.py
@@ -249,12 +249,6 @@ def load_sample(key: str, *args, **kwargs) -> Triangle:
             ]
             origin: str = "Accident Half-Year"
             development: str = "Calendar Half-Year"
-        if "uspp" in key.lower():
-            columns: list = [
-                "Reported Claims",
-                "Paid Claims",
-                "Earned Premium"
-            ]
 
     df = pd.read_csv(filepath_or_buffer=dataset_path)
 
@@ -271,6 +265,43 @@ def load_sample(key: str, *args, **kwargs) -> Triangle:
 
 
 def read_pickle(path):
+    """Load an object serialized with ``to_pickle`` (``dill`` format).
+
+    Parameters
+    ----------
+    path : str or path-like
+        Path to the pickle file.
+
+    Returns
+    -------
+    object
+        The deserialized triangle or estimator.
+
+    Examples
+    --------
+
+    .. testsetup::
+
+        import tempfile
+        import os
+
+    .. testcode::
+
+        import chainladder as cl
+
+        tri = cl.load_sample("raa")
+        fd, p = tempfile.mkstemp(suffix=".pkl")
+        os.close(fd)
+        tri.to_pickle(p)
+        back = cl.read_pickle(p)
+        os.remove(p)
+        print(back == tri)
+
+    .. testoutput::
+
+        True
+
+    """
     with open(path, "rb") as pkl:
         return dill.load(pkl)
 
@@ -407,6 +438,26 @@ def read_csv(
 
 
 def read_json(json_str, array_backend=None):
+    """Deserialize JSON produced by ``to_json`` (triangle, estimator, or pipeline).
+
+    Examples
+    --------
+
+    .. testsetup::
+
+        import chainladder as cl
+
+    .. testcode::
+
+        dev = cl.Development(average="volume")
+        dev2 = cl.read_json(dev.to_json())
+        print(dev2.get_params()["average"])
+
+    .. testoutput::
+
+        volume
+
+    """
     from chainladder import Triangle
     from chainladder.workflow import Pipeline
 
@@ -602,6 +653,25 @@ def concat(
     Returns
     -------
     Updated triangle
+
+    Examples
+    --------
+
+    .. testsetup::
+
+        import chainladder as cl
+
+    .. testcode::
+
+        clrd = cl.load_sample("clrd").groupby("LOB").sum().iloc[:2]
+        tri = clrd[["CumPaidLoss", "IncurLoss"]]
+        both = cl.concat([tri.iloc[:, 0:1], tri.iloc[:, 1:2]], axis=1)
+        print(both.shape[1])
+
+    .. testoutput::
+
+        2
+
     """
     if type(objs) not in (list, tuple):
         raise TypeError("objects to be concatenated must be in a list or tuple")
@@ -706,10 +776,50 @@ def num_to_nan(arr: ArrayLike) -> ArrayLike:
 
 
 def minimum(x1, x2):
+    """Element-wise minimum of two triangles (delegates to ``Triangle.minimum``).
+
+    Examples
+    --------
+
+    .. testsetup::
+
+        import chainladder as cl
+
+    .. testcode::
+
+        tri = cl.load_sample("raa")
+        lo = cl.minimum(tri, tri * 0.5)
+        print(round(float(lo.values[0, 0, 0, 0]), 4))
+
+    .. testoutput::
+
+        2506.0
+
+    """
     return x1.minimum(x2)
 
 
 def maximum(x1, x2):
+    """Element-wise maximum of two triangles (delegates to ``Triangle.maximum``).
+
+    Examples
+    --------
+
+    .. testsetup::
+
+        import chainladder as cl
+
+    .. testcode::
+
+        tri = cl.load_sample("raa")
+        hi = cl.maximum(tri, tri * 0.5)
+        print(round(float(hi.values[0, 0, 0, 0]), 4))
+
+    .. testoutput::
+
+        5012.0
+
+    """
     return x1.maximum(x2)
 
 
@@ -741,6 +851,77 @@ class PatsyFormula(BaseEstimator, TransformerMixin):
     design_info_:
         The patsy instructions for generating the design_matrix, X.
 
+    Examples
+    --------
+    ``TweedieGLM`` passes ``design_matrix`` through ``PatsyFormula`` when
+    building its internal ``DevelopmentML`` pipeline. Adding ``C(origin)``
+    expands the GLM and changes the fitted ``ldf_``.
+
+    .. testsetup::
+
+        import chainladder as cl
+
+    .. testcode::
+
+        genins = cl.load_sample("genins")
+        by_dev = cl.TweedieGLM(design_matrix="C(development)").fit(genins)
+        by_both = cl.TweedieGLM(
+            design_matrix="C(development) + C(origin)"
+        ).fit(genins)
+        print(len(by_dev.coef_))
+        print(len(by_both.coef_))
+        print(round(float(by_dev.ldf_.values[0, 0, 0, 0]), 6))
+        print(round(float(by_both.ldf_.values[0, 0, 0, 0]), 6))
+
+    .. testoutput::
+
+        10
+        19
+        3.508469
+        3.491031
+
+    The same formula strings are used explicitly as a pipeline step in
+    ``DevelopmentML``.
+
+    .. testcode::
+
+        from sklearn.linear_model import LinearRegression
+        from sklearn.pipeline import Pipeline
+        from chainladder.utils.utility_functions import PatsyFormula
+
+        genins = cl.load_sample("genins")
+        col = genins.columns[0]
+        dev_only = cl.DevelopmentML(
+            Pipeline(
+                [
+                    ("design_matrix", PatsyFormula("C(development)")),
+                    ("model", LinearRegression(fit_intercept=False)),
+                ]
+            ),
+            y_ml=col,
+            fit_incrementals=False,
+        ).fit(genins)
+        with_origin = cl.DevelopmentML(
+            Pipeline(
+                [
+                    (
+                        "design_matrix",
+                        PatsyFormula("C(development) + C(origin)"),
+                    ),
+                    ("model", LinearRegression(fit_intercept=False)),
+                ]
+            ),
+            y_ml=col,
+            fit_incrementals=False,
+        ).fit(genins)
+        print(len(dev_only.estimator_ml.named_steps.model.coef_))
+        print(len(with_origin.estimator_ml.named_steps.model.coef_))
+
+    .. testoutput::
+
+        10
+        19
+
     """
 
     def __init__(self, formula=None):

From b159d362d901cedf6123f425d222a57afebbce3f Mon Sep 17 00:00:00 2001
From: Ethan Kang <ethan.yskang@gmail.com>
Date: Sat, 16 May 2026 15:49:34 -0700
Subject: [PATCH 2/3] docs: address utility review feedback

---
 chainladder/utils/utility_functions.py | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py
index 324f6ce3..e6b0febd 100644
--- a/chainladder/utils/utility_functions.py
+++ b/chainladder/utils/utility_functions.py
@@ -249,6 +249,12 @@ def load_sample(key: str, *args, **kwargs) -> Triangle:
             ]
             origin: str = "Accident Half-Year"
             development: str = "Calendar Half-Year"
+        if "uspp" in key.lower():
+            columns: list = [
+                "Reported Claims",
+                "Paid Claims",
+                "Earned Premium"
+            ]
 
     df = pd.read_csv(filepath_or_buffer=dataset_path)
 
@@ -279,6 +285,8 @@ def read_pickle(path):
 
     Examples
     --------
+    Use ``read_pickle`` when a triangle or estimator was saved with
+    ``to_pickle`` and should be restored as the same chainladder object.
 
     .. testsetup::
 
@@ -442,6 +450,8 @@ def read_json(json_str, array_backend=None):
 
     Examples
     --------
+    Use ``read_json`` to round-trip model configuration through JSON, for
+    example when a fitted workflow needs to be persisted outside Python.
 
     .. testsetup::
 
@@ -656,6 +666,8 @@ def concat(
 
     Examples
     --------
+    Concatenate along the column axis when separate triangles share the same
+    index, origin, and development axes but carry different measures.
 
     .. testsetup::
 
@@ -780,6 +792,7 @@ def minimum(x1, x2):
 
     Examples
     --------
+    Cap a triangle cell-by-cell by comparing it with another triangle of limits.
 
     .. testsetup::
 
@@ -804,6 +817,8 @@ def maximum(x1, x2):
 
     Examples
     --------
+    Floor a triangle cell-by-cell by comparing it with another triangle of
+    minimum acceptable values.
 
     .. testsetup::
 
@@ -853,9 +868,9 @@ class PatsyFormula(BaseEstimator, TransformerMixin):
 
     Examples
     --------
-    ``TweedieGLM`` passes ``design_matrix`` through ``PatsyFormula`` when
-    building its internal ``DevelopmentML`` pipeline. Adding ``C(origin)``
-    expands the GLM and changes the fitted ``ldf_``.
+    Add origin effects when a development-only GLM is too restrictive.
+    ``TweedieGLM`` sends ``design_matrix`` through ``PatsyFormula`` before
+    fitting, so adding ``C(origin)`` expands the model matrix.
 
     .. testsetup::
 
@@ -880,8 +895,8 @@ class PatsyFormula(BaseEstimator, TransformerMixin):
         3.508469
         3.491031
 
-    The same formula strings are used explicitly as a pipeline step in
-    ``DevelopmentML``.
+    Use the same formula strings explicitly in ``DevelopmentML`` when building
+    a custom scikit-learn pipeline.
 
     .. testcode::
 

From cf48166a35fff634d15b92345eacdb44080ad607 Mon Sep 17 00:00:00 2001
From: Ethan Kang <ekang78@gatech.edu>
Date: Mon, 18 May 2026 03:45:42 -0700
Subject: [PATCH 3/3] docs: revise utility examples to use problem-solution
 narrative per @henrydingliu

- read_pickle: show fitted Development estimator round-trip via pickle, verify transform works after restore
- read_json: show full Pipeline serialization round-trip with step names and params
- concat: show paid+incurred column join enabling MunichAdjustment directly
- minimum: compare volume vs simple CL ultimates, pick element-wise lower for low-side scenario
- maximum: same comparison, pick element-wise higher for high-side scenario
- PatsyFormula: clarify when to use custom DevelopmentML pipeline vs TweedieGLM; show ldf_ output instead of coefficient count
---
 chainladder/utils/utility_functions.py | 141 ++++++++++++++++---------
 1 file changed, 90 insertions(+), 51 deletions(-)

diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py
index e6b0febd..afeb462e 100644
--- a/chainladder/utils/utility_functions.py
+++ b/chainladder/utils/utility_functions.py
@@ -285,8 +285,8 @@ def read_pickle(path):
 
     Examples
     --------
-    Use ``read_pickle`` when a triangle or estimator was saved with
-    ``to_pickle`` and should be restored as the same chainladder object.
+    A fitted ``Development`` transformer can be saved to disk and restored
+    later so the same patterns can be applied to new data without re-fitting.
 
     .. testsetup::
 
@@ -298,16 +298,17 @@ def read_pickle(path):
         import chainladder as cl
 
         tri = cl.load_sample("raa")
+        dev = cl.Development(average="volume").fit(tri)
         fd, p = tempfile.mkstemp(suffix=".pkl")
         os.close(fd)
-        tri.to_pickle(p)
-        back = cl.read_pickle(p)
+        dev.to_pickle(p)
+        restored = cl.read_pickle(p)
         os.remove(p)
-        print(back == tri)
+        print(restored.transform(tri).ldf_.values[0, 0, 0, :4].round(4))
 
     .. testoutput::
 
-        True
+        [2.9994 1.6235 1.2709 1.1717]
 
     """
     with open(path, "rb") as pkl:
@@ -450,8 +451,9 @@ def read_json(json_str, array_backend=None):
 
     Examples
     --------
-    Use ``read_json`` to round-trip model configuration through JSON, for
-    example when a fitted workflow needs to be persisted outside Python.
+    When a full reserving workflow needs to be stored as text—in a database,
+    config file, or REST API—``to_json`` serializes the pipeline and
+    ``read_json`` reconstructs it with all step parameters intact.
 
     .. testsetup::
 
@@ -459,12 +461,19 @@ def read_json(json_str, array_backend=None):
 
     .. testcode::
 
-        dev = cl.Development(average="volume")
-        dev2 = cl.read_json(dev.to_json())
-        print(dev2.get_params()["average"])
+        from chainladder.workflow import Pipeline
+
+        pipe = Pipeline([
+            ("dev", cl.Development(average="volume")),
+            ("cl", cl.Chainladder()),
+        ])
+        pipe2 = cl.read_json(pipe.to_json())
+        print([step[0] for step in pipe2.steps])
+        print(pipe2.named_steps["dev"].get_params()["average"])
 
     .. testoutput::
 
+        ['dev', 'cl']
         volume
 
     """
@@ -666,8 +675,10 @@ def concat(
 
     Examples
     --------
-    Concatenate along the column axis when separate triangles share the same
-    index, origin, and development axes but carry different measures.
+    When paid and incurred triangles are constructed separately, ``concat``
+    along ``axis=1`` combines them into one multi-column triangle.
+    ``MunichAdjustment`` requires both columns in the same object, so
+    concatenating them first is the natural setup step.
 
     .. testsetup::
 
@@ -675,14 +686,18 @@ def concat(
 
     .. testcode::
 
-        clrd = cl.load_sample("clrd").groupby("LOB").sum().iloc[:2]
-        tri = clrd[["CumPaidLoss", "IncurLoss"]]
-        both = cl.concat([tri.iloc[:, 0:1], tri.iloc[:, 1:2]], axis=1)
-        print(both.shape[1])
+        clrd = cl.load_sample("clrd").groupby("LOB").sum()
+        wkcomp = clrd.iloc[5:6]
+        paid = wkcomp["CumPaidLoss"]
+        incurred = wkcomp["IncurLoss"]
+        combined = cl.concat([paid, incurred], axis=1)
+        adj = cl.MunichAdjustment(paid_to_incurred=("CumPaidLoss", "IncurLoss"))
+        result = adj.fit_transform(combined)
+        print(result.ldf_["CumPaidLoss"].values[0, 0, 0, :4].round(4))
 
     .. testoutput::
 
-        2
+        [2.2342 1.3548 1.1517 1.0883]
 
     """
     if type(objs) not in (list, tuple):
@@ -788,11 +803,19 @@ def num_to_nan(arr: ArrayLike) -> ArrayLike:
 
 
 def minimum(x1, x2):
-    """Element-wise minimum of two triangles (delegates to ``Triangle.minimum``).
+    """Element-wise minimum of two triangles or a triangle and a scalar
+    (delegates to ``Triangle.minimum``).
+
+    Parameters
+    ----------
+    x1 : Triangle
+    x2 : Triangle or scalar
 
     Examples
     --------
-    Cap a triangle cell-by-cell by comparing it with another triangle of limits.
+    When two chainladder runs use different development factor selections,
+    the ultimates may disagree at each origin. ``minimum`` picks the lower
+    ultimate at each origin, producing the low-side scenario.
 
     .. testsetup::
 
@@ -801,24 +824,41 @@ def minimum(x1, x2):
     .. testcode::
 
         tri = cl.load_sample("raa")
-        lo = cl.minimum(tri, tri * 0.5)
-        print(round(float(lo.values[0, 0, 0, 0]), 4))
+        ult_vol = cl.Chainladder().fit(
+            cl.Development(average="volume").fit_transform(tri)
+        ).ultimate_
+        ult_sim = cl.Chainladder().fit(
+            cl.Development(average="simple").fit_transform(tri)
+        ).ultimate_
+        print(ult_vol.values[0, 0, -5:, 0].round(0))
+        print(ult_sim.values[0, 0, -5:, 0].round(0))
+        low_side = cl.minimum(ult_vol, ult_sim)
+        print(low_side.values[0, 0, -5:, 0].round(0))
 
     .. testoutput::
 
-        2506.0
+        [19501. 17749. 24019. 16045. 18402.]
+        [19807. 18201. 25475. 17776. 55781.]
+        [19501. 17749. 24019. 16045. 18402.]
 
     """
     return x1.minimum(x2)
 
 
 def maximum(x1, x2):
-    """Element-wise maximum of two triangles (delegates to ``Triangle.maximum``).
+    """Element-wise maximum of two triangles or a triangle and a scalar
+    (delegates to ``Triangle.maximum``).
+
+    Parameters
+    ----------
+    x1 : Triangle
+    x2 : Triangle or scalar
 
     Examples
     --------
-    Floor a triangle cell-by-cell by comparing it with another triangle of
-    minimum acceptable values.
+    ``maximum`` picks the higher ultimate at each origin, producing the
+    high-side scenario. This is useful for stress testing or setting a
+    conservative reserve when two methods produce different estimates.
 
     .. testsetup::
 
@@ -827,12 +867,22 @@ def maximum(x1, x2):
     .. testcode::
 
         tri = cl.load_sample("raa")
-        hi = cl.maximum(tri, tri * 0.5)
-        print(round(float(hi.values[0, 0, 0, 0]), 4))
+        ult_vol = cl.Chainladder().fit(
+            cl.Development(average="volume").fit_transform(tri)
+        ).ultimate_
+        ult_sim = cl.Chainladder().fit(
+            cl.Development(average="simple").fit_transform(tri)
+        ).ultimate_
+        print(ult_vol.values[0, 0, -5:, 0].round(0))
+        print(ult_sim.values[0, 0, -5:, 0].round(0))
+        high_side = cl.maximum(ult_vol, ult_sim)
+        print(high_side.values[0, 0, -5:, 0].round(0))
 
     .. testoutput::
 
-        5012.0
+        [19501. 17749. 24019. 16045. 18402.]
+        [19807. 18201. 25475. 17776. 55781.]
+        [19807. 18201. 25475. 17776. 55781.]
 
     """
     return x1.maximum(x2)
@@ -868,9 +918,11 @@ class PatsyFormula(BaseEstimator, TransformerMixin):
 
     Examples
     --------
-    Add origin effects when a development-only GLM is too restrictive.
-    ``TweedieGLM`` sends ``design_matrix`` through ``PatsyFormula`` before
-    fitting, so adding ``C(origin)`` expands the model matrix.
+    If a development-only Poisson GLM produces residuals that vary
+    systematically by accident year, adding ``C(origin)`` to the formula
+    introduces origin-level intercepts and reduces that structure. The
+    expanded model matrix has more columns (one per development period plus one
+    per origin), which ``PatsyFormula`` builds from the same R-style string.
 
     .. testsetup::
 
@@ -895,8 +947,10 @@ class PatsyFormula(BaseEstimator, TransformerMixin):
         3.508469
         3.491031
 
-    Use the same formula strings explicitly in ``DevelopmentML`` when building
-    a custom scikit-learn pipeline.
+    When ``TweedieGLM`` is not flexible enough (for example, when you need a
+    non-Tweedie model or a continuous origin term), build a custom
+    ``DevelopmentML`` pipeline and use ``PatsyFormula`` as the preprocessing
+    step with the same formula syntax.
 
     .. testcode::
 
@@ -916,26 +970,11 @@ class PatsyFormula(BaseEstimator, TransformerMixin):
             y_ml=col,
             fit_incrementals=False,
         ).fit(genins)
-        with_origin = cl.DevelopmentML(
-            Pipeline(
-                [
-                    (
-                        "design_matrix",
-                        PatsyFormula("C(development) + C(origin)"),
-                    ),
-                    ("model", LinearRegression(fit_intercept=False)),
-                ]
-            ),
-            y_ml=col,
-            fit_incrementals=False,
-        ).fit(genins)
-        print(len(dev_only.estimator_ml.named_steps.model.coef_))
-        print(len(with_origin.estimator_ml.named_steps.model.coef_))
+        print(dev_only.ldf_.values[0, 0, 0, :4].round(4))
 
     .. testoutput::
 
-        10
-        19
+        [3.515  1.735  1.3993 1.152 ]
 
     """