Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 235 additions & 0 deletions chainladder/utils/utility_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,46 @@ def load_sample(key: str, *args, **kwargs) -> Triangle:


def read_pickle(path):
"""Load an object serialized with ``to_pickle`` (``dill`` format).

Parameters
----------
path : str or path-like
Path to the pickle file.

Returns
-------
object
The deserialized triangle or estimator.

Examples
--------
A fitted ``Development`` transformer can be saved to disk and restored
later so the same patterns can be applied to new data without re-fitting.

.. testsetup::
Comment thread
henrydingliu marked this conversation as resolved.

import tempfile
import os

.. testcode::

import chainladder as cl

tri = cl.load_sample("raa")
dev = cl.Development(average="volume").fit(tri)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to demonstrate that to_pickle does something, we should use non-default parameters. something like avg = simple, n = 4.

fd, p = tempfile.mkstemp(suffix=".pkl")
os.close(fd)
dev.to_pickle(p)
restored = cl.read_pickle(p)
os.remove(p)
print(restored.transform(tri).ldf_.values[0, 0, 0, :4].round(4))
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we print the full ldf_ from both the original and the restored estimators?


.. testoutput::

[2.9994 1.6235 1.2709 1.1717]

"""
with open(path, "rb") as pkl:
return dill.load(pkl)

Expand Down Expand Up @@ -407,6 +447,36 @@ def read_csv(


def read_json(json_str, array_backend=None):
"""Deserialize JSON produced by ``to_json`` (triangle, estimator, or pipeline).

Examples
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this example feels empty without seeing the actual json string. please follow the example from pandas

--------
When a full reserving workflow needs to be stored as text—in a database,
config file, or REST API—``to_json`` serializes the pipeline and
``read_json`` reconstructs it with all step parameters intact.

.. testsetup::

import chainladder as cl

.. testcode::

from chainladder.workflow import Pipeline

pipe = Pipeline([
("dev", cl.Development(average="volume")),
("cl", cl.Chainladder()),
])
pipe2 = cl.read_json(pipe.to_json())
print([step[0] for step in pipe2.steps])
print(pipe2.named_steps["dev"].get_params()["average"])

.. testoutput::

['dev', 'cl']
volume

"""
from chainladder import Triangle
from chainladder.workflow import Pipeline

Expand Down Expand Up @@ -602,6 +672,33 @@ def concat(
Returns
-------
Updated triangle

Examples
--------
When paid and incurred triangles are constructed separately, ``concat``
along ``axis=1`` combines them into one multi-column triangle.
``MunichAdjustment`` requires both columns in the same object, so
concatenating them first is the natural setup step.

.. testsetup::

import chainladder as cl

.. testcode::

clrd = cl.load_sample("clrd").groupby("LOB").sum()
wkcomp = clrd.iloc[5:6]
paid = wkcomp["CumPaidLoss"]
incurred = wkcomp["IncurLoss"]
combined = cl.concat([paid, incurred], axis=1)
adj = cl.MunichAdjustment(paid_to_incurred=("CumPaidLoss", "IncurLoss"))
result = adj.fit_transform(combined)
print(result.ldf_["CumPaidLoss"].values[0, 0, 0, :4].round(4))
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good use case for concat. can we focus the test output around concat only?


.. testoutput::

[2.2342 1.3548 1.1517 1.0883]

"""
if type(objs) not in (list, tuple):
raise TypeError("objects to be concatenated must be in a list or tuple")
Expand Down Expand Up @@ -706,10 +803,88 @@ def num_to_nan(arr: ArrayLike) -> ArrayLike:


def minimum(x1, x2):
"""Element-wise minimum of two triangles or a triangle and a scalar
(delegates to ``Triangle.minimum``).

Parameters
----------
x1 : Triangle
x2 : Triangle or scalar

Examples
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need more basic docstring before a doctest. what's x1? what's x2?

--------
When two chainladder runs use different development factor selections,
the ultimates may disagree at each origin. ``minimum`` picks the lower
ultimate at each origin, producing the low-side scenario.

.. testsetup::

import chainladder as cl

.. testcode::

tri = cl.load_sample("raa")
ult_vol = cl.Chainladder().fit(
cl.Development(average="volume").fit_transform(tri)
).ultimate_
ult_sim = cl.Chainladder().fit(
cl.Development(average="simple").fit_transform(tri)
).ultimate_
print(ult_vol.values[0, 0, -5:, 0].round(0))
print(ult_sim.values[0, 0, -5:, 0].round(0))
low_side = cl.minimum(ult_vol, ult_sim)
print(low_side.values[0, 0, -5:, 0].round(0))

.. testoutput::

[19501. 17749. 24019. 16045. 18402.]
[19807. 18201. 25475. 17776. 55781.]
[19501. 17749. 24019. 16045. 18402.]

"""
return x1.minimum(x2)


def maximum(x1, x2):
"""Element-wise maximum of two triangles or a triangle and a scalar
(delegates to ``Triangle.maximum``).

Parameters
----------
x1 : Triangle
x2 : Triangle or scalar

Examples
--------
``maximum`` picks the higher ultimate at each origin, producing the
high-side scenario. This is useful for stress testing or setting a
conservative reserve when two methods produce different estimates.

.. testsetup::

import chainladder as cl

.. testcode::

tri = cl.load_sample("raa")
ult_vol = cl.Chainladder().fit(
cl.Development(average="volume").fit_transform(tri)
).ultimate_
ult_sim = cl.Chainladder().fit(
cl.Development(average="simple").fit_transform(tri)
).ultimate_
print(ult_vol.values[0, 0, -5:, 0].round(0))
print(ult_sim.values[0, 0, -5:, 0].round(0))
high_side = cl.maximum(ult_vol, ult_sim)
print(high_side.values[0, 0, -5:, 0].round(0))

.. testoutput::

[19501. 17749. 24019. 16045. 18402.]
[19807. 18201. 25475. 17776. 55781.]
[19807. 18201. 25475. 17776. 55781.]

"""
return x1.maximum(x2)


Expand Down Expand Up @@ -741,6 +916,66 @@ class PatsyFormula(BaseEstimator, TransformerMixin):
design_info_:
The patsy instructions for generating the design_matrix, X.

Examples
--------
If a development-only Poisson GLM produces residuals that vary
systematically by accident year, adding ``C(origin)`` to the formula
introduces origin-level intercepts and reduces that structure. The
expanded model matrix has more columns (one per development period plus one
per origin), which ``PatsyFormula`` builds from the same R-style string.

.. testsetup::

import chainladder as cl

.. testcode::

genins = cl.load_sample("genins")
by_dev = cl.TweedieGLM(design_matrix="C(development)").fit(genins)
by_both = cl.TweedieGLM(
design_matrix="C(development) + C(origin)"
).fit(genins)
print(len(by_dev.coef_))
print(len(by_both.coef_))
print(round(float(by_dev.ldf_.values[0, 0, 0, 0]), 6))
print(round(float(by_both.ldf_.values[0, 0, 0, 0]), 6))

.. testoutput::
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we be showing all the numbers?


10
19
3.508469
3.491031

When ``TweedieGLM`` is not flexible enough (for example, when you need a
non-Tweedie model or a continuous origin term), build a custom
``DevelopmentML`` pipeline and use ``PatsyFormula`` as the preprocessing
step with the same formula syntax.

.. testcode::

from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from chainladder.utils.utility_functions import PatsyFormula

genins = cl.load_sample("genins")
col = genins.columns[0]
dev_only = cl.DevelopmentML(
Pipeline(
[
("design_matrix", PatsyFormula("C(development)")),
("model", LinearRegression(fit_intercept=False)),
]
),
y_ml=col,
fit_incrementals=False,
).fit(genins)
print(dev_only.ldf_.values[0, 0, 0, :4].round(4))

.. testoutput::

[3.515 1.735 1.3993 1.152 ]

"""

def __init__(self, formula=None):
Expand Down
Loading