diff --git a/chainladder/development/constant.py b/chainladder/development/constant.py index e223c25b..cfce1d37 100644 --- a/chainladder/development/constant.py +++ b/chainladder/development/constant.py @@ -3,10 +3,12 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. from chainladder.development.base import DevelopmentBase import pandas as pd +import numpy as np +import warnings class DevelopmentConstant(DevelopmentBase): - """ A Estimator that allows for including of external patterns into a + """A Estimator that allows for including of external patterns into a Development style model. When this estimator is fit against a triangle, only the grain of the existing triangle is retained. @@ -39,56 +41,153 @@ def __init__(self, patterns=None, style="ldf", callable_axis=0, groupby=None): self.callable_axis = callable_axis self.groupby = groupby + def _prepare_cdf_patterns(self, patterns, n_dev_periods): + + patterns = dict(patterns) + + sorted_keys = sorted(patterns.keys()) + pattern_values = np.array([float(patterns[k]) for k in sorted_keys]) + + # convert ldfs to cdfs; cdf patterns are used as-is + if self.style == "ldf": + cdf_values = np.cumprod(pattern_values[::-1])[::-1] + else: + cdf_values = pattern_values + + cdf_patterns = {int(k): float(v) for k, v in zip(sorted_keys, cdf_values)} + + # patterns that fit within the triangle have no tail + if len(cdf_patterns) <= n_dev_periods: + return cdf_patterns, 1.0 + + # separate the tail factor and rebase the remaining cdfs onto it + tail_cdf = cdf_patterns[int(sorted_keys[n_dev_periods])] + for k in sorted_keys[:n_dev_periods]: + cdf_patterns[int(k)] /= tail_cdf + + return cdf_patterns, tail_cdf + def fit(self, X, y=None, sample_weight=None): """Fit the model with X. Parameters ---------- X : Triangle-like -     Set of LDFs to which the munich adjustment will be applied. + Set of LDFs to which the munich adjustment will be applied. y : Ignored sample_weight : Ignored Returns ------- self : object -     Returns the instance itself. + Returns the instance itself. """ from chainladder import options - if X.is_cumulative == False: + + # convert to cumulative triangle + if not X.is_cumulative: obj = self._set_fit_groups(X).incr_to_cum().val_to_dev().copy() else: obj = self._set_fit_groups(X).val_to_dev().copy() + xp = obj.get_array_module() - obj = obj.iloc[..., :1, :-1]*0+1 + tri_dev_periods = len(obj.ddims) + if callable(self.patterns): + # on index if self.callable_axis == 0: - ldf = obj.index.apply(self.patterns, axis=1) - ldf = ( - pd.concat(ldf.apply(pd.DataFrame, index=[0]).values, axis=0) - .fillna(1)[obj.ddims].values) - ldf = xp.array(ldf[:, None, None, :]) + rows = obj.index + # on columns elif self.callable_axis == 1: - ldf = obj.columns.to_frame(index=False).apply(self.patterns, axis=1) - ldf = ( - pd.concat(ldf.apply(pd.DataFrame, index=[0]).values, axis=0) - .fillna(1)[obj.ddims].values) - ldf = xp.array(ldf[None, :, None, :]) + rows = obj.columns.to_frame(index=False) + else: + raise ValueError("callable axis needs to be 0 or 1") + + patterns = self.patterns(rows.iloc[0]) + else: + # force the patterns to a dictionary + patterns = dict(self.patterns) + + # separate the cdf patterns from the tail; _prepare_cdf_patterns already + # returns tail_cdf=1 when the patterns do not extend past the triangle. + cdf_patterns, tail_cdf = self._prepare_cdf_patterns(patterns, tri_dev_periods) + pattern_dev_periods = len(cdf_patterns) + + # determine whether to include the last development period in the patterns + if pattern_dev_periods < tri_dev_periods: + warnings.warn( + "Supplied patterns are shorter than the triangle development " + "periods. Missing ages will be filled with a factor of 1.0.", + UserWarning, + stacklevel=2, + ) + include_last = False + elif pattern_dev_periods == tri_dev_periods: + include_last = True + else: + include_last = tail_cdf != 1 + + dev_slice = slice(None) if include_last else slice(None, -1) + + # this is the object to fill out the patterns, skeleton frame + obj = obj.iloc[..., :1, dev_slice] * 0 + 1 + + if callable(self.patterns): + + def _callable_row(row): + raw_patterns = self.patterns(row) + cdf_row, row_tail_cdf = self._prepare_cdf_patterns( + raw_patterns, tri_dev_periods + ) + fit_row = raw_patterns if self.style == "ldf" else cdf_row + return dict(fit_row), row_tail_cdf + + prepared = rows.apply(_callable_row, axis=1) + ldf = ( + pd.concat( + [pd.DataFrame(item[0], index=[0]) for item in prepared], + axis=0, + ) + .fillna(1)[obj.ddims] + .values + ) + tail_cdfs = xp.array([item[1] for item in prepared]) + + if self.callable_axis == 0: + ldf = xp.array(ldf[:, None, None, :]) + tail_cdfs = tail_cdfs[:, None, None] else: - raise ValueError('callable axis needs to be 0 or 1') + ldf = xp.array(ldf[None, :, None, :]) + tail_cdfs = tail_cdfs[None, :, None] + else: - ldf = xp.array([float(self.patterns[item]) for item in obj.ddims]) + fit_patterns = patterns if self.style == "ldf" else cdf_patterns + + # fill any triangle ages missing from the patterns with a factor of 1.0 + for ddim in obj.ddims: + if not any(ddim == k or int(ddim) == int(k) for k in fit_patterns): + fit_patterns[int(ddim)] = 1.0 + + ldf = xp.array([float(fit_patterns[int(item)]) for item in obj.ddims]) ldf = ldf[None, None, None, :] + tail_cdfs = tail_cdf + if self.style == "cdf": ldf = xp.concatenate((ldf[..., :-1] / ldf[..., 1:], ldf[..., -1:]), -1) + + # apply tail_cdf to the last ldfs of the triangle + ldf[..., -1] = ldf[..., -1] * tail_cdfs + obj = obj * ldf obj._set_slicers() + self.ldf_ = obj self.ldf_.is_pattern = True self.ldf_.is_cumulative = False self.ldf_.valuation_date = pd.to_datetime(options.ULT_VAL) + return self def transform(self, X): - """ If X and self are of different shapes, align self to X, else + """If X and self are of different shapes, align self to X, else return self. Parameters diff --git a/chainladder/development/tests/test_constant.py b/chainladder/development/tests/test_constant.py index 1c84fc41..7e3ea677 100644 --- a/chainladder/development/tests/test_constant.py +++ b/chainladder/development/tests/test_constant.py @@ -23,36 +23,477 @@ def test_constant_ldf(raa): dev_c = cl.DevelopmentConstant(patterns=link_ratios, style="ldf").fit(raa) assert xp.allclose(dev.ldf_.values, dev_c.ldf_.values, atol=1e-5) + def test_constant_callable_axis0(clrd, atol): - agway = clrd.loc['Agway Ins Co', 'CumPaidLoss'] + agway = clrd.loc["Agway Ins Co", "CumPaidLoss"] + def paid_cdfs(x): - """ A function that returns different CDFs depending on a specified LOB """ + """A function that returns different CDFs depending on a specified LOB""" cdfs = { - 'comauto': [3.832, 1.874, 1.386, 1.181, 1.085, 1.043, 1.022, 1.013, 1.007, 1], - 'medmal': [24.168, 4.127, 2.103, 1.528, 1.275, 1.161, 1.088, 1.047, 1.018, 1], - 'othliab': [10.887, 3.416, 1.957, 1.433, 1.231, 1.119, 1.06, 1.031, 1.011, 1], - 'ppauto': [2.559, 1.417, 1.181, 1.084, 1.04, 1.019, 1.009, 1.004, 1.001, 1], - 'prodliab': [13.703, 5.613, 2.92, 1.765, 1.385, 1.177, 1.072, 1.034, 1.008, 1], - 'wkcomp': [4.106, 1.865, 1.418, 1.234, 1.141, 1.09, 1.056, 1.03, 1.01, 1]} + "comauto": [ + 3.832, + 1.874, + 1.386, + 1.181, + 1.085, + 1.043, + 1.022, + 1.013, + 1.007, + 1, + ], + "medmal": [ + 24.168, + 4.127, + 2.103, + 1.528, + 1.275, + 1.161, + 1.088, + 1.047, + 1.018, + 1, + ], + "othliab": [ + 10.887, + 3.416, + 1.957, + 1.433, + 1.231, + 1.119, + 1.06, + 1.031, + 1.011, + 1, + ], + "ppauto": [2.559, 1.417, 1.181, 1.084, 1.04, 1.019, 1.009, 1.004, 1.001, 1], + "prodliab": [ + 13.703, + 5.613, + 2.92, + 1.765, + 1.385, + 1.177, + 1.072, + 1.034, + 1.008, + 1, + ], + "wkcomp": [4.106, 1.865, 1.418, 1.234, 1.141, 1.09, 1.056, 1.03, 1.01, 1], + } patterns = pd.DataFrame(cdfs, index=range(12, 132, 12)).T - return patterns.loc[x.loc['LOB']].to_dict() - model = cl.DevelopmentConstant(patterns=paid_cdfs, callable_axis=0, style='cdf') - assert abs(model.fit_transform(agway).cdf_.loc['comauto'].iloc[..., 0].sum() - 3.832) < atol + return patterns.loc[x.loc["LOB"]].to_dict() + + model = cl.DevelopmentConstant(patterns=paid_cdfs, callable_axis=0, style="cdf") + assert ( + abs(model.fit_transform(agway).cdf_.loc["comauto"].iloc[..., 0].sum() - 3.832) + < atol + ) + def test_constant_callable_axis1(clrd, atol): - agway = clrd.loc['Agway Ins Co', 'comauto'] + agway = clrd.loc["Agway Ins Co", "comauto"] cdfs = { - 'IncurLoss': [3.832, 1.874, 1.386, 1.181, 1.085, 1.043, 1.022, 1.013, 1.007, 1], - 'CumPaidLoss': [24.168, 4.127, 2.103, 1.528, 1.275, 1.161, 1.088, 1.047, 1.018, 1], - 'BulkLoss': [10.887, 3.416, 1.957, 1.433, 1.231, 1.119, 1.06, 1.031, 1.011, 1], - 'EarnedPremDIR': [2.559, 1.417, 1.181, 1.084, 1.04, 1.019, 1.009, 1.004, 1.001, 1], - 'EarnedPremCeded': [13.703, 5.613, 2.92, 1.765, 1.385, 1.177, 1.072, 1.034, 1.008, 1], - 'EarnedPremNet': [4.106, 1.865, 1.418, 1.234, 1.141, 1.09, 1.056, 1.03, 1.01, 1]} + "IncurLoss": [3.832, 1.874, 1.386, 1.181, 1.085, 1.043, 1.022, 1.013, 1.007, 1], + "CumPaidLoss": [ + 24.168, + 4.127, + 2.103, + 1.528, + 1.275, + 1.161, + 1.088, + 1.047, + 1.018, + 1, + ], + "BulkLoss": [10.887, 3.416, 1.957, 1.433, 1.231, 1.119, 1.06, 1.031, 1.011, 1], + "EarnedPremDIR": [ + 2.559, + 1.417, + 1.181, + 1.084, + 1.04, + 1.019, + 1.009, + 1.004, + 1.001, + 1, + ], + "EarnedPremCeded": [ + 13.703, + 5.613, + 2.92, + 1.765, + 1.385, + 1.177, + 1.072, + 1.034, + 1.008, + 1, + ], + "EarnedPremNet": [ + 4.106, + 1.865, + 1.418, + 1.234, + 1.141, + 1.09, + 1.056, + 1.03, + 1.01, + 1, + ], + } patterns = pd.DataFrame(cdfs, index=range(12, 132, 12)).T + def paid_cdfs(x): - """ A function that returns different CDFs depending on a specified column """ - return patterns.loc[x.loc['columns']].to_dict() + """A function that returns different CDFs depending on a specified column""" + return patterns.loc[x.loc["columns"]].to_dict() + with pytest.raises(ValueError): - xerror = cl.DevelopmentConstant(patterns=paid_cdfs, callable_axis=2, style='cdf').fit(agway) - lhs = cl.DevelopmentConstant(patterns=paid_cdfs, callable_axis=1, style='cdf').fit(agway).cdf_ - assert np.all(abs(lhs.values[0,:,0,:]-patterns.values[:,:-1]) < atol) \ No newline at end of file + xerror = cl.DevelopmentConstant( + patterns=paid_cdfs, callable_axis=2, style="cdf" + ).fit(agway) + lhs = ( + cl.DevelopmentConstant(patterns=paid_cdfs, callable_axis=1, style="cdf") + .fit(agway) + .cdf_ + ) + assert np.all(abs(lhs.values[0, :, 0, :] - patterns.values) < atol) + + +def test_constant_pattern_no_tail(): + reported_patterns = { + 12: 4.0, + 24: 2.9, + 36: 1.8, + 48: 1.4, + 60: 1.2, + 72: 1.1, + 84: 1.03, + 96: 1.02, + # 108: 1.005, + } + auto_bi = cl.load_sample("friedland_auto_bi_insurer") + reported_BI_claim = cl.DevelopmentConstant( + patterns=reported_patterns, style="cdf" + ).fit_transform(auto_bi["Reported Claims"]) + + assert np.all( + np.round(reported_BI_claim.cdf_.to_frame().values.flatten(), 6) + == np.array([4.0, 2.9, 1.8, 1.4, 1.2, 1.1, 1.03, 1.02]) + ) + assert np.all( + np.round(reported_BI_claim.ldf_.to_frame().values.flatten(), 6) + == np.round( + np.array( + [ + 4.0 / 2.9, + 2.9 / 1.8, + 1.8 / 1.4, + 1.4 / 1.2, + 1.2 / 1.1, + 1.1 / 1.03, + 1.03 / 1.02, + 1.02, + ] + ), + 6, + ) + ) + + +def test_constant_pattern_has_tail(): + reported_patterns = { + 12: 4.0, + 24: 2.9, + 36: 1.8, + 48: 1.4, + 60: 1.2, + 72: 1.1, + 84: 1.03, + 96: 1.02, + 108: 1.005, + } + auto_bi = cl.load_sample("friedland_auto_bi_insurer") + reported_BI_claim = cl.DevelopmentConstant( + patterns=reported_patterns, style="cdf" + ).fit_transform(auto_bi["Reported Claims"]) + + assert np.all( + np.round(reported_BI_claim.cdf_.to_frame().values.flatten(), 6) + == np.array([4.0, 2.9, 1.8, 1.4, 1.2, 1.1, 1.03, 1.02, 1.005]) + ) + assert np.all( + np.round(reported_BI_claim.ldf_.to_frame().values.flatten(), 6) + == np.round( + np.array( + [ + 4.0 / 2.9, + 2.9 / 1.8, + 1.8 / 1.4, + 1.4 / 1.2, + 1.2 / 1.1, + 1.1 / 1.03, + 1.03 / 1.02, + 1.02 / 1.005, + 1.005, + ] + ), + 6, + ) + ) + + +def test_constant_pattern_exact_cdf(raa): + reported_patterns = { + 12: 1.1, + 24: 1.1, + 36: 1.1, + 48: 1.1, + 60: 1.1, + 72: 1.1, + 84: 1.1, + 96: 1.1, + 108: 1.1, + 120: 1.1, + } + + result = cl.DevelopmentConstant( + patterns=reported_patterns, style="cdf" + ).fit_transform(raa) + + assert np.all( + result.cdf_.to_frame().values.flatten() + == np.array([1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1]) + ) + assert np.all( + result.ldf_.to_frame().values.flatten() + == np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.1]) + ) + + +def test_constant_pattern_exact_ldf(raa): + reported_patterns = { + 12: 1.1, + 24: 1.1, + 36: 1.1, + 48: 1.1, + 60: 1.1, + 72: 1.1, + 84: 1.1, + 96: 1.1, + 108: 1.1, + 120: 1.1, + } + + result = cl.DevelopmentConstant( + patterns=reported_patterns, style="ldf" + ).fit_transform(raa) + + assert np.all( + np.round(result.cdf_.to_frame().values.flatten(), 6) + == np.round( + np.array( + [ + 1.1**10, + 1.1**9, + 1.1**8, + 1.1**7, + 1.1**6, + 1.1**5, + 1.1**4, + 1.1**3, + 1.1**2, + 1.1, + ] + ), + 6, + ) + ) + assert np.all( + result.ldf_.to_frame().values.flatten() + == np.array( + [ + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + ] + ) + ) + + +def test_constant_pattern_short_cdf(raa): + reported_patterns = { + 12: 1.1, + 24: 1.1, + 36: 1.1, + 48: 1.1, + 60: 1.1, + 72: 1.1, + # 84: 1.1, + # 96: 1.1, + # 108: 1.1, + # 120: 1.1, + } + + result = cl.DevelopmentConstant( + patterns=reported_patterns, style="cdf" + ).fit_transform(raa) + + assert np.all( + result.cdf_.to_frame().values.flatten() + == np.array([1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.0, 1.0, 1.0]) + ) + assert np.all( + result.ldf_.to_frame().values.flatten() + == np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.1, 1.0, 1.0, 1.0]) + ) + + +def test_constant_pattern_short_ldf(raa): + reported_patterns = { + 12: 1.1, + 24: 1.1, + 36: 1.1, + 48: 1.1, + 60: 1.1, + 72: 1.1, + # 84: 1.1, + # 96: 1.1, + # 108: 1.1, + # 120: 1.1, + } + + result = cl.DevelopmentConstant( + patterns=reported_patterns, style="ldf" + ).fit_transform(raa) + + assert np.all( + np.round(result.cdf_.to_frame().values.flatten(), 6) + == np.round( + np.array([1.1**6, 1.1**5, 1.1**4, 1.1**3, 1.1**2, 1.1, 1.0, 1.0, 1.0]), 6 + ) + ) + assert np.all( + result.ldf_.to_frame().values.flatten() + == np.array([1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.0, 1.0, 1.0]) + ) + + +def test_constant_pattern_long_cdf(raa): + reported_patterns = { + 12: 1.1, + 24: 1.1, + 36: 1.1, + 48: 1.1, + 60: 1.1, + 72: 1.1, + 84: 1.1, + 96: 1.1, + 108: 1.1, + 120: 1.1, + 132: 1.1, + } + + result = cl.DevelopmentConstant( + patterns=reported_patterns, style="cdf" + ).fit_transform(raa) + assert np.all( + np.round(result.cdf_.to_frame().values.flatten(), 6) + == np.array([1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1]) + ) + assert np.all( + np.round(result.ldf_.to_frame().values.flatten(), 6) + == np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.1]) + ) + + +def test_constant_pattern_long_ldf(raa): + reported_patterns = { + 12: 1.1, + 24: 1.1, + 36: 1.1, + 48: 1.1, + 60: 1.1, + 72: 1.1, + 84: 1.1, + 96: 1.1, + 108: 1.1, + 120: 1.1, + 132: 1.1, + } + + result = cl.DevelopmentConstant( + patterns=reported_patterns, style="ldf" + ).fit_transform(raa) + + assert np.all( + np.round(result.cdf_.to_frame().values.flatten(), 6) + == np.round( + np.array( + [ + 1.1**11, + 1.1**10, + 1.1**9, + 1.1**8, + 1.1**7, + 1.1**6, + 1.1**5, + 1.1**4, + 1.1**3, + 1.1**2, + ] + ), + 6, + ) + ) + assert np.all( + result.ldf_.to_frame().values.flatten() + == np.array( + [ + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + 1.1, + 1.1**2, + ] + ) + ) + + +def test_constant_incr(): + raa_incr = cl.load_sample("raa").cum_to_incr() + reported_patterns = { + 12: 4.0, + 24: 2.9, + 36: 1.8, + 48: 1.4, + 60: 1.2, + 72: 1.1, + 84: 1.03, + 96: 1.02, + 108: 1.005, + } + + result = cl.DevelopmentConstant( + patterns=reported_patterns, style="cdf" + ).fit_transform(raa_incr) + + assert np.all( + np.round(result.cdf_.to_frame().values.flatten(), 6) + == np.array([4.0, 2.9, 1.8, 1.4, 1.2, 1.1, 1.03, 1.02, 1.005]) + )