From d374f96fa186459429062458d82c9ad1ac521f59 Mon Sep 17 00:00:00 2001 From: benwu Date: Tue, 12 May 2026 16:28:03 +0800 Subject: [PATCH] [ENH] Add EfficientMVSK optimizer for mean-variance-skewness-kurtosis optimization Add MVSK portfolio optimization via the yand-mvsk package, which implements Yau's Affine-Normal Descent algorithm. This extends PyPortfolioOpt beyond mean-variance by jointly optimizing over all four moments, penalizing crash risk (skewness) and tail risk (kurtosis). --- pypfopt/__init__.py | 2 + pypfopt/efficient_frontier/__init__.py | 2 + pypfopt/efficient_frontier/efficient_mvsk.py | 225 +++++++++++++++++++ pyproject.toml | 1 + tests/test_efficient_mvsk.py | 157 +++++++++++++ 5 files changed, 387 insertions(+) create mode 100644 pypfopt/efficient_frontier/efficient_mvsk.py create mode 100644 tests/test_efficient_mvsk.py diff --git a/pypfopt/__init__.py b/pypfopt/__init__.py index 249f0e6d..e347cfca 100755 --- a/pypfopt/__init__.py +++ b/pypfopt/__init__.py @@ -9,6 +9,7 @@ EfficientCDaR, EfficientCVaR, EfficientFrontier, + EfficientMVSK, EfficientSemivariance, ) from .hierarchical_portfolio import HRPOpt @@ -27,6 +28,7 @@ "EfficientSemivariance", "EfficientCVaR", "EfficientCDaR", + "EfficientMVSK", "HRPOpt", "CovarianceShrinkage", ] diff --git a/pypfopt/efficient_frontier/__init__.py b/pypfopt/efficient_frontier/__init__.py index 35fac6e0..59c7fb06 100644 --- a/pypfopt/efficient_frontier/__init__.py +++ b/pypfopt/efficient_frontier/__init__.py @@ -6,6 +6,7 @@ from .efficient_cdar import EfficientCDaR from .efficient_cvar import EfficientCVaR from .efficient_frontier import EfficientFrontier +from .efficient_mvsk import EfficientMVSK from .efficient_semivariance import EfficientSemivariance __all__ = [ @@ -13,4 +14,5 @@ "EfficientCVaR", "EfficientSemivariance", "EfficientCDaR", + "EfficientMVSK", ] diff --git a/pypfopt/efficient_frontier/efficient_mvsk.py b/pypfopt/efficient_frontier/efficient_mvsk.py new file mode 100644 index 00000000..d31a5659 --- /dev/null +++ b/pypfopt/efficient_frontier/efficient_mvsk.py @@ -0,0 +1,225 @@ +""" +The ``efficient_mvsk`` submodule houses the EfficientMVSK class, which +generates optimal portfolios by jointly optimizing over the first four +moments (mean, variance, skewness, kurtosis) using Yau's Affine-Normal +Descent algorithm. + +Requires the ``yand-mvsk`` package: ``pip install yand-mvsk``. +""" + +import numpy as np +import pandas as pd + +from pypfopt.base import BaseOptimizer + + +class EfficientMVSK(BaseOptimizer): + """ + Optimize portfolios using the mean-variance-skewness-kurtosis (MVSK) + framework via the YAND algorithm (Wang, Niu, Sheshmani, Yau, 2025). + + The objective minimized is:: + + f(x) = -c1 * mean(x) + c2 * var(x) - c3 * skew(x) + c4 * kurt(x) + + where the preference coefficients ``c`` can be derived from a CRRA + utility function or specified directly. + + This extends the classical mean-variance framework by penalizing + negative skewness (crash risk) and excess kurtosis (tail risk). + + Instance variables: + + - Inputs: + + - ``n_assets`` - int + - ``tickers`` - str list + - ``returns`` - np.ndarray, shape (T, n) + - ``gamma`` - float, CRRA risk aversion + - ``c`` - np.ndarray, shape (4,), preference vector + + - Output: ``weights`` - np.ndarray + + Public methods: + + - ``min_mvsk()`` minimizes the MVSK objective + - ``portfolio_performance()`` calculates return, volatility, Sharpe, + skewness and excess kurtosis of the optimized portfolio. + - ``clean_weights()`` rounds the weights and clips near-zeros. + - ``save_weights_to_file()`` saves the weights to csv, json, or txt. + """ + + def __init__( + self, + returns, + gamma=6.0, + c=None, + weight_bounds=(0, 1), + ): + """ + Parameters + ---------- + returns : pd.DataFrame or np.array + (historic) returns for all your assets (no NaNs). + Rows are observations, columns are assets. + gamma : float, optional + CRRA risk-aversion parameter, defaults to 6.0. + Higher values penalize variance, skewness and kurtosis more. + Ignored if ``c`` is provided. + c : array-like, shape (4,), optional + Custom preference vector [c1, c2, c3, c4]. + If provided, overrides ``gamma``. + weight_bounds : tuple (float, float), optional + (min_weight, max_weight) for each asset, defaults to (0, 1). + Only long-only constraints are fully supported; upper bounds + are accepted for API compatibility but values < 1 will raise + NotImplementedError. + + Raises + ------ + ImportError + if ``yand-mvsk`` is not installed + NotImplementedError + if per-asset upper weight bounds < 1 are requested + """ + try: + from yand_mvsk import crra_coefficients, check_convexity + except ImportError: + raise ImportError( + "Please install yand-mvsk: pip install yand-mvsk" + ) + + if isinstance(returns, pd.DataFrame): + tickers = list(returns.columns) + returns = returns.values + else: + tickers = None + + self.returns = np.asarray(returns, dtype=float) + n_assets = self.returns.shape[1] + + if tickers is None: + tickers = list(range(n_assets)) + + super().__init__(n_assets, tickers) + + self.gamma = gamma + self.c = np.asarray(c, dtype=float) if c is not None else crra_coefficients(gamma) + self._convex = check_convexity(self.c) + self._result = None + + # Parse weight bounds + if isinstance(weight_bounds, tuple) and len(weight_bounds) == 2: + lb, ub = weight_bounds + self._lower_bound = 0.0 if lb is None else float(lb) + if ub is not None and ub < 1.0: + raise NotImplementedError( + "Per-asset upper weight bounds < 1 are not yet supported " + "by the YAND-MVSK solver." + ) + else: + raise TypeError( + "weight_bounds must be a (lower, upper) tuple. " + "Per-asset bounds are not supported." + ) + + @classmethod + def from_prices(cls, prices, gamma=6.0, **kwargs): + """ + Create an EfficientMVSK instance from a price DataFrame or array. + Automatically computes simple returns. + + Parameters + ---------- + prices : pd.DataFrame or np.array + historic asset prices + gamma : float, optional + CRRA risk-aversion parameter, defaults to 6.0 + **kwargs + forwarded to the constructor + + Returns + ------- + EfficientMVSK + """ + if isinstance(prices, pd.DataFrame): + returns = prices.pct_change().dropna() + else: + prices = np.asarray(prices, dtype=float) + returns = prices[1:] / prices[:-1] - 1.0 + return cls(returns, gamma=gamma, **kwargs) + + def min_mvsk(self): + """ + Minimize the MVSK objective function (maximize risk-adjusted utility + across the first four moments). + + Returns + ------- + OrderedDict + asset weights for the MVSK-optimal portfolio + """ + from yand_mvsk import yand_mvsk_solve + + tau = max(self._lower_bound, 1e-8) + self._result = yand_mvsk_solve(self.returns, self.c, tau=tau) + + if not self._result.converged: + import warnings + warnings.warn( + f"YAND-MVSK solver did not converge after {self._result.n_iter} " + f"iterations (KKT residual: {self._result.kkt_residual:.2e}). " + "Results may be suboptimal." + ) + + self.weights = self._result.x + return self._make_output_weights() + + @property + def convex(self): + """Whether the preference coefficients satisfy the convexity condition.""" + return self._convex + + def portfolio_performance(self, verbose=False, risk_free_rate=0.0): + """ + After optimizing, calculate (and optionally print) the performance + of the optimal portfolio, including higher-moment statistics. + + Parameters + ---------- + verbose : bool, optional + whether performance should be printed, defaults to False + risk_free_rate : float, optional + risk-free rate of borrowing/lending, defaults to 0.0 + + Raises + ------ + ValueError + if weights have not been calculated yet + + Returns + ------- + (float, float, float, float, float) + expected return, volatility, Sharpe ratio, skewness, excess kurtosis. + """ + if self.weights is None: + raise ValueError("Weights not yet computed") + + port_returns = self.returns @ self.weights + mu = port_returns.mean() * 252 + vol = port_returns.std(ddof=1) * np.sqrt(252) + sharpe = (mu - risk_free_rate) / vol if vol > 1e-10 else 0.0 + + centered = port_returns - port_returns.mean() + m2 = np.mean(centered ** 2) + skew = np.mean(centered ** 3) / (m2 ** 1.5) if m2 > 1e-30 else 0.0 + kurt = np.mean(centered ** 4) / (m2 ** 2) - 3.0 if m2 > 1e-30 else 0.0 + + if verbose: + print("Expected annual return: {:.1f}%".format(100 * mu)) + print("Annual volatility: {:.1f}%".format(100 * vol)) + print("Sharpe Ratio: {:.2f}".format(sharpe)) + print("Skewness: {:.3f}".format(skew)) + print("Excess Kurtosis: {:.3f}".format(kurt)) + + return mu, vol, sharpe, skew, kurt diff --git a/pyproject.toml b/pyproject.toml index b2c99ee1..027197d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ all_extras = [ "ecos>=2.0.14,<2.1", "plotly>=5.0.0,<7", "cvxopt; python_version < '3.14'", + "yand-mvsk>=0.2.0", ] # dev - the developer dependency set, for contributors and CI diff --git a/tests/test_efficient_mvsk.py b/tests/test_efficient_mvsk.py new file mode 100644 index 00000000..e8c656f5 --- /dev/null +++ b/tests/test_efficient_mvsk.py @@ -0,0 +1,157 @@ +import numpy as np +import pytest +from skbase.utils.dependencies import _check_soft_dependencies + +from pypfopt import expected_returns +from tests.utilities_for_tests import get_data + +mvsk_installed = _check_soft_dependencies("yand-mvsk", severity="none") + + +def setup_efficient_mvsk(**kwargs): + from pypfopt import EfficientMVSK + + df = get_data().dropna(axis=0, how="any") + historic_returns = expected_returns.returns_from_prices(df) + return EfficientMVSK(historic_returns, **kwargs) + + +def setup_efficient_mvsk_from_prices(**kwargs): + from pypfopt import EfficientMVSK + + df = get_data().dropna(axis=0, how="any") + return EfficientMVSK.from_prices(df, **kwargs) + + +@pytest.mark.skipif(not mvsk_installed, reason="yand-mvsk not installed") +class TestEfficientMVSK: + def test_mvsk_example(self): + mv = setup_efficient_mvsk() + w = mv.min_mvsk() + + assert isinstance(w, dict) + assert set(w.keys()) == set(mv.tickers) + np.testing.assert_almost_equal(mv.weights.sum(), 1) + assert all(i >= -1e-5 for i in w.values()) + + def test_mvsk_converged(self): + mv = setup_efficient_mvsk() + mv.min_mvsk() + assert mv._result.converged + + def test_mvsk_beats_equal_weight(self): + mv = setup_efficient_mvsk() + mv.min_mvsk() + from yand_mvsk import MVSKOracle + + oracle = MVSKOracle(mv.returns, mv.c) + eq = np.ones(mv.n_assets) / mv.n_assets + assert oracle.value(mv.weights) <= oracle.value(eq) + + def test_mvsk_weights_long_only(self): + mv = setup_efficient_mvsk() + mv.min_mvsk() + assert np.all(mv.weights >= -1e-8) + + def test_mvsk_clean_weights(self): + mv = setup_efficient_mvsk() + mv.min_mvsk() + clean = mv.clean_weights(cutoff=1e-4, rounding=4) + assert isinstance(clean, dict) + assert set(clean.keys()) == set(mv.tickers) + assert all(v >= 0 for v in clean.values()) + + def test_mvsk_portfolio_performance(self): + mv = setup_efficient_mvsk() + mv.min_mvsk() + perf = mv.portfolio_performance() + + assert isinstance(perf, tuple) + assert len(perf) == 5 + mu, vol, sharpe, skew, kurt = perf + assert isinstance(mu, float) + assert vol > 0 + + def test_mvsk_portfolio_performance_verbose(self, capsys): + mv = setup_efficient_mvsk() + mv.min_mvsk() + perf = mv.portfolio_performance(verbose=True) + captured = capsys.readouterr() + assert "Expected annual return" in captured.out + assert "Sharpe Ratio" in captured.out + assert "Skewness" in captured.out + + perf2 = mv.portfolio_performance() + np.testing.assert_equal(perf, perf2) + + def test_mvsk_performance_before_optimize_raises(self): + mv = setup_efficient_mvsk() + with pytest.raises(ValueError): + mv.portfolio_performance() + + def test_mvsk_from_prices(self): + mv = setup_efficient_mvsk_from_prices() + w = mv.min_mvsk() + assert isinstance(w, dict) + np.testing.assert_almost_equal(mv.weights.sum(), 1) + + def test_mvsk_from_prices_numpy(self): + from pypfopt import EfficientMVSK + + df = get_data().dropna(axis=0, how="any") + mv = EfficientMVSK.from_prices(df.values) + w = mv.min_mvsk() + assert isinstance(w, dict) + np.testing.assert_almost_equal(mv.weights.sum(), 1) + + def test_mvsk_custom_gamma(self): + mv2 = setup_efficient_mvsk(gamma=2.0) + mv2.min_mvsk() + + mv20 = setup_efficient_mvsk(gamma=20.0) + mv20.min_mvsk() + + vol2 = mv2.portfolio_performance()[1] + vol20 = mv20.portfolio_performance()[1] + assert vol2 >= vol20 + + def test_mvsk_custom_coefficients(self): + from yand_mvsk import crra_coefficients + + c = crra_coefficients(6.0) + mv = setup_efficient_mvsk(c=c) + w = mv.min_mvsk() + assert isinstance(w, dict) + np.testing.assert_almost_equal(mv.weights.sum(), 1) + + def test_mvsk_convex_property(self): + mv = setup_efficient_mvsk(gamma=6.0) + assert mv.convex + + def test_mvsk_weight_bounds_lower(self): + mv = setup_efficient_mvsk(weight_bounds=(0.01, 1)) + mv.min_mvsk() + assert np.all(mv.weights >= 0.01 - 1e-6) + + def test_mvsk_upper_bounds_raises(self): + with pytest.raises(NotImplementedError): + setup_efficient_mvsk(weight_bounds=(0, 0.5)) + + def test_mvsk_bad_bounds_raises(self): + with pytest.raises(TypeError): + setup_efficient_mvsk(weight_bounds=(0, 0.5, 1)) + + def test_mvsk_tickers(self): + mv = setup_efficient_mvsk() + w = mv.min_mvsk() + assert "GOOG" in w + assert "AAPL" in w + + def test_mvsk_save_weights(self, tmp_path): + mv = setup_efficient_mvsk() + mv.min_mvsk() + filepath = str(tmp_path / "weights.csv") + mv.save_weights_to_file(filepath) + import os + + assert os.path.exists(filepath)