From 9f08e242ec8b682329e1015af24133a6b584daa8 Mon Sep 17 00:00:00 2001
From: Mike Hankin
Date: Sun, 3 Nov 2024 21:23:05 -0800
Subject: [PATCH 1/3] type hints for vinecopula.py, nx Graph handling for
 fit_vinecop, and placeholder raise NotImplementedError for fit_conditionalvine

---
 src/vinecopulas/vinecopula.py | 63 ++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 27 deletions(-)

diff --git a/src/vinecopulas/vinecopula.py b/src/vinecopulas/vinecopula.py
index e0c7ebb..3bad4cc 100644
--- a/src/vinecopulas/vinecopula.py
+++ b/src/vinecopulas/vinecopula.py
@@ -15,7 +15,7 @@
 import networkx as nx
 import matplotlib.pyplot as plt
 from vinecopulas.bivariate import *
-
+from typing import List, Tuple, Literal, Union, Optional
 
 # %% Copulas
 copulas = {
@@ -40,7 +40,7 @@
 
 # %% fitting vinecopula
 
-def fit_vinecop(u1, copsi, vine="R", printing=True):
+def fit_vinecop(u1: np.ndarray, copsi: List[int], vine: Union[Literal["R", "C", "D"], nx.Graph] = "R", printing: bool = True) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """
     Fit a regular vine copula to data.
 
@@ -95,12 +95,14 @@ def fit_vinecop(u1, copsi, vine="R", printing=True):
             )  # create a new dataframe where the row with the highest ktau is the first row
         else:
             if (
-                order1.v1[i] in list(order1.v2[:i])
-                or order1.v1[i] in list(order1.v1[:i])
+                order1.v1[i] in list(order2.v2[:i])
+                or order1.v1[i] in list(order2.v1[:i])
             ) and (
-                order1.v2[i] in list(order1.v2[:i])
-                or order1.v2[i] in list(order1.v1[:i])
-            ):  # see if both v1i and v2i are already in order2, first rows with unique variables need to be added to ensure all variables are included
+                order1.v2[i] in list(order2.v2[:i])
+                or order1.v2[i] in list(order2.v1[:i])
+            ):  # see if both v1i and v2i are already in order2,
+                # first rows with unique variables need to be
+                # added to ensure all variables are included
                 continue
             else:
                 inde.append(i)  # add used rows to inde
@@ -254,6 +256,19 @@ def fit_vinecop(u1, copsi, vine="R", printing=True):
                 order2 = order1[(order1.v1 == i) | (order1.v2 == i)].reset_index(
                     drop=True
                 )  # select the rows that include this variable
+    else:
+        # vine is a graph specifying the first tree.
+        # TODO: confirm that it is a tree and that every column is included in the tree.
+        temp_order = []
+        for edge_src, edge_dest in vine.edges:
+            if edge_src > edge_dest:
+                # Flip so the smaller column index comes first, matching order1.
+                edge_src, edge_dest = edge_dest, edge_src
+            ta_temp = order1.loc[(order1.v1 == edge_src) & (order1.v2 == edge_dest), "tauabs"]
+            assert len(ta_temp) == 1, "Edge is not in order1"
+            temp_order.append({"v1": edge_src, "v2": edge_dest, "tauabs": ta_temp.values[0]})
+        order2 = pd.DataFrame(temp_order)
+        vine = "R"
 
     order1 = order2  # make order1 == the first tree
     del order2
@@ -1082,7 +1097,7 @@ def fit_vinecop(u1, copsi, vine="R", printing=True):
     return a, p, c
 
 
-def density_vinecop(u, M, P, C):
+def density_vinecop(u: np.ndarray, M: np.ndarray, P: np.ndarray, C: np.ndarray) -> np.ndarray:
    """
     Computes the density function of a vine copula.
 
@@ -1172,7 +1187,7 @@ def density_vinecop(u, M, P, C):
 
 # %% fitting vine copula with sspecific structure
 
-def fit_vinecopstructure(u1, copsi, a):
+def fit_vinecopstructure(u1: np.ndarray, copsi: List[int], a: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
     """
     Fit a regular vine copula to data based on a known vine structure matrix.
 
@@ -1391,7 +1406,7 @@ def fit_vinecopstructure(u1, copsi, a):
 
 # %% Sampling vine copula
 
-def sample_vinecop(a, p, c, s):
+def sample_vinecop(a: np.ndarray, p: np.ndarray, c: np.ndarray, s: np.ndarray) -> np.ndarray:
     """
     Generate random samples from an R-vine.
 
@@ -1485,7 +1500,7 @@ def sample_vinecop(a, p, c, s):
 
 # %% Sampling conditonal vine copula
 
-def sample_vinecopconditional(a, p, c, s, Xc):
+def sample_vinecopconditional(a: np.ndarray, p: np.ndarray, c: np.ndarray, s: int, Xc: np.ndarray) -> np.ndarray:
     """
     Generate conditional samples from an R-vine based on a provided sampling order
 
@@ -1590,19 +1605,13 @@ def sample_vinecopconditional(a, p, c, s, Xc):
 
 # %%sampling orders
 
-def samplingorder(a):
+def samplingorder(a: np.ndarray) -> List[List[int]]:
     """
     Provides all the different sampling orders that are possible for the fitted vine-copula.
 
     Arguments:
         *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.
 
-        *p* : Parameters of the bivariate copulae provided as a triangular matrix.
-
-        *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula (see...refer to where this information would be)
-
-
-
     Returns:
         *sortingorder* : A list of the different sampling orders available for the fitted vine-copula
 
@@ -1669,7 +1678,7 @@ def samplingorder(a):
 
 # %%matrices of specific sampling order
 
-def samplingmatrix(a, c, p, sorder):
+def samplingmatrix(a: np.ndarray, c: np.ndarray, p: np.ndarray, sorder: List[int]) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """
     Provides the triangular matrices for which the samples can be generated based on the specific sampling order.
 
@@ -1762,14 +1771,14 @@
 
 # %%
 
-def fit_conditionalvine(u1, vint, copsi, vine="R", condition=1, printing=True):
+def fit_conditionalvine(u1: np.ndarray, vint: Union[int, List[int]], copsi: List[int], vine: Union[Literal["R", "D", "C"], nx.Graph] = "R", condition: Literal[1, 2] = 1, printing: bool = True):
     """
     Fit a regular vine copula which allows for a conditional sample of a variable of interest.
 
     Arguments:
         *u1* : the data, provided as a numpy array where each column contains a seperate variable (eg. u1,u2,...,un), which have already been transferred to standard uniform margins (0<= u <= 1)
 
-        *vint* : the variables of interest, provided as an integere or list that refers to the variables column numbers in u1, where the first column is 0 and the second column is 1, etc.
+        *vint* : the variables of interest, provided as an integer or list that refers to the variables column numbers in u1, where the first column is 0 and the second column is 1, etc.
 
         *copsi* : A list of integers referring to the copulae of interest for which the fit has to be evauluated in the vine copula. eg. a list of [1, 10] refers to the Gaussian and Frank copula (see `Table 1 `__).
 
@@ -1786,6 +1795,9 @@ def fit_conditionalvine(u1, vint, copsi, vine="R", condition=1, printing=True):
         *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula (see `Table 1 `__).
 
     """
+    if isinstance(vine, nx.Graph):
+        # TODO: Add support for vine as a graph
+        raise NotImplementedError("Only vine='R', 'D', or 'C' is currently supported")
     # Reference: Dißmann et al. 2013 adapted
     dimen = u1.shape[1]  # number of variables (number of columns)
 
@@ -3464,22 +3476,19 @@ def fit_conditionalvine(u1, vint, copsi, vine="R", condition=1, printing=True):
 
 # %%
 
-def plotvine(a, plottitle=None, variables=None, savepath=None):
+def plotvine(a: np.ndarray, plottitle: Optional[str] = None, variables: Optional[List[str]] = None, savepath: Optional[str] = None):
     """
     Plots the vine structure
 
     Arguments:
         *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.
 
+        *variables* : list of variables
+        *plottitle* : title of the plot
 
         *savepath* : path to save the plot
 
-    Returns:
-    *plot*
-
-
-
     """
     dimen = a.shape[0]  # dimension of copula
     order = pd.DataFrame(

From 6c81189d54c8472ddbfa6cd0388d3f2c35091a2a Mon Sep 17 00:00:00 2001
From: Mike Hankin
Date: Sun, 3 Nov 2024 21:30:13 -0800
Subject: [PATCH 2/3] type hints for marginals.py

---
 src/vinecopulas/marginals.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/vinecopulas/marginals.py b/src/vinecopulas/marginals.py
index 9de3f08..2a8c800 100644
--- a/src/vinecopulas/marginals.py
+++ b/src/vinecopulas/marginals.py
@@ -19,10 +19,14 @@
 from itertools import product
 import sys
 
+from typing import Tuple, List, Literal
+
+DISCRETE_DIST_TYPE = Tuple[st.rv_discrete, np.ndarray, float]
+CONTINUOUS_DIST_TYPE = Tuple[st.rv_continuous, np.ndarray, float]
 
 # %% best fit discrete distribution
 
-def best_fit_distributiondiscrete(data, bound=False, criterion="BIC"):
+def best_fit_distributiondiscrete(data: np.ndarray, bound: bool = False, criterion: Literal["AIC", "BIC", "ML"] = "BIC") -> DISCRETE_DIST_TYPE:
     """
     Fits the best discrete distribution to data.
 
@@ -34,7 +38,7 @@ def best_fit_distributiondiscrete(data, bound=False, criterion="BIC"):
         *criterion* : Either BIC, AIC or ML is used to choose the best distribution
 
     Returns:
-        *bestdist* : the best distribution and its parameters.
+        *bestdist* : the best distribution and its parameters, as a tuple of (distribution, parameters, criterion_value).
 
     """
     # distributions
@@ -138,8 +142,9 @@ def best_fit_distributiondiscrete(data, bound=False, criterion="BIC"):
 
     return bestdist
 
+
 # %% best fit distribution
-def best_fit_distribution(data, criterion="BIC", dists = []):
+def best_fit_distribution(data: np.ndarray, criterion: Literal["BIC", "AIC", "ML"] = "BIC", dists: List[str] = []) -> CONTINUOUS_DIST_TYPE:
     """
     Fits the best continuous distribution to data.
 
@@ -235,7 +240,7 @@ def best_fit_distribution(data, criterion="BIC", dists = []):
 
 # %% pseudodata
 
-def pseudodata(data):
+def pseudodata(data: np.ndarray) -> np.ndarray:
     """
     Compute the pseudo-observations for the given data (transforms data to standard uniform margins)
 
@@ -255,7 +260,7 @@ def pseudodata(data):
 
 # %%
 
-def pseudodiscr(xcdf, xpmf):
+def pseudodiscr(xcdf: np.ndarray, xpmf: np.ndarray) -> np.ndarray:
     """
     Compute the pseudo-observations for the given variable that is discrete.
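
Illustrative usage sketch for the new nx.Graph handling in fit_vinecop from patch 1 (the toy data, seed, and candidate copula codes below are assumptions, not part of the patch): the graph is taken to be a spanning tree whose nodes are column indices of u1, each edge being looked up as a (v1, v2) pair in the first-tree candidate list.

import networkx as nx
import numpy as np
from vinecopulas.vinecopula import fit_vinecop

rng = np.random.default_rng(7)
u1 = rng.uniform(size=(500, 4))   # toy pseudo-observations on (0, 1) for 4 variables
copsi = [1, 10]                   # candidate copulas: 1 = Gaussian, 10 = Frank

# First tree supplied explicitly as a graph over the column indices of u1.
first_tree = nx.Graph()
first_tree.add_edges_from([(0, 1), (1, 2), (2, 3)])

a, p, c = fit_vinecop(u1, copsi, vine=first_tree, printing=False)
print(a)  # vine structure matrix built from the supplied first tree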
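
A small sketch of the typed marginals API from patch 2, assuming best_fit_distribution accepts a one-dimensional sample and pseudodata a two-dimensional array of observations; the unpacking follows the CONTINUOUS_DIST_TYPE alias (distribution, parameters, criterion value).

import numpy as np
from vinecopulas.marginals import best_fit_distribution, pseudodata

rng = np.random.default_rng(0)
data = rng.normal(loc=3.0, scale=1.5, size=(400, 2))   # toy sample, two variables

# CONTINUOUS_DIST_TYPE: (scipy distribution, fitted parameters, criterion value)
dist, params, score = best_fit_distribution(data[:, 0], criterion="BIC")

u = pseudodata(data)   # pseudo-observations on standard uniform margins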
From 00026223989a55a555128dd83885727286b3f8a0 Mon Sep 17 00:00:00 2001
From: Mike Hankin
Date: Sun, 3 Nov 2024 21:42:50 -0800
Subject: [PATCH 3/3] all but one func have typehints for bivariate.py

---
 src/vinecopulas/bivariate.py | 38 ++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/src/vinecopulas/bivariate.py b/src/vinecopulas/bivariate.py
index 9913fd8..c07c2a7 100644
--- a/src/vinecopulas/bivariate.py
+++ b/src/vinecopulas/bivariate.py
@@ -17,14 +17,15 @@
 from scipy.linalg import cholesky
 from itertools import product
 import sys
-
+from typing import Tuple, List, Union, Literal
+CORR_PARAM_TYPE = Union[float, List[float]]
 
 #%% Copulas
 copulas = {1: 'Gaussian', 2 : 'Gumbel0', 3 :'Gumbel90' , 4 : 'Gumbel180', 5 : 'Gumbel270', 6 : 'Clayton0', 7 : 'Clayton90', 8 : 'Clayton180', 9: 'Clayton270', 10: 'Frank', 11: 'Joe0', 12: 'Joe90', 13: 'Joe180', 14: 'Joe270', 15: 'Student'}
 
 #%% fitting
-def fit(cop, u):
+def fit(cop: int, u: np.ndarray) -> CORR_PARAM_TYPE:
     """
     Fits a specific copula to data.
 
@@ -70,13 +71,14 @@
 
         # Perform Cholesky decomposition
         R= np.linalg.cholesky(R)
-        def invcdf(p):
+        def invcdf(p: float) -> float:
             if p <= 0.9:
                 q = (1000 / 9) * p
             else:
                 q = 100 * (1 - 10 * (p - 0.9))**-5
             return q
+        # TODO: type hints for negloglike
         def negloglike(mu_, u, R):
             nu_ = invcdf(mu_)
             t_values = st.t.ppf(u, nu_)
@@ -101,7 +103,7 @@ def negloglike(mu_, u, R):
 
 #%% best fit
 
-def bestcop(cops, u):
+def bestcop(cops: List[int], u: np.ndarray) -> Tuple[int, CORR_PARAM_TYPE, float]:
     """
     Fits the best copula to data based on a selected list of copulas to fit to using the AIC.
 
@@ -140,7 +142,7 @@ def bestcop(cops, u):
 
 #%%Copula random
 
-def random(cop, par, n):
+def random(cop: int, par: CORR_PARAM_TYPE, n: int) -> np.ndarray:
     """
     Generates random numbers from a chosen copula with specific parameters.
 
@@ -170,7 +172,7 @@ def random(cop, par, n):
     if cop > 1 and cop < 6:
         v1 = np.random.uniform(0.00001,0.999999,n)
         v2 = np.random.uniform(0.00001,0.999999,n)
-        def equation2(w):
+        def equation2(w: np.ndarray) -> np.ndarray:
             return (w * (1-(np.log(w)/alpha))) - v2
         w_guess = v2
         w = newton(equation2, w_guess,maxiter=2000)
@@ -217,7 +219,7 @@ def equation2(w):
     if cop > 10 and cop < 15:
         v1 = np.random.uniform(0.00001,0.999999,n)
         v2 = np.random.uniform(0.00001,0.999999,n)
-        def equation2(w):
+        def equation2(w: np.ndarray) -> np.ndarray:
             return w - ((1/alpha) * ((np.log((1-(1-w)**alpha))*(1-(1-w)**alpha))/((1-w)**(alpha-1)))) - v2
         w_guess = v2
         w = newton(equation2, w_guess,maxiter=2000)
@@ -255,7 +257,7 @@ def equation2(w):
 
 #%% Copula conditional random
 
-def randomconditional(cop, ui, par, n, un = 1):
+def randomconditional(cop: int, ui: np.ndarray, par: CORR_PARAM_TYPE, n: int, un: Literal[1, 2] = 1) -> np.ndarray:
     """
     Generates conditional random numbers from a chosen copula with specific parameters.
 
@@ -404,7 +406,7 @@ def randomconditional(cop, ui, par, n, un = 1):
     return uii
 
 #%% CDF
-def CDF(cop, u, par):
+def CDF(cop: int, u: np.ndarray, par: CORR_PARAM_TYPE) -> np.ndarray:
     """
     Computes the cumulative distribution function.
 
@@ -521,8 +523,8 @@ def CDF(cop, u, par):
     return p
 
 #%% PDF
-
-def PDF(cop, u, par):
+
+def PDF(cop: int, u: np.ndarray, par: CORR_PARAM_TYPE) -> np.ndarray:
     """
     Computes the probability density function.
@@ -623,7 +625,7 @@ def PDF(cop, u, par):
 
 #%% h function
 
-def hfunc(cop, u1, u2, par, un = 1):
+def hfunc(cop: int, u1: np.ndarray, u2: np.ndarray, par: CORR_PARAM_TYPE, un: Literal[1, 2] = 1) -> np.ndarray:
     """
     Computes the h-function (conditional CDF) of a copula with respect to variable u1 or u2.
 
@@ -773,7 +775,7 @@ def hfunc(cop, u1, u2, par, un = 1):
 
 #%% h-inverse func
 
-def hfuncinverse(cop, ui, y, par, un = 1):
+def hfuncinverse(cop: int, ui: np.ndarray, y: np.ndarray, par: CORR_PARAM_TYPE, un: Literal[1, 2] = 1) -> np.ndarray:
     """
     Computes the inverse h-function (inverse conditional CDF) of a copula with respect to variable u1 or u2.
 
@@ -812,11 +814,11 @@ def hfuncinverse(cop, ui, y, par, un = 1):
         else:
             uii_guess = 1- ui
         if un == 1:
-            def equation(u2):
+            def equation(u2: np.ndarray) -> np.ndarray:
                 return hfunc(cop, ui, u2, par, un) - y
             uii = newton(equation, uii_guess,maxiter=200000, tol=1e-10)
 
         if un == 2:
-            def equation(u1):
+            def equation(u1: np.ndarray) -> np.ndarray:
                 return hfunc(cop, u1, ui, par, un) - y
             uii = newton(equation, uii_guess,maxiter=200000, tol=1e-10)
@@ -863,12 +865,12 @@ def equation(u1):
         else:
             uii_guess = 1- ui
         if un == 1:
-            def equation(u2):
+            def equation(u2: np.ndarray) -> np.ndarray:
                 return hfunc(cop, ui, u2, par, un) - y
             uii = newton(equation, uii_guess,maxiter=2000, tol=1e-10)
 
         if un == 2:
-            def equation(u1):
+            def equation(u1: np.ndarray) -> np.ndarray:
                 return hfunc(cop, u1, ui, par, un) - y
             uii = newton(equation, uii_guess,maxiter=2000, tol=1e-10)
 
@@ -884,7 +886,7 @@ def equation(u1):
     return uii
 
 #%% negative likelyhood
-def neg_likelihood(par,cop,u):
+def neg_likelihood(par: CORR_PARAM_TYPE, cop: int, u: np.ndarray) -> float:
     """
     Computes the negative likelihood function.
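
Minimal sketch of the typed bivariate API from patch 3 (copula codes follow the module's copulas dict, e.g. 1 = Gaussian, 10 = Frank, 15 = Student; the data below are assumed toy pseudo-observations, and the unpacking of bestcop follows its annotated return type Tuple[int, CORR_PARAM_TYPE, float]).

import numpy as np
from vinecopulas.bivariate import bestcop, random, CDF, hfunc

rng = np.random.default_rng(1)
u = rng.uniform(0.001, 0.999, size=(1000, 2))   # toy pseudo-observations, two variables

cop, par, aic = bestcop([1, 10, 15], u)   # best of Gaussian, Frank, Student by AIC
samples = random(cop, par, 200)           # 200 random pairs from the fitted copula
p = CDF(cop, samples, par)                # copula CDF evaluated at the sampled pairs
h = hfunc(cop, samples[:, 0], samples[:, 1], par, un=1)   # h-function with respect to the first variable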