VU-IVM · meh2135 · Nov 4, 2024 · Nov 4, 2024 · Nov 4, 2024
diff --git a/src/vinecopulas/bivariate.py b/src/vinecopulas/bivariate.py
@@ -17,14 +17,15 @@
 from scipy.linalg import cholesky
 from itertools import product
 import sys
-
+from typing import Tuple, List, Union, Literal
+CORR_PARAM_TYPE = Union[float, List[float]]
 #%% Copulas
 
 copulas = {1: 'Gaussian', 2 : 'Gumbel0', 3 :'Gumbel90' , 4 : 'Gumbel180', 5 : 'Gumbel270', 6 : 'Clayton0', 7 : 'Clayton90', 8 : 'Clayton180', 9: 'Clayton270', 10: 'Frank', 11: 'Joe0', 12: 'Joe90', 13: 'Joe180', 14: 'Joe270', 15: 'Student'} 
 
 #%% fitting
 
-def fit(cop, u):
+def fit(cop:int, u: np.ndarray) -> CORR_PARAM_TYPE:
     """
     Fits a specific copula to data.
 
@@ -70,13 +71,14 @@ def fit(cop, u):
         # Perform Cholesky decomposition
         R= np.linalg.cholesky(R)
 
-        def invcdf(p):
+        def invcdf(p: float) -> float:
             if p <= 0.9:
                 q = (1000 / 9) * p
             else:
                 q = 100 * (1 - 10 * (p - 0.9))**-5
             return q
 
+        # TODO: type hints for negloglike
         def negloglike(mu_, u, R):
             nu_ = invcdf(mu_)
             t_values = st.t.ppf(u, nu_)
@@ -101,7 +103,7 @@ def negloglike(mu_, u, R):
 #%% best fit
 
 
-def bestcop(cops, u):
+def bestcop(cops: List[int], u: np.ndarray) -> Tuple[int, CORR_PARAM_TYPE, float]:
     """
     Fits the best copula to data based on a selected list of copulas to fit to using the AIC.
 
@@ -140,7 +142,7 @@ def bestcop(cops, u):
 
 #%%Copula random
 
-def random(cop, par, n): 
+def random(cop: int, par: CORR_PARAM_TYPE, n:int) -> np.ndarray: 
     """
     Generates random numbers from a chosen copula with specific parameters.
 
@@ -170,7 +172,7 @@ def random(cop, par, n):
     if cop > 1 and cop < 6:
         v1 = np.random.uniform(0.00001,0.999999,n)
         v2 = np.random.uniform(0.00001,0.999999,n)
-        def equation2(w):
+        def equation2(w: np.ndarray) -> np.ndarray:
             return (w * (1-(np.log(w)/alpha))) - v2
         w_guess = v2
         w = newton(equation2, w_guess,maxiter=2000)
@@ -217,7 +219,7 @@ def equation2(w):
     if  cop > 10 and cop < 15:
         v1 = np.random.uniform(0.00001,0.999999,n)
         v2 = np.random.uniform(0.00001,0.999999,n)
-        def equation2(w):
+        def equation2(w:np.ndarray) -> np.ndarray:
             return w - ((1/alpha) * ((np.log((1-(1-w)**alpha))*(1-(1-w)**alpha))/((1-w)**(alpha-1)))) - v2
         w_guess = v2
         w = newton(equation2, w_guess,maxiter=2000)
@@ -255,7 +257,7 @@ def equation2(w):
 
 #%% Copula conditional random
 
-def randomconditional(cop, ui, par, n, un = 1):
+def randomconditional(cop:int, ui:np.ndarray, par: CORR_PARAM_TYPE, n:int, un: Literal[1,2] = 1) -> np.ndarray:
 
     """
     Generates conditional random numbers from a chosen copula with specific parameters.
@@ -404,7 +406,7 @@ def randomconditional(cop, ui, par, n, un = 1):
     return uii
 
 #%% CDF
-def CDF(cop, u, par):
+def CDF(cop:int, u: np.ndarray, par: CORR_PARAM_TYPE) -> np.ndarray:
 
     """
     Computes the cumulative distribution function.
@@ -521,8 +523,8 @@ def CDF(cop, u, par):
     return p
 
 #%% PDF
-    
-def PDF(cop, u, par):
+
+def PDF(cop:int, u:np.ndarray, par:CORR_PARAM_TYPE) -> np.ndarray:
 
     """
     Computes the probability density function.
@@ -623,7 +625,7 @@ def PDF(cop, u, par):
 
 #%% h function
 
-def hfunc(cop, u1, u2, par, un = 1):
+def hfunc(cop: int, u1:np.ndarray, u2:np.ndarray, par: CORR_PARAM_TYPE, un: Literal[1,2] = 1) -> np.ndarray:
     """
     Computes the h-function (conditional CDF) of a copula with respect to variable u1 or u2.
 
@@ -773,7 +775,7 @@ def hfunc(cop, u1, u2, par, un = 1):
 
 #%% h-inverse func
 
-def hfuncinverse(cop, ui, y, par, un = 1):
+def hfuncinverse(cop:int, ui: np.ndarray, y:np.ndarray, par:CORR_PARAM_TYPE, un: Literal[1,2] = 1) -> np.ndarray:
     """
     Computes the inverse h-function (inverse conditional CDF) of a copula with respect to variable u1 or u2.
 
@@ -812,11 +814,11 @@ def hfuncinverse(cop, ui, y, par, un = 1):
         else:
             uii_guess = 1- ui
         if un == 1:
-            def equation(u2):
+            def equation(u2: np.ndarray) -> np.ndarray:
                 return hfunc(cop, ui, u2, par, un) - y
             uii = newton(equation, uii_guess,maxiter=200000, tol=1e-10)
         if un == 2:
-            def equation(u1):
+            def equation(u1: np.ndarray) -> np.ndarray:
                 return hfunc(cop, u1, ui, par, un) - y
             uii = newton(equation, uii_guess,maxiter=200000, tol=1e-10)
 
@@ -863,12 +865,12 @@ def equation(u1):
         else:
             uii_guess = 1- ui
         if un == 1:
-            def equation(u2):
+            def equation(u2: np.ndarray) -> np.ndarray:
                 return hfunc(cop, ui, u2, par, un) - y 
 
             uii = newton(equation, uii_guess,maxiter=2000, tol=1e-10)
         if un == 2:
-            def equation(u1):
+            def equation(u1: np.ndarray) -> np.ndarray:
                 return hfunc(cop, u1, ui, par, un) - y
             uii = newton(equation, uii_guess,maxiter=2000, tol=1e-10)
 
@@ -884,7 +886,7 @@ def equation(u1):
     return uii
 
 #%% negative likelyhood
-def neg_likelihood(par,cop,u):
+def neg_likelihood(cop:int,u: np.ndarray, par: CORR_PARAM_TYPE) -> float:
     """
     Computes the negative likelihood function.
 

diff --git a/src/vinecopulas/marginals.py b/src/vinecopulas/marginals.py
@@ -19,10 +19,14 @@
 from itertools import product
 import sys
 
+from typing import Tuple, List, Literal
+DISCRETE_DIST_TYPE =  Tuple[st.rv_discrete, np.ndarray, float]
+CONTINUOUS_DIST_TYPE = Tuple[st.rv_continuous, np.ndarray, float]
+
 # %% best fit discrete distribution
 
 
-def best_fit_distributiondiscrete(data, bound=False, criterion="BIC"):
+def best_fit_distributiondiscrete(data: np.ndarray, bound: bool=False, criterion: Literal["AIC", "BIC", "ML"]="BIC") -> DISCRETE_DIST_TYPE:
     """
     Fits the best discrete distribution to data.
 
@@ -34,7 +38,7 @@ def best_fit_distributiondiscrete(data, bound=False, criterion="BIC"):
         *criterion* : Either BIC, AIC or ML is used to choose the best distribution
 
     Returns:
-     *bestdist* : the best distribution and its parameters.
+     *bestdist* : the best distribution and its parameters, as a tuple of (distribution, parameters, criterion_value).
     """
 
     # distributions
@@ -138,8 +142,9 @@ def best_fit_distributiondiscrete(data, bound=False, criterion="BIC"):
     return bestdist
 
 
+
 # %% best fit distribution
-def best_fit_distribution(data, criterion="BIC", dists = []):
+def best_fit_distribution(data: np.ndarray, criterion: Literal["BIC", "AIC", "ML"]="BIC", dists: List["str"] = [])-> CONTINUOUS_DIST_TYPE:
     """
     Fits the best continuous distribution to data.
 
@@ -235,7 +240,7 @@ def best_fit_distribution(data, criterion="BIC", dists = []):
 
 
 # %% pseudodata
-def pseudodata(data):
+def pseudodata(data: np.ndarray) -> np.ndarray:
     """
     Compute the pseudo-observations for the given data (transforms data to standard uniform margins)
 
@@ -255,7 +260,7 @@ def pseudodata(data):
 
 
 # %%
-def pseudodiscr(xcdf, xpmf):
+def pseudodiscr(xcdf:np.ndarray, xpmf:np.ndarray) -> np.ndarray:
     """
     Compute the pseudo-observations for the given variable that is discrete.
 

diff --git a/src/vinecopulas/vinecopula.py b/src/vinecopulas/vinecopula.py
@@ -15,7 +15,7 @@
 import networkx as nx
 import matplotlib.pyplot as plt
 from vinecopulas.bivariate import *
-
+from typing import List, Tuple, Literal, Union, Optional
 # %% Copulas
 
 copulas = {
@@ -40,7 +40,7 @@
 # %% fitting vinecopula
 
 
-def fit_vinecop(u1, copsi, vine="R", printing=True):
+def fit_vinecop(u1: np.ndarray, copsi: List[int], vine: Literal["R", "C", "D", nx.Graph]="R", printing:bool=True) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """
     Fit a regular vine copula to data.
 
@@ -95,12 +95,14 @@ def fit_vinecop(u1, copsi, vine="R", printing=True):
                 )  # create a new dataframe where the row with the highest ktau is the first row
             else:
                 if (
-                    order1.v1[i] in list(order1.v2[:i])
-                    or order1.v1[i] in list(order1.v1[:i])
+                    order1.v1[i] in list(order2.v2[:i])
+                    or order1.v1[i] in list(order2.v1[:i])
                 ) and (
-                    order1.v2[i] in list(order1.v2[:i])
-                    or order1.v2[i] in list(order1.v1[:i])
-                ):  # see if both v1i and v2i are already in order2, first rows with unique variables need to be added to ensure all variables are included
+                    order1.v2[i] in list(order2.v2[:i])
+                    or order1.v2[i] in list(order2.v1[:i])
+                ):  # see if both v1i and v2i are already in order2, 
+                    # first rows with unique variables need to be 
+                    # added to ensure all variables are included
                     continue
                 else:
                     inde.append(i)  # add used rows to inde
@@ -254,6 +256,19 @@ def fit_vinecop(u1, copsi, vine="R", printing=True):
         order2 = order1[(order1.v1 == i) | (order1.v2 == i)].reset_index(
             drop=True
         )  # select the rows that include this variable
+    else:
+        # vine is a graph.
+        # TODO: confirm that it is a tree and that every column is included in the tree.
+        temp_order = []
+        for edge_src, edge_dest in vine.edges:
+            if edge_src > edge_dest:
+                # Flip them
+                edge_src, edge_dest = edge_dest, edge_src
+            ta_temp = order1.loc[(order1.v1 == edge_src) & (order1.v2 == edge_dest), "tauabs"]
+            assert len(ta_temp) == 1, "The edge is not in the order1"
+            temp_order.append({"v1": edge_src, "v2": edge_dest, "tauabs": ta_temp.values[0]})
+        order2 = pd.DataFrame(temp_order)
+        vine = "R"
 
     order1 = order2  # make order1 == the first tree
     del order2
@@ -1082,7 +1097,7 @@ def fit_vinecop(u1, copsi, vine="R", printing=True):
 
     return a, p, c
 
-def density_vinecop(u, M, P, C):
+def density_vinecop(u: np.ndarray, M: np.ndarray, P:np.ndarray, C:np.ndarray) -> np.ndarray:
     """
     Computes the density function of a vine copula.
 
@@ -1172,7 +1187,7 @@ def density_vinecop(u, M, P, C):
 
 
 # %% fitting vine copula with sspecific structure
-def fit_vinecopstructure(u1, copsi, a):
+def fit_vinecopstructure(u1: np.ndarray, copsi: List[int], a: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
     """
     Fit a regular vine copula to data based on a known vine structure matrix.
 
@@ -1391,7 +1406,7 @@ def fit_vinecopstructure(u1, copsi, a):
 
 
 # %% Sampling vine copula
-def sample_vinecop(a, p, c, s):
+def sample_vinecop(a:np.ndarray, p:np.ndarray, c:np.ndarray, s:np.ndarray) -> np.ndarray:
     """
     Generate random samples from an R-vine.
 
@@ -1485,7 +1500,7 @@ def sample_vinecop(a, p, c, s):
 
 
 # %% Sampling conditonal vine copula
-def sample_vinecopconditional(a, p, c, s, Xc):
+def sample_vinecopconditional(a:np.ndarray, p:np.ndarray, c:np.ndarray, s:int, Xc:np.ndarray) -> np.ndarray:
     """
     Generate conditional samples from an R-vine based on a provided sampling order
 
@@ -1590,19 +1605,13 @@ def sample_vinecopconditional(a, p, c, s, Xc):
 # %%sampling orders
 
 
-def samplingorder(a):
+def samplingorder(a: np.ndarray) -> List[List[int]]:
     """
     Provides all the different sampling orders that are possible for the fitted vine-copula.
 
     Arguments:
         *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.
 
-        *p* : Parameters of the bivariate copulae provided as a triangular matrix.
-
-        *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula (see...refer to where this information would be)
-
-
-
     Returns:
      *sortingorder* :  A list of the different sampling orders available for the fitted vine-copula
 
@@ -1669,7 +1678,7 @@ def samplingorder(a):
 # %%matrices of specific sampling order
 
 
-def samplingmatrix(a, c, p, sorder):
+def samplingmatrix(a:np.ndarray, c:np.ndarray, p:np.ndarray, sorder: List[int]) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """
     Provides the triangular matrices for which the samples can be generated based on the specific sampling order.
 
@@ -1762,14 +1771,14 @@ def samplingmatrix(a, c, p, sorder):
 # %%
 
 
-def fit_conditionalvine(u1, vint, copsi, vine="R", condition=1, printing=True):
+def fit_conditionalvine(u1: np.ndarray, vint: Union[int, List[int]], copsi: List[int], vine: Literal["R", "D", "C", nx.Graph]="R", condition: Literal[1,2]=1, printing:bool=True):
     """
     Fit a regular vine copula which allows for a conditional sample of a variable of interest.
 
     Arguments:
         *u1* :  the data, provided as a numpy array where each column contains a seperate variable (eg. u1,u2,...,un), which have already been transferred to standard uniform margins (0<= u <= 1)
 
-        *vint* : the variables of interest, provided as an integere or list that refers to the variables column numbers in u1, where the first column is 0 and the second column is 1, etc.
+        *vint* : the variables of interest, provided as an integer or list that refers to the variables column numbers in u1, where the first column is 0 and the second column is 1, etc.
 
         *copsi* : A list of integers referring to the copulae of interest for which the fit has to be evauluated in the vine copula. eg. a list of [1, 10] refers to the Gaussian and Frank copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).
 
@@ -1786,6 +1795,9 @@ def fit_conditionalvine(u1, vint, copsi, vine="R", condition=1, printing=True):
       *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).
 
     """
+    if isinstance(vine, nx.Graph):
+        raise NotImplementedError("Only vine=R, D, or C are currently supported")
+    # TODO: Add support for vine as a graph
 
     # Reference: Dißmann et al. 2013 adapted
     dimen = u1.shape[1]  # number of variables (number of columns)
@@ -3464,22 +3476,19 @@ def fit_conditionalvine(u1, vint, copsi, vine="R", condition=1, printing=True):
 # %%
 
 
-def plotvine(a, plottitle=None, variables=None, savepath=None):
+def plotvine(a: np.ndarray, plottitle: Optional[str]=None, variables: Optional[List[str]]=None, savepath: Optional[str]=None):
     """
     Plots the vine structure
 
     Arguments:
         *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.
 
+        *variables* : list of variables
+
         *pltotitle* : title of the plot
 
         *savepath* : path to save the plot
 
-    Returns:
-     *plot*
-
-
-
     """
     dimen = a.shape[0]  # dimension of copula
     order = pd.DataFrame(