Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 20 additions & 18 deletions src/vinecopulas/bivariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@
from scipy.linalg import cholesky
from itertools import product
import sys

from typing import Tuple, List, Union, Literal
CORR_PARAM_TYPE = Union[float, List[float]]
#%% Copulas

copulas = {1: 'Gaussian', 2 : 'Gumbel0', 3 :'Gumbel90' , 4 : 'Gumbel180', 5 : 'Gumbel270', 6 : 'Clayton0', 7 : 'Clayton90', 8 : 'Clayton180', 9: 'Clayton270', 10: 'Frank', 11: 'Joe0', 12: 'Joe90', 13: 'Joe180', 14: 'Joe270', 15: 'Student'}

#%% fitting

def fit(cop, u):
def fit(cop:int, u: np.ndarray) -> CORR_PARAM_TYPE:
"""
Fits a specific copula to data.

Expand Down Expand Up @@ -70,13 +71,14 @@ def fit(cop, u):
# Perform Cholesky decomposition
R= np.linalg.cholesky(R)

def invcdf(p):
def invcdf(p: float) -> float:
if p <= 0.9:
q = (1000 / 9) * p
else:
q = 100 * (1 - 10 * (p - 0.9))**-5
return q

# TODO: type hints for negloglike
def negloglike(mu_, u, R):
nu_ = invcdf(mu_)
t_values = st.t.ppf(u, nu_)
Expand All @@ -101,7 +103,7 @@ def negloglike(mu_, u, R):
#%% best fit


def bestcop(cops, u):
def bestcop(cops: List[int], u: np.ndarray) -> Tuple[int, CORR_PARAM_TYPE, float]:
"""
Fits the best copula to data based on a selected list of copulas to fit to using the AIC.

Expand Down Expand Up @@ -140,7 +142,7 @@ def bestcop(cops, u):

#%%Copula random

def random(cop, par, n):
def random(cop: int, par: CORR_PARAM_TYPE, n:int) -> np.ndarray:
"""
Generates random numbers from a chosen copula with specific parameters.

Expand Down Expand Up @@ -170,7 +172,7 @@ def random(cop, par, n):
if cop > 1 and cop < 6:
v1 = np.random.uniform(0.00001,0.999999,n)
v2 = np.random.uniform(0.00001,0.999999,n)
def equation2(w):
def equation2(w: np.ndarray) -> np.ndarray:
return (w * (1-(np.log(w)/alpha))) - v2
w_guess = v2
w = newton(equation2, w_guess,maxiter=2000)
Expand Down Expand Up @@ -217,7 +219,7 @@ def equation2(w):
if cop > 10 and cop < 15:
v1 = np.random.uniform(0.00001,0.999999,n)
v2 = np.random.uniform(0.00001,0.999999,n)
def equation2(w):
def equation2(w:np.ndarray) -> np.ndarray:
return w - ((1/alpha) * ((np.log((1-(1-w)**alpha))*(1-(1-w)**alpha))/((1-w)**(alpha-1)))) - v2
w_guess = v2
w = newton(equation2, w_guess,maxiter=2000)
Expand Down Expand Up @@ -255,7 +257,7 @@ def equation2(w):

#%% Copula conditional random

def randomconditional(cop, ui, par, n, un = 1):
def randomconditional(cop:int, ui:np.ndarray, par: CORR_PARAM_TYPE, n:int, un: Literal[1,2] = 1) -> np.ndarray:

"""
Generates conditional random numbers from a chosen copula with specific parameters.
Expand Down Expand Up @@ -404,7 +406,7 @@ def randomconditional(cop, ui, par, n, un = 1):
return uii

#%% CDF
def CDF(cop, u, par):
def CDF(cop:int, u: np.ndarray, par: CORR_PARAM_TYPE) -> np.ndarray:

"""
Computes the cumulative distribution function.
Expand Down Expand Up @@ -521,8 +523,8 @@ def CDF(cop, u, par):
return p

#%% PDF
def PDF(cop, u, par):

def PDF(cop:int, u:np.ndarray, par:CORR_PARAM_TYPE) -> np.ndarray:

"""
Computes the probability density function.
Expand Down Expand Up @@ -623,7 +625,7 @@ def PDF(cop, u, par):

#%% h function

def hfunc(cop, u1, u2, par, un = 1):
def hfunc(cop: int, u1:np.ndarray, u2:np.ndarray, par: CORR_PARAM_TYPE, un: Literal[1,2] = 1) -> np.ndarray:
"""
Computes the h-function (conditional CDF) of a copula with respect to variable u1 or u2.

Expand Down Expand Up @@ -773,7 +775,7 @@ def hfunc(cop, u1, u2, par, un = 1):

#%% h-inverse func

def hfuncinverse(cop, ui, y, par, un = 1):
def hfuncinverse(cop:int, ui: np.ndarray, y:np.ndarray, par:CORR_PARAM_TYPE, un: Literal[1,2] = 1) -> np.ndarray:
"""
Computes the inverse h-function (inverse conditional CDF) of a copula with respect to variable u1 or u2.

Expand Down Expand Up @@ -812,11 +814,11 @@ def hfuncinverse(cop, ui, y, par, un = 1):
else:
uii_guess = 1- ui
if un == 1:
def equation(u2):
def equation(u2: np.ndarray) -> np.ndarray:
return hfunc(cop, ui, u2, par, un) - y
uii = newton(equation, uii_guess,maxiter=200000, tol=1e-10)
if un == 2:
def equation(u1):
def equation(u1: np.ndarray) -> np.ndarray:
return hfunc(cop, u1, ui, par, un) - y
uii = newton(equation, uii_guess,maxiter=200000, tol=1e-10)

Expand Down Expand Up @@ -863,12 +865,12 @@ def equation(u1):
else:
uii_guess = 1- ui
if un == 1:
def equation(u2):
def equation(u2: np.ndarray) -> np.ndarray:
return hfunc(cop, ui, u2, par, un) - y

uii = newton(equation, uii_guess,maxiter=2000, tol=1e-10)
if un == 2:
def equation(u1):
def equation(u1: np.ndarray) -> np.ndarray:
return hfunc(cop, u1, ui, par, un) - y
uii = newton(equation, uii_guess,maxiter=2000, tol=1e-10)

Expand All @@ -884,7 +886,7 @@ def equation(u1):
return uii

#%% negative likelyhood
def neg_likelihood(par,cop,u):
def neg_likelihood(cop:int,u: np.ndarray, par: CORR_PARAM_TYPE) -> float:
"""
Computes the negative likelihood function.

Expand Down
15 changes: 10 additions & 5 deletions src/vinecopulas/marginals.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
from itertools import product
import sys

from typing import Tuple, List, Literal
DISCRETE_DIST_TYPE = Tuple[st.rv_discrete, np.ndarray, float]
CONTINUOUS_DIST_TYPE = Tuple[st.rv_continuous, np.ndarray, float]

# %% best fit discrete distribution


def best_fit_distributiondiscrete(data, bound=False, criterion="BIC"):
def best_fit_distributiondiscrete(data: np.ndarray, bound: bool=False, criterion: Literal["AIC", "BIC", "ML"]="BIC") -> DISCRETE_DIST_TYPE:
"""
Fits the best discrete distribution to data.

Expand All @@ -34,7 +38,7 @@ def best_fit_distributiondiscrete(data, bound=False, criterion="BIC"):
*criterion* : Either BIC, AIC or ML is used to choose the best distribution

Returns:
*bestdist* : the best distribution and its parameters.
*bestdist* : the best distribution and its parameters, as a tuple of (distribution, parameters, criterion_value).
"""

# distributions
Expand Down Expand Up @@ -138,8 +142,9 @@ def best_fit_distributiondiscrete(data, bound=False, criterion="BIC"):
return bestdist



# %% best fit distribution
def best_fit_distribution(data, criterion="BIC", dists = []):
def best_fit_distribution(data: np.ndarray, criterion: Literal["BIC", "AIC", "ML"]="BIC", dists: List["str"] = [])-> CONTINUOUS_DIST_TYPE:
"""
Fits the best continuous distribution to data.

Expand Down Expand Up @@ -235,7 +240,7 @@ def best_fit_distribution(data, criterion="BIC", dists = []):


# %% pseudodata
def pseudodata(data):
def pseudodata(data: np.ndarray) -> np.ndarray:
"""
Compute the pseudo-observations for the given data (transforms data to standard uniform margins)

Expand All @@ -255,7 +260,7 @@ def pseudodata(data):


# %%
def pseudodiscr(xcdf, xpmf):
def pseudodiscr(xcdf:np.ndarray, xpmf:np.ndarray) -> np.ndarray:
"""
Compute the pseudo-observations for the given variable that is discrete.

Expand Down
63 changes: 36 additions & 27 deletions src/vinecopulas/vinecopula.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import networkx as nx
import matplotlib.pyplot as plt
from vinecopulas.bivariate import *

from typing import List, Tuple, Literal, Union, Optional
# %% Copulas

copulas = {
Expand All @@ -40,7 +40,7 @@
# %% fitting vinecopula


def fit_vinecop(u1, copsi, vine="R", printing=True):
def fit_vinecop(u1: np.ndarray, copsi: List[int], vine: Literal["R", "C", "D", nx.Graph]="R", printing:bool=True) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Fit a regular vine copula to data.

Expand Down Expand Up @@ -95,12 +95,14 @@ def fit_vinecop(u1, copsi, vine="R", printing=True):
) # create a new dataframe where the row with the highest ktau is the first row
else:
if (
order1.v1[i] in list(order1.v2[:i])
or order1.v1[i] in list(order1.v1[:i])
order1.v1[i] in list(order2.v2[:i])
or order1.v1[i] in list(order2.v1[:i])
) and (
order1.v2[i] in list(order1.v2[:i])
or order1.v2[i] in list(order1.v1[:i])
): # see if both v1i and v2i are already in order2, first rows with unique variables need to be added to ensure all variables are included
order1.v2[i] in list(order2.v2[:i])
or order1.v2[i] in list(order2.v1[:i])
): # see if both v1i and v2i are already in order2,
# first rows with unique variables need to be
# added to ensure all variables are included
continue
else:
inde.append(i) # add used rows to inde
Expand Down Expand Up @@ -254,6 +256,19 @@ def fit_vinecop(u1, copsi, vine="R", printing=True):
order2 = order1[(order1.v1 == i) | (order1.v2 == i)].reset_index(
drop=True
) # select the rows that include this variable
else:
# vine is a graph.
# TODO: confirm that it is a tree and that every column is included in the tree.
temp_order = []
for edge_src, edge_dest in vine.edges:
if edge_src > edge_dest:
# Flip them
edge_src, edge_dest = edge_dest, edge_src
ta_temp = order1.loc[(order1.v1 == edge_src) & (order1.v2 == edge_dest), "tauabs"]
assert len(ta_temp) == 1, "The edge is not in the order1"
temp_order.append({"v1": edge_src, "v2": edge_dest, "tauabs": ta_temp.values[0]})
order2 = pd.DataFrame(temp_order)
vine = "R"

order1 = order2 # make order1 == the first tree
del order2
Expand Down Expand Up @@ -1082,7 +1097,7 @@ def fit_vinecop(u1, copsi, vine="R", printing=True):

return a, p, c

def density_vinecop(u, M, P, C):
def density_vinecop(u: np.ndarray, M: np.ndarray, P:np.ndarray, C:np.ndarray) -> np.ndarray:
"""
Computes the density function of a vine copula.

Expand Down Expand Up @@ -1172,7 +1187,7 @@ def density_vinecop(u, M, P, C):


# %% fitting vine copula with sspecific structure
def fit_vinecopstructure(u1, copsi, a):
def fit_vinecopstructure(u1: np.ndarray, copsi: List[int], a: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""
Fit a regular vine copula to data based on a known vine structure matrix.

Expand Down Expand Up @@ -1391,7 +1406,7 @@ def fit_vinecopstructure(u1, copsi, a):


# %% Sampling vine copula
def sample_vinecop(a, p, c, s):
def sample_vinecop(a:np.ndarray, p:np.ndarray, c:np.ndarray, s:np.ndarray) -> np.ndarray:
"""
Generate random samples from an R-vine.

Expand Down Expand Up @@ -1485,7 +1500,7 @@ def sample_vinecop(a, p, c, s):


# %% Sampling conditonal vine copula
def sample_vinecopconditional(a, p, c, s, Xc):
def sample_vinecopconditional(a:np.ndarray, p:np.ndarray, c:np.ndarray, s:int, Xc:np.ndarray) -> np.ndarray:
"""
Generate conditional samples from an R-vine based on a provided sampling order

Expand Down Expand Up @@ -1590,19 +1605,13 @@ def sample_vinecopconditional(a, p, c, s, Xc):
# %%sampling orders


def samplingorder(a):
def samplingorder(a: np.ndarray) -> List[List[int]]:
"""
Provides all the different sampling orders that are possible for the fitted vine-copula.

Arguments:
*a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

*p* : Parameters of the bivariate copulae provided as a triangular matrix.

*c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula (see...refer to where this information would be)



Returns:
*sortingorder* : A list of the different sampling orders available for the fitted vine-copula

Expand Down Expand Up @@ -1669,7 +1678,7 @@ def samplingorder(a):
# %%matrices of specific sampling order


def samplingmatrix(a, c, p, sorder):
def samplingmatrix(a:np.ndarray, c:np.ndarray, p:np.ndarray, sorder: List[int]) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Provides the triangular matrices for which the samples can be generated based on the specific sampling order.

Expand Down Expand Up @@ -1762,14 +1771,14 @@ def samplingmatrix(a, c, p, sorder):
# %%


def fit_conditionalvine(u1, vint, copsi, vine="R", condition=1, printing=True):
def fit_conditionalvine(u1: np.ndarray, vint: Union[int, List[int]], copsi: List[int], vine: Literal["R", "D", "C", nx.Graph]="R", condition: Literal[1,2]=1, printing:bool=True):
"""
Fit a regular vine copula which allows for a conditional sample of a variable of interest.

Arguments:
*u1* : the data, provided as a numpy array where each column contains a seperate variable (eg. u1,u2,...,un), which have already been transferred to standard uniform margins (0<= u <= 1)

*vint* : the variables of interest, provided as an integere or list that refers to the variables column numbers in u1, where the first column is 0 and the second column is 1, etc.
*vint* : the variables of interest, provided as an integer or list that refers to the variables column numbers in u1, where the first column is 0 and the second column is 1, etc.

*copsi* : A list of integers referring to the copulae of interest for which the fit has to be evauluated in the vine copula. eg. a list of [1, 10] refers to the Gaussian and Frank copula (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

Expand All @@ -1786,6 +1795,9 @@ def fit_conditionalvine(u1, vint, copsi, vine="R", condition=1, printing=True):
*c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

"""
if isinstance(vine, nx.Graph):
raise NotImplementedError("Only vine=R, D, or C are currently supported")
# TODO: Add support for vine as a graph

# Reference: Dißmann et al. 2013 adapted
dimen = u1.shape[1] # number of variables (number of columns)
Expand Down Expand Up @@ -3464,22 +3476,19 @@ def fit_conditionalvine(u1, vint, copsi, vine="R", condition=1, printing=True):
# %%


def plotvine(a, plottitle=None, variables=None, savepath=None):
def plotvine(a: np.ndarray, plottitle: Optional[str]=None, variables: Optional[List[str]]=None, savepath: Optional[str]=None):
"""
Plots the vine structure

Arguments:
*a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

*variables* : list of variables

*pltotitle* : title of the plot

*savepath* : path to save the plot

Returns:
*plot*



"""
dimen = a.shape[0] # dimension of copula
order = pd.DataFrame(
Expand Down