diff --git a/README.rst b/README.rst index 65e969a..1f51349 100644 --- a/README.rst +++ b/README.rst @@ -3,27 +3,28 @@ mixedvines Package for Python ============================= Package for canonical vine copula trees with mixed continuous and discrete -marginals. If you use this software for publication, please cite [ONKEN2016]_. +marginals. If you use this software for publication, please cite +[ONKEN2016]_. Description ----------- This package contains a complete framework based on canonical vine copulas for -modelling multivariate data that are partly discrete and partly continuous. The -resulting multivariate distributions are flexible with rich dependence +modelling multivariate data that are partly discrete and partly continuous. +The resulting multivariate distributions are flexible with rich dependence structures and marginals. For continuous marginals, implementations of the normal and the gamma distributions are provided. For discrete marginals, Poisson, binomial and negative binomial distributions are provided. As bivariate copula building -blocks, the Gaussian, Frank and Clayton families as well as rotation transformed -families are provided. Additional marginal and pair copula distributions can be -added easily. +blocks, the Gaussian, Frank and Clayton families as well as rotation +transformed families are provided. Additional marginal and pair copula +distributions can be added easily. -The package includes methods for sampling, likelihood calculation and inference, -all of which have quadratic complexity. These procedures are combined to -estimate entropy by means of Monte Carlo integration. +The package includes methods for sampling, likelihood calculation and +inference, all of which have quadratic complexity. These procedures are +combined to estimate entropy by means of Monte Carlo integration. Please see [ONKEN2016]_ for a more detailed description of the framework. 
@@ -55,11 +56,11 @@ To install the mixedvines package, run:: Usage ----- -Suppose that data are given in a NumPy array ``samples`` with shape ``(n, d)``, -where ``n`` is the number of samples and ``d`` is the number of elements per -sample. First, specify which of the elements are continuous. If, for instance, -the distribution has three elements and the first and last element are -continuous whereas the second element is discrete: +Suppose that data are given in a NumPy array ``samples`` with shape +``(n, d)``, where ``n`` is the number of samples and ``d`` is the number of +elements per sample. First, specify which of the elements are continuous. +If, for instance, the distribution has three elements and the first and last +element are continuous whereas the second element is discrete: .. code-block:: python @@ -72,8 +73,9 @@ To fit a mixed vine to the samples: from mixedvines.mixedvine import MixedVine vine = MixedVine.fit(samples, is_continuous) -``vine`` is now a ``MixedVine`` object. To draw samples from the distribution, -calculate their density and estimate the distribution entropy in units of bits: +``vine`` is now a ``MixedVine`` object. To draw samples from the +distribution, calculate their density and estimate the distribution entropy in +units of bits: .. code-block:: python diff --git a/docs/conf.py b/docs/conf.py index e8e8954..ee8d9df 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -77,7 +77,7 @@ def __getattr__(cls, name): # The short X.Y version. version = u'1.3' # The full version, including alpha/beta/rc tags. -release = u'1.3.0' +release = u'1.3.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -151,7 +151,7 @@ def __getattr__(cls, name): # The name for this set of Sphinx documents. # "<project> v<release> documentation" by default. # -# html_title = u'mixedvines v1.3.0' +# html_title = u'mixedvines v1.3.1' # A shorter title for the navigation bar. Default is the same as html_title. 
# diff --git a/docs/intro.rst b/docs/intro.rst index 05be72f..4372d55 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -3,17 +3,17 @@ Introduction This packge contains a complete framework based on canonical vine copulas for -modelling multivariate data that are partly discrete and partly continuous. The -resulting multivariate distributions are flexible with rich dependence +modelling multivariate data that are partly discrete and partly continuous. +The resulting multivariate distributions are flexible with rich dependence structures and marginals. For continuous marginals, implementations of the normal and the gamma distributions are provided. For discrete marginals, Poisson, binomial and negative binomial distributions are provided. As bivariate copula building -blocks, the Gaussian, Frank and Clayton families as well as rotation transformed -families are provided. Additional marginal and pair copula distributions can be -added easily. +blocks, the Gaussian, Frank and Clayton families as well as rotation +transformed families are provided. Additional marginal and pair copula +distributions can be added easily. -The package includes methods for sampling, likelihood calculation and inference, -all of which have quadratic complexity. These procedures are combined to -estimate entropy by means of Monte Carlo integration. +The package includes methods for sampling, likelihood calculation and +inference, all of which have quadratic complexity. These procedures are +combined to estimate entropy by means of Monte Carlo integration. diff --git a/docs/usage.rst b/docs/usage.rst index 534d8b5..106e48d 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -2,11 +2,11 @@ Usage ===== -Suppose that data are given in a NumPy array ``samples`` with shape ``(n, d)``, -where ``n`` is the number of samples and ``d`` is the number of elements per -sample. First, specify which of the elements are continuous. 
If, for instance, -the distribution has three elements and the first and last element are -continuous whereas the second element is discrete: +Suppose that data are given in a NumPy array ``samples`` with shape +``(n, d)``, where ``n`` is the number of samples and ``d`` is the number of +elements per sample. First, specify which of the elements are continuous. +If, for instance, the distribution has three elements and the first and last +element are continuous whereas the second element is discrete: .. code-block:: python @@ -21,19 +21,19 @@ To fit a mixed vine to the samples: ``vine`` is now a ``MixedVine`` object. Note that for the canonical vine, the order of elements is important. Elements should be sorted according to the -importance of their dependencies to other elements where elements with important -dependencies to many other elements should come first. A heuristic way to -select the order of elements is to calculate Kendall's tau between all element -pairs, to obtain a score for each element by summing the taus of the pairs the -element occurs in and to sort elements in descending order according to their -scores. This is what the ``MixedVine.fit`` method does internally by default to -construct an improved canonical vine tree. This internal sorting is used to -construct the vine tree only, so the order of elements is not changed in a user -visible way. To prevent this internal sorting, set the ``keep_order`` argument -to ``True``. - -To draw samples from the distribution, calculate their density and estimate the -distribution entropy in units of bits: +importance of their dependencies to other elements where elements with +important dependencies to many other elements should come first. A heuristic +way to select the order of elements is to calculate Kendall's tau between all +element pairs, to obtain a score for each element by summing the taus of the +pairs the element occurs in and to sort elements in descending order according +to their scores. 
This is what the ``MixedVine.fit`` method does internally by +default to construct an improved canonical vine tree. This internal sorting +is used to construct the vine tree only, so the order of elements is not +changed in a user visible way. To prevent this internal sorting, set the +``keep_order`` argument to ``True``. + +To draw samples from the distribution, calculate their density and estimate +the distribution entropy in units of bits: .. code-block:: python diff --git a/mixedvines/__init__.py b/mixedvines/__init__.py index f636d0a..4a40a02 100644 --- a/mixedvines/__init__.py +++ b/mixedvines/__init__.py @@ -15,6 +15,24 @@ # # You should have received a copy of the GNU General Public License along with # this program. If not, see . +""" +mixedvines +========== + +Provides canonical vine copula trees with mixed continuous and discrete +marginals. The main class is `MixedVine` implementing a copula vine model with +mixed marginals. + +Modules +------- +mixedvine + Copula vine model with mixed marginals. +copula + Bivariate copula distributions. +marginal + Univariate marginal distributions. + +""" from . import marginal from . import copula from . import mixedvine diff --git a/mixedvines/copula.py b/mixedvines/copula.py index af6c786..a6a6d4f 100644 --- a/mixedvines/copula.py +++ b/mixedvines/copula.py @@ -15,10 +15,22 @@ # # You should have received a copy of the GNU General Public License along with # this program. If not, see . -''' -This module implements copula distributions. -''' -import sys +"""This module implements bivariate copula distributions. + +Classes +------- +Copula + Abstract class representing a copula. +IndependenceCopula + Independence copula. +GaussianCopula + Copula from the Gaussian family. +ClaytonCopula + Copula from the Clayton family. +FrankCopula + Copula from the Frank family. 
+ +""" import abc from scipy.optimize import minimize from scipy.stats import norm, uniform, multivariate_normal @@ -26,8 +38,7 @@ class Copula(abc.ABC): - ''' - This abstract class represents a copula. + """This abstract class represents a copula. Parameters ---------- @@ -67,7 +78,8 @@ class Copula(abc.ABC): Fit a copula to samples. theta_bounds() Bounds for `theta` parameters. - ''' + """ + rotation_options = ['90°', '180°', '270°'] def __init__(self, theta=None, rotation=None): @@ -78,15 +90,14 @@ def __init__(self, theta=None, rotation=None): @classmethod def __check_theta(cls, theta): - ''' - Checks the `theta` parameter. + """Checks the `theta` parameter. Parameters ---------- theta : array_like Parameter array of the copula. The number of elements depends on the copula family. - ''' + """ bnds = cls.theta_bounds() if len(bnds) > 0: theta = np.asarray(theta) @@ -106,23 +117,22 @@ def __check_theta(cls, theta): @classmethod def __check_rotation(cls, rotation): - ''' - Checks the `rotation` parameter. + """Checks the `rotation` parameter. Parameters ---------- rotation : string Rotation of the copula. Can be one of the elements of `Copula.rotation_options` or `None`. - ''' + """ if rotation is not None and rotation not in cls.rotation_options: raise ValueError("rotation '" + rotation + "' not supported") @staticmethod def __crop_input(samples): - ''' - Crops the input to the unit hypercube. The input is changed and a - reference to the input is returned. + """Crops the input to the unit hypercube. + + The input is changed and a reference to the input is returned. Parameters ---------- @@ -133,15 +143,15 @@ def __crop_input(samples): ------- samples : array_like n-by-2 matrix of cropped samples where n is the number of samples. - ''' + """ samples[samples < 0] = 0 samples[samples > 1] = 1 return samples def __rotate_input(self, samples): - ''' - Preprocesses the input to account for the copula rotation. 
The input - is changed and a reference to the input is returned. + """Preprocesses the input to account for the copula rotation. + + The input is changed and a reference to the input is returned. Parameters ---------- @@ -152,7 +162,7 @@ def __rotate_input(self, samples): ------- samples : array_like n-by-2 matrix of rotated samples where n is the number of samples. - ''' + """ if self.rotation == '90°': samples[:, 1] = 1 - samples[:, 1] elif self.rotation == '180°': @@ -163,9 +173,9 @@ def __rotate_input(self, samples): @abc.abstractmethod def _logpdf(self, samples): - ''' - Calculates the log of the probability density function. The samples - can be assumed to lie within the unit hypercube. + """Calculates the log of the probability density function. + + The samples can be assumed to lie within the unit hypercube. Parameters ---------- @@ -176,12 +186,10 @@ def _logpdf(self, samples): ------- vals : ndarray Log of the probability density function evaluated at `samples`. - ''' - pass + """ def logpdf(self, samples): - ''' - Calculates the log of the probability density function. + """Calculates the log of the probability density function. Parameters ---------- @@ -192,7 +200,7 @@ def logpdf(self, samples): ------- vals : ndarray Log of the probability density function evaluated at `samples`. - ''' + """ samples = np.copy(np.asarray(samples)) samples = self.__rotate_input(samples) inner = np.all(np.bitwise_and(samples > 0.0, samples < 1.0), axis=1) @@ -204,8 +212,7 @@ def logpdf(self, samples): return vals def pdf(self, samples): - ''' - Calculates the probability density function. + """Calculates the probability density function. Parameters ---------- @@ -216,14 +223,14 @@ def pdf(self, samples): ------- vals : ndarray Probability density function evaluated at `samples`. - ''' + """ return np.exp(self.logpdf(samples)) @abc.abstractmethod def _logcdf(self, samples): - ''' - Calculates the log of the cumulative distribution function. 
The - samples can be assumed to lie within the unit hypercube. + """Calculates the log of the cumulative distribution function. + + The samples can be assumed to lie within the unit hypercube. Parameters ---------- @@ -234,12 +241,10 @@ def _logcdf(self, samples): ------- vals : ndarray Log of the cumulative distribution function evaluated at `samples`. - ''' - pass + """ def logcdf(self, samples): - ''' - Calculates the log of the cumulative distribution function. + """Calculates the log of the cumulative distribution function. Parameters ---------- @@ -250,7 +255,7 @@ def logcdf(self, samples): ------- vals : ndarray Log of the cumulative distribution function evaluated at `samples`. - ''' + """ samples = np.copy(np.asarray(samples)) samples = self.__crop_input(samples) samples = self.__rotate_input(samples) @@ -274,8 +279,7 @@ def logcdf(self, samples): return vals def cdf(self, samples): - ''' - Calculates the cumulative distribution function. + """Calculates the cumulative distribution function. Parameters ---------- @@ -286,13 +290,14 @@ def cdf(self, samples): ------- ndarray Cumulative distribution function evaluated at `samples`. - ''' + """ return np.exp(self.logcdf(samples)) def __axis_wrapper(self, fun, samples, axis): - ''' - Calls function `fun` with `samples` as argument, but eventually changes - rotation and samples such that `axis == 0` corresponds to `axis == 1`. + """Calls function `fun` with `samples` as argument. + + Eventually changes rotation and samples such that `axis == 0` + corresponds to `axis == 1`. Parameters ---------- @@ -308,7 +313,7 @@ def __axis_wrapper(self, fun, samples, axis): vals : array_like Function values evaluated at `samples` but taking `axis` into account. 
- ''' + """ samples = np.copy(np.asarray(samples)) samples = self.__crop_input(samples) rotation = self.rotation @@ -324,7 +329,7 @@ def __axis_wrapper(self, fun, samples, axis): raise ValueError("axis must be in [0, 1]") samples = self.__rotate_input(samples) vals = fun(samples) - if self.rotation == '180°' or self.rotation == '270°': + if self.rotation in ('180°', '270°'): vals = 1.0 - vals finally: # Recover original rotation @@ -333,10 +338,10 @@ def __axis_wrapper(self, fun, samples, axis): @abc.abstractmethod def _ccdf(self, samples): - ''' - Calculates the conditional cumulative distribution function conditioned - on axis 1. The samples can be assumed to lie within the unit - hypercube. + """Calculates the conditional cumulative distribution function. + + The cumulative distribution function is conditioned on axis 1. The + samples can be assumed to lie within the unit hypercube. Parameters ---------- @@ -348,12 +353,10 @@ def _ccdf(self, samples): vals : ndarray Conditional cumulative distribution function evaluated at `samples`. - ''' - pass + """ def ccdf(self, samples, axis=1): - ''' - Calculates the conditional cumulative distribution function. + """Calculates the conditional cumulative distribution function. Parameters ---------- @@ -368,12 +371,13 @@ def ccdf(self, samples, axis=1): vals : ndarray Conditional cumulative distribution function evaluated at `samples`. - ''' + """ return self.__axis_wrapper(self._ccdf, samples, axis) @abc.abstractmethod def _ppcf(self, samples): - ''' + """Calculates the inverse of the conditional CDF. + Calculates the inverse of the copula conditional cumulative distribution function conditioned on axis 1. The samples can be assumed to lie within the unit hypercube. @@ -388,11 +392,11 @@ def _ppcf(self, samples): vals : ndarray Inverse of the conditional cumulative distribution function evaluated at `samples`. - ''' - pass + """ def ppcf(self, samples, axis=1): - ''' + """Calculates the inverse of the conditional CDF. 
+ Calculates the inverse of the copula conditional cumulative distribution function. @@ -409,12 +413,11 @@ def ppcf(self, samples, axis=1): vals : ndarray Inverse of the conditional cumulative distribution function evaluated at `samples`. - ''' + """ return self.__axis_wrapper(self._ppcf, samples, axis) def rvs(self, size=1, random_state=None): - ''' - Generates random variates from the copula. + """Generates random variates from the copula. Parameters ---------- @@ -430,7 +433,7 @@ def rvs(self, size=1, random_state=None): ------- samples : array_like n-by-2 matrix of samples where n is the number of samples. - ''' + """ samples = np.stack((uniform.rvs(size=size, random_state=random_state), uniform.rvs(size=size, random_state=random_state)), axis=1) @@ -438,21 +441,18 @@ def rvs(self, size=1, random_state=None): return samples def estimate_theta(self, samples): - ''' - Estimates the theta parameters from the given samples. + """Estimates the theta parameters from the given samples. Parameters ---------- samples : array_like n-by-2 matrix of samples where n is the number of samples. - ''' + """ if self.theta is not None: bnds = self.theta_bounds() def cost(theta): - ''' - Calculates the cost of a given `theta` parameter. - ''' + """Calculates the cost of a given `theta` parameter.""" self.theta = np.asarray(theta) vals = self.logpdf(samples) # For optimization, filter out inifinity values @@ -463,8 +463,7 @@ def cost(theta): @classmethod def fit(cls, samples): - ''' - Fits the parameters of the copula to the given samples. + """Fits the parameters of the copula to the given samples. Parameters ---------- @@ -475,7 +474,7 @@ def fit(cls, samples): ------- copula : Copula The copula fitted to `samples`. - ''' + """ # Find best fitting family copulas = [] for family in cls.__subclasses__(): @@ -495,22 +494,18 @@ def fit(cls, samples): @staticmethod @abc.abstractmethod def theta_bounds(): - ''' - Bounds for `theta` parameters. + """Bounds for `theta` parameters. 
Returns ------- bnds : array_like List of 2-tuples where the first tuple element represents the lower bound and the second element represents the upper bound. - ''' - pass + """ class IndependenceCopula(Copula): - ''' - This class represents the independence copula. - ''' + """This class represents the independence copula.""" def _logpdf(self, samples): vals = np.zeros(samples.shape[0]) @@ -542,9 +537,7 @@ def theta_bounds(): class GaussianCopula(Copula): - ''' - This class represents a copula from the Gaussian family. - ''' + """This class represents a copula from the Gaussian family.""" def _logpdf(self, samples): if self.theta >= 1.0: @@ -601,15 +594,13 @@ def theta_bounds(): class ClaytonCopula(Copula): - ''' - This class represents a copula from the Clayton family. - ''' + """This class represents a copula from the Clayton family.""" def _logpdf(self, samples): if self.theta == 0: vals = np.zeros(samples.shape[0]) else: - vals = np.log(1 + self.theta) + (-1 - self.theta) \ + vals = np.log1p(self.theta) + (-1 - self.theta) \ * (np.log(samples[:, 0]) + np.log(samples[:, 1])) \ + (-1 / self.theta - 2) \ * np.log(samples[:, 0]**(-self.theta) @@ -676,9 +667,7 @@ def theta_bounds(): class FrankCopula(Copula): - ''' - This class represents a copula from the Frank family. - ''' + """This class represents a copula from the Frank family.""" def _logpdf(self, samples): if self.theta == 0: diff --git a/mixedvines/marginal.py b/mixedvines/marginal.py index e57b6bb..1bd9620 100644 --- a/mixedvines/marginal.py +++ b/mixedvines/marginal.py @@ -15,18 +15,20 @@ # # You should have received a copy of the GNU General Public License along with # this program. If not, see . -''' -This module implements univariate marginal distribution that are either -continuous or discrete. -''' +"""This module implements univariate marginal distributions. + +Classes +------- +Marginal + Discrete or continuous marginal distribution. 
+ +""" from scipy.stats import rv_continuous, norm, gamma, poisson, binom, nbinom import numpy as np -class Marginal(object): - ''' - This class represents a marginal distribution which can be continuous or - discrete. +class Marginal: + """Represents a marginal distribution which can be continuous or discrete. Parameters ---------- @@ -57,15 +59,14 @@ class Marginal(object): Generate random variates. fit(samples, is_continuous) Fit a distribution to samples. - ''' + """ def __init__(self, rv_mixed): self.rv_mixed = rv_mixed self.is_continuous = isinstance(rv_mixed.dist, rv_continuous) def logpdf(self, samples): - ''' - Calculates the log of the probability density function. + """Calculates the log of the probability density function. Parameters ---------- @@ -76,14 +77,13 @@ def logpdf(self, samples): ------- ndarray Log of the probability density function evaluated at `samples`. - ''' + """ if self.is_continuous: return self.rv_mixed.logpdf(samples) return self.rv_mixed.logpmf(samples) def pdf(self, samples): - ''' - Calculates the probability density function. + """Calculates the probability density function. Parameters ---------- @@ -94,12 +94,11 @@ def pdf(self, samples): ------- ndarray Probability density function evaluated at `samples`. - ''' + """ return np.exp(self.logpdf(samples)) def logcdf(self, samples): - ''' - Calculates the log of the cumulative distribution function. + """Calculates the log of the cumulative distribution function. Parameters ---------- @@ -110,12 +109,11 @@ def logcdf(self, samples): ------- ndarray Log of the cumulative distribution function evaluated at `samples`. - ''' + """ return self.rv_mixed.logcdf(samples) def cdf(self, samples): - ''' - Calculates the cumulative distribution function. + """Calculates the cumulative distribution function. Parameters ---------- @@ -126,12 +124,11 @@ def cdf(self, samples): ------- ndarray Cumulative distribution function evaluated at `samples`. 
- ''' + """ return np.exp(self.logcdf(samples)) def ppf(self, samples): - ''' - Calculates the inverse of the cumulative distribution function. + """Calculates the inverse of the cumulative distribution function. Parameters ---------- @@ -143,12 +140,11 @@ def ppf(self, samples): ndarray Inverse of the cumulative distribution function evaluated at `samples`. - ''' + """ return self.rv_mixed.ppf(samples) def rvs(self, size=1, random_state=None): - ''' - Generates random variates from the distribution. + """Generates random variates from the distribution. Parameters ---------- @@ -164,13 +160,12 @@ def rvs(self, size=1, random_state=None): ------- array_like Array of samples. - ''' + """ return self.rv_mixed.rvs(size, random_state=random_state) @staticmethod def fit(samples, is_continuous): - ''' - Fits a distribution to the given samples. + """Fits a distribution to the given samples. Parameters ---------- @@ -184,7 +179,7 @@ def fit(samples, is_continuous): ------- best_marginal : Marginal The distribution fitted to `samples`. - ''' + """ # Mean and variance mean = np.mean(samples) var = np.var(samples) diff --git a/mixedvines/mixedvine.py b/mixedvines/mixedvine.py index 0a0da28..c1b4e8b 100644 --- a/mixedvines/mixedvine.py +++ b/mixedvines/mixedvine.py @@ -15,9 +15,14 @@ # # You should have received a copy of the GNU General Public License along with # this program. If not, see . -''' -This module implements a copula vine model with mixed marginals. -''' +"""This module implements a copula vine model with mixed marginals. + +Classes +------- +MixedVine + Copula vine model with mixed marginals. + +""" from scipy.stats import kendalltau, norm, uniform from scipy.optimize import minimize import numpy as np @@ -25,9 +30,8 @@ from .copula import Copula, IndependenceCopula -class MixedVine(object): - ''' - This class represents a copula vine model with mixed marginals. +class MixedVine: + """Represents a copula vine model with mixed marginals. 
Parameters ---------- @@ -57,13 +61,13 @@ class MixedVine(object): Determines which marginals are continuous. fit(samples, is_continuous, trunc_level, do_refine, keep_order) Fits the mixed vine to the given samples. - ''' + """ - class VineLayer(object): - ''' - This class represents a layer of a copula vine tree. A tree - description in layers is advantageous, because most operations on the - vine work in sweeps from layer to layer. + class VineLayer: + """Represents a layer of a copula vine tree. + + A tree description in layers is advantageous, because most operations + on the vine work in sweeps from layer to layer. Parameters ---------- @@ -125,7 +129,7 @@ class VineLayer(object): Collects the bounds of all copula parameters. is_continuous() Determines which marginals are continuous. - ''' + """ def __init__(self, input_layer=None, input_indices=None, marginals=None, copulas=None): @@ -150,30 +154,27 @@ def __init__(self, input_layer=None, input_indices=None, self.input_marginal_indices = None def is_marginal_layer(self): - ''' - Determines whether the layer is the marginal layer. + """Determines whether the layer is the marginal layer. Returns ------- boolean `True` if the layer is the marginal layer. - ''' + """ return not self.input_layer def is_root_layer(self): - ''' - Determines whether the layer is the output layer. + """Determines whether the layer is the output layer. Returns ------- boolean `True` if the layer is the root layer. - ''' + """ return not self.output_layer def logpdf(self, samples): - ''' - Calculates the log of the probability density function. + """Calculates the log of the probability density function. Parameters ---------- @@ -185,7 +186,7 @@ def logpdf(self, samples): ------- ndarray Log of the probability density function evaluated at `samples`. 
- ''' + """ if samples.size == 0: return np.empty((0, 1)) if self.is_root_layer(): @@ -194,8 +195,7 @@ def logpdf(self, samples): return self.output_layer.logpdf(samples) def _marginal_densities(self, samples): - ''' - Evaluate marginal densities and cumulative distribution functions. + """Evaluates marginal densities and CDFs. Parameters ---------- @@ -213,7 +213,7 @@ def _marginal_densities(self, samples): 'cdfm': Lower cumulative distribution functions. 'is_continuous': List of booleans where element i is `True` if output element i is continuous. - ''' + """ logp = np.zeros(samples.shape) cdfp = np.zeros(samples.shape) cdfm = np.zeros(samples.shape) @@ -234,9 +234,9 @@ def _marginal_densities(self, samples): return dout def densities(self, samples): - ''' - Computes densities and cumulative distribution functions layer by - layer. + """Computes densities and cumulative distribution functions. + + The computation is done layer by layer. Parameters ---------- @@ -255,7 +255,7 @@ def densities(self, samples): 'cdfm': Lower cumulative distribution functions. 'is_continuous': List of booleans where element i is `True` if output element i is continuous. - ''' + """ if self.is_marginal_layer(): return self._marginal_densities(samples) # Propagate samples to input_layer @@ -354,9 +354,10 @@ def densities(self, samples): return dout def build_curvs(self, urvs, curvs): - ''' - Helper function for `make_dependent`. Builds conditional uniform - random variates `curvs` for `make_dependent`. + """Helper function for `make_dependent`. + + Builds conditional uniform random variates `curvs` for + `make_dependent`. Parameters ---------- @@ -373,7 +374,7 @@ def build_curvs(self, urvs, curvs): Dependent uniform random variates. curvs : array_like Conditional uniform random variates. 
- ''' + """ (urvs, curvs) = self.make_dependent(urvs, curvs) if self.is_marginal_layer(): first_marginal_index = self.output_layer.input_indices[0][0] @@ -386,8 +387,9 @@ def build_curvs(self, urvs, curvs): return (urvs, curvs) def curv_ccdf(self, sample, curvs, copula_index): - ''' - Helper function for `build_curvs` to generate a conditional sample. + """Helper function for `build_curvs`. + + The function generates a conditional sample. Parameters ---------- @@ -403,7 +405,7 @@ def curv_ccdf(self, sample, curvs, copula_index): ------- sample : float Conditional sample for `curvs` at index `copula_index`. - ''' + """ if not self.is_marginal_layer(): sample = self.input_layer.curv_ccdf( sample, curvs, self.input_indices[copula_index][1]) @@ -413,9 +415,10 @@ def curv_ccdf(self, sample, curvs, copula_index): return sample def make_dependent(self, urvs, curvs=None): - ''' - Helper function for `rvs`. Introduces dependencies between the - uniform random variates `urvs` according to the vine copula tree. + """Helper function for `rvs`. + + Introduces dependencies between the uniform random variates `urvs` + according to the vine copula tree. Parameters ---------- @@ -431,7 +434,7 @@ def make_dependent(self, urvs, curvs=None): Dependent uniform random variates. curvs : array_like Conditional uniform random variates. - ''' + """ if curvs is None: curvs = np.zeros(shape=urvs.shape) if not self.is_marginal_layer(): @@ -448,9 +451,9 @@ def make_dependent(self, urvs, curvs=None): return (urvs, curvs) def rvs(self, size=1, random_state=None): - ''' - Generates random variates from the mixed vine. Currently assumes a - c-vine structure. + """Generates random variates from the mixed vine. + + Currently assumes a c-vine structure. Parameters ---------- @@ -467,7 +470,7 @@ def rvs(self, size=1, random_state=None): array_like n-by-d matrix of samples where n is the number of samples and d is the number of marginals. 
- ''' + """ if self.is_root_layer(): # Determine distribution dimension layer = self @@ -484,10 +487,10 @@ def rvs(self, size=1, random_state=None): return self.output_layer.rvs(size=size, random_state=random_state) def fit(self, samples, is_continuous, trunc_level=None): - ''' - Fits the vine tree to the given samples. This method is supposed - to be called on the output layer and will recurse to its input - layers. + """Fits the vine tree to the given samples. + + This method is supposed to be called on the output layer and will + recurse to its input layers. Parameters ---------- @@ -507,7 +510,7 @@ def fit(self, samples, is_continuous, trunc_level=None): output_urvs : array_like The output uniform random variates of the layer. Can be ignored if this is the output layer. - ''' + """ if self.is_marginal_layer(): output_urvs = np.zeros(samples.shape) for i in range(samples.shape[1]): @@ -531,8 +534,7 @@ def fit(self, samples, is_continuous, trunc_level=None): return output_urvs def get_all_params(self): - ''' - Constructs an array containing all copula parameters. + """Constructs an array containing all copula parameters. Returns ------- @@ -540,7 +542,7 @@ def get_all_params(self): A list containing all copula parameter values starting with the parameters of the first copula layer and continuing layer by layer. - ''' + """ if self.is_marginal_layer(): params = [] else: @@ -552,8 +554,7 @@ def get_all_params(self): return params def set_all_params(self, params): - ''' - Sets all copula parameters to the values stored in params. + """Sets all copula parameters to the values stored in params. Parameters ---------- @@ -561,17 +562,16 @@ def set_all_params(self, params): A list containing all copula parameter values starting with the parameters of the first copula layer and continuing layer by layer. 
- ''' + """ if not self.is_marginal_layer(): self.input_layer.set_all_params(params) - for i in range(len(self.copulas)): - if self.copulas[i].theta is not None: - for j in range(len(self.copulas[i].theta)): + for i, copula in enumerate(self.copulas): + if copula.theta is not None: + for j, _ in enumerate(copula.theta): self.copulas[i].theta[j] = params.pop(0) def get_all_bounds(self): - ''' - Collects the bounds of all copula parameters. + """Collects the bounds of all copula parameters. Returns ------- @@ -581,7 +581,7 @@ def get_all_bounds(self): continuing layer by layer. The first element of tuple i denotes the lower bound and the second element denotes the upper bound of parameter i. - ''' + """ if self.is_marginal_layer(): bnds = [] else: @@ -592,22 +592,20 @@ def get_all_bounds(self): return bnds def is_continuous(self): - ''' - Determines which marginals are continuous. + """Determines which marginals are continuous. Returns ------- vals : array_like List of boolean values of length d, where d is the number of marginals and element i is `True` if marginal i is continuous. - ''' + """ if self.is_marginal_layer(): vals = np.zeros(len(self.marginals), dtype=bool) for k, marginal in enumerate(self.marginals): vals[k] = marginal.is_continuous return vals - else: - return self.input_layer.is_continuous() + return self.input_layer.is_continuous() def __init__(self, dim): if dim < 2: @@ -616,8 +614,7 @@ def __init__(self, dim): self.root = self._construct_c_vine(np.arange(dim)) def logpdf(self, samples): - ''' - Calculates the log of the probability density function. + """Calculates the log of the probability density function. Parameters ---------- @@ -629,12 +626,11 @@ def logpdf(self, samples): ------- ndarray Log of the probability density function evaluated at `samples`. - ''' + """ return self.root.logpdf(samples) def pdf(self, samples): - ''' - Calculates the probability density function. + """Calculates the probability density function. 
Parameters ---------- @@ -646,12 +642,11 @@ def pdf(self, samples): ------- ndarray Probability density function evaluated at `samples`. - ''' + """ return np.exp(self.logpdf(samples)) def rvs(self, size=1, random_state=None): - ''' - Generates random variates from the mixed vine. + """Generates random variates from the mixed vine. Parameters ---------- @@ -668,13 +663,12 @@ def rvs(self, size=1, random_state=None): array_like n-by-d matrix of samples where n is the number of samples and d is the number of marginals. - ''' + """ return self.root.rvs(size=size, random_state=random_state) def entropy(self, alpha=0.05, sem_tol=1e-3, mc_size=1000, random_state=None): - ''' - Estimates the entropy of the mixed vine. + """Estimates the entropy of the mixed vine. Parameters ---------- @@ -697,7 +691,7 @@ def entropy(self, alpha=0.05, sem_tol=1e-3, mc_size=1000, Estimate of the mixed vine entropy in bits. sem : float Standard error of the mixed vine entropy estimate in bits. - ''' + """ # Gaussian confidence interval for sem_tol and level alpha conf = norm.ppf(1 - alpha) sem = np.inf @@ -718,7 +712,8 @@ def entropy(self, alpha=0.05, sem_tol=1e-3, mc_size=1000, return ent, sem def set_marginal(self, marginal_index, rv_mixed): - ''' + """Sets a marginal distribution. + Sets a particular marginal distribution in the mixed vine tree for manual construction of a mixed vine model. @@ -728,14 +723,15 @@ def set_marginal(self, marginal_index, rv_mixed): The index of the marginal in the marginal layer. rv_mixed : scipy.stats.distributions.rv_frozen The marginal distribution to be inserted. - ''' + """ layer = self.root while not layer.is_marginal_layer(): layer = layer.input_layer layer.marginals[marginal_index] = Marginal(rv_mixed) def set_copula(self, layer_index, copula_index, copula): - ''' + """Sets a pair copula. + Sets a particular pair copula in the mixed vine tree for manual construction of a mixed vine model. 
@@ -747,7 +743,7 @@ def set_copula(self, layer_index, copula_index, copula): The index of the copula in its layer. copula : Copula The copula to be inserted. - ''' + """ layer = self.root while not layer.is_marginal_layer(): layer = layer.input_layer @@ -758,22 +754,20 @@ def set_copula(self, layer_index, copula_index, copula): layer.copulas[copula_index] = copula def is_continuous(self): - ''' - Determines which marginals are continuous. + """Determines which marginals are continuous. Returns ------- is_continuous : array_like List of boolean values of length d, where d is the number of marginals and element i is `True` if marginal i is continuous. - ''' + """ return self.root.is_continuous() @staticmethod def fit(samples, is_continuous, trunc_level=None, do_refine=False, keep_order=False): - ''' - Fits the mixed vine to the given samples. + """Fits the mixed vine to the given samples. Parameters ---------- @@ -798,7 +792,7 @@ def fit(samples, is_continuous, trunc_level=None, do_refine=False, ------- vine : MixedVine The mixed vine with parameters fitted to `samples`. - ''' + """ dim = samples.shape[1] vine = MixedVine(dim) if not keep_order: @@ -811,9 +805,7 @@ def fit(samples, is_continuous, trunc_level=None, do_refine=False, bnds = vine.root.get_all_bounds() def cost(params): - ''' - Calculates the cost of a given set of copula parameters. - ''' + """Calculates the cost of a given set of copula parameters.""" vine.root.set_all_params(params.tolist()) vals = vine.logpdf(samples) return -np.sum(vals) @@ -825,7 +817,8 @@ def cost(params): @staticmethod def _heuristic_element_order(samples): - ''' + """Finds a heuristic element order. + Finds an order of elements that heuristically facilitates vine modelling. 
For this purpose, Kendall's tau is calculated between samples of pairs of elements and elements are scored according to the @@ -841,7 +834,7 @@ def _heuristic_element_order(samples): ------- order : array_like Permutation of all element indices reflecting descending scores. - ''' + """ dim = samples.shape[1] # Score elements according to total absolute Kendall's tau score = np.zeros(dim) @@ -856,7 +849,8 @@ def _heuristic_element_order(samples): @staticmethod def _construct_c_vine(element_order): - ''' + """Constructs a c-vine. + Constructs a c-vine tree without setting marginals or copulas. The c-vine tree is constructed according to the input element order. The index of the element with the most important dependencies should come @@ -871,7 +865,7 @@ def _construct_c_vine(element_order): ------- root : VineLayer The root layer of the canonical vine tree. - ''' + """ dim = len(element_order) marginals = np.empty(dim, dtype=Marginal) layer = MixedVine.VineLayer(marginals=marginals) diff --git a/mixedvines/tests/test_copula.py b/mixedvines/tests/test_copula.py index 048d181..8de929d 100644 --- a/mixedvines/tests/test_copula.py +++ b/mixedvines/tests/test_copula.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2017-2019 Arno Onken +# Copyright (C) 2017-2019, 2021 Arno Onken # # This file is part of the mixedvines package. # @@ -15,9 +15,7 @@ # # You should have received a copy of the GNU General Public License along with # this program. If not, see . -''' -This module implements tests for the copula module. -''' +"""This module implements tests for the copula module.""" import numpy as np from numpy.testing import assert_allclose from mixedvines.copula import IndependenceCopula, GaussianCopula, \ @@ -25,9 +23,7 @@ def test_logpdf(): - ''' - Tests the log of the probability density function.
- ''' + """Tests the log of the probability density function.""" samples = np.array([np.linspace(0, 1, 5), np.linspace(0.2, 0.8, 5)]).T # Independence copula @@ -63,9 +59,7 @@ def test_logpdf(): def test_pdf(): - ''' - Tests the probability density function. - ''' + """Tests the probability density function.""" samples = np.array([np.linspace(0, 1, 5), np.linspace(0.2, 0.8, 5)]).T # Independence copula @@ -98,9 +92,7 @@ def test_pdf(): def test_logcdf(): - ''' - Tests the log of the cumulative distribution function. - ''' + """Tests the log of the cumulative distribution function.""" samples = np.array([np.linspace(0, 1, 5), np.linspace(0.2, 0.8, 5)]).T # Independence copula @@ -137,9 +129,7 @@ def test_logcdf(): def test_cdf(): - ''' - Tests the cumulative distribution function. - ''' + """Tests the cumulative distribution function.""" samples = np.array([np.linspace(0, 1, 5), np.linspace(0.2, 0.8, 5)]).T # Independence copula @@ -172,9 +162,7 @@ def test_cdf(): def test_ccdf(): - ''' - Tests the conditional cumulative distribution function. - ''' + """Tests the conditional cumulative distribution function.""" samples = np.array([np.linspace(0, 1, 5), np.linspace(0.2, 0.8, 5)]).T # Independence copula @@ -226,9 +214,7 @@ def test_ccdf(): def test_ppcf(): - ''' - Tests the conditional cumulative distribution function. - ''' + """Tests the conditional cumulative distribution function.""" samples = np.array([np.linspace(0, 1, 5), np.linspace(0.2, 0.8, 5)]).T # Independence copula @@ -278,9 +264,7 @@ def test_ppcf(): def test_rotation_90_deg(): - ''' - Tests the 90° copula rotation. - ''' + """Tests the 90° copula rotation.""" samples = np.array([np.linspace(0, 1, 5), np.linspace(0.2, 0.8, 5)]).T # Clayton copula family rotated 90° @@ -309,9 +293,7 @@ def test_rotation_90_deg(): def test_rotation_180_deg(): - ''' - Tests the 180° copula rotation. 
- ''' + """Tests the 180° copula rotation.""" samples = np.array([np.linspace(0, 1, 5), np.linspace(0.2, 0.8, 5)]).T # Clayton copula family rotated 180° @@ -340,9 +322,7 @@ def test_rotation_180_deg(): def test_rotation_270_deg(): - ''' - Tests the 270° copula rotation. - ''' + """Tests the 270° copula rotation.""" samples = np.array([np.linspace(0, 1, 5), np.linspace(0.2, 0.8, 5)]).T # Clayton copula family rotated 270° diff --git a/mixedvines/tests/test_marginal.py b/mixedvines/tests/test_marginal.py index a7f471f..eaffb5e 100644 --- a/mixedvines/tests/test_marginal.py +++ b/mixedvines/tests/test_marginal.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2017-2019 Arno Onken +# Copyright (C) 2017-2019, 2021 Arno Onken # # This file is part of the mixedvines package. # @@ -15,18 +15,14 @@ # # You should have received a copy of the GNU General Public License along with # this program. If not, see . -''' -This module implements tests for the marginal module. -''' +"""This module implements tests for the marginal module.""" import numpy as np from numpy.testing import assert_allclose from mixedvines.marginal import Marginal def test_marginal_fit(): - ''' - Tests the fit method. - ''' + """Tests the fit method.""" samples = np.linspace(-2, 2, 3) # Normal distribution marginal = Marginal.fit(samples, True) diff --git a/mixedvines/tests/test_mixedvine.py b/mixedvines/tests/test_mixedvine.py index a09382b..47ff517 100644 --- a/mixedvines/tests/test_mixedvine.py +++ b/mixedvines/tests/test_mixedvine.py @@ -15,9 +15,7 @@ # # You should have received a copy of the GNU General Public License along with # this program. If not, see . -''' -This module implements tests for the mixedvine module. 
-''' +"""This module implements tests for the mixedvine module.""" import pytest import numpy as np from numpy.testing import assert_approx_equal, assert_allclose @@ -26,16 +24,15 @@ from mixedvines.mixedvine import MixedVine -@pytest.fixture -def example_vine(): - ''' - Constructs an example mixed vine. +@pytest.fixture(name="example_vine") +def fixture_example_vine(): + """Constructs an example mixed vine. Returns ------- vine : MixedVine An example mixed vine. - ''' + """ dim = 3 # Dimension vine = MixedVine(dim) # Specify marginals @@ -50,9 +47,7 @@ def example_vine(): def test_pdf(example_vine): - ''' - Tests the probability density function. - ''' + """Tests the probability density function.""" # Calculate probability density function on lattice bnds = np.empty((3), dtype=object) bnds[0] = [-1, 1] @@ -74,9 +69,7 @@ def test_pdf(example_vine): def test_fit(example_vine): - ''' - Tests the fit to samples. - ''' + """Tests the fit to samples.""" # Generate random variates size = 100 random_state = np.random.RandomState(0) @@ -93,9 +86,7 @@ def test_fit(example_vine): def test_entropy(example_vine): - ''' - Tests the entropy estimate. - ''' + """Tests the entropy estimate.""" random_state = np.random.RandomState(0) (ent, sem) = example_vine.entropy(sem_tol=1e-2, random_state=random_state) assert_approx_equal(ent, 7.83, significant=3) diff --git a/setup.cfg b/setup.cfg index 80b5e62..2d395e2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,7 @@ # this program. If not, see . [metadata] name = mixedvines -version = 1.3.0 +version = 1.3.1 author = Arno Onken author_email = asnelt@asnelt.org description = Canonical vine copula trees with mixed marginals diff --git a/setup.py b/setup.py index a34524f..297a7a1 100644 --- a/setup.py +++ b/setup.py @@ -15,9 +15,7 @@ # # You should have received a copy of the GNU General Public License along with # this program. If not, see . -''' -Setup for the mixedvines package. 
-''' +"""Setup for the mixedvines package.""" import setuptools setuptools.setup()