Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Utils for the new commondata #1693

Closed
wants to merge 9 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
361 changes: 361 additions & 0 deletions validphys2/src/validphys/datafiles/new_commondata/commondata_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,361 @@
"""
Python utilities for commondata implementation.

This module provides helpful functions that automate a few
tasks that are regularly needed for the implementation of
experimental data to the commondata format. If there are
any additional functions that could be added here because they
would simplify some repetitive tasks, please suggest them.

Before the usage of any functions, it is recommended to read
the docstrings of the function to understand the inputs and
outputs.

@author: Tanishq Sharma
"""

import numpy as np

from math import sqrt
from numpy.linalg import eig

def symmetrize_errors(delta_plus, delta_minus):
    r"""Symmetrize an asymmetric uncertainty and compute the data shift.

    Parameters
    ----------
    delta_plus : float
        The top/plus uncertainty, including its sign.
    delta_minus : float
        The bottom/minus uncertainty, including its sign.

    Returns
    -------
    se_delta : float
        The shift to be added to the data point, i.e. half the
        sum of the two signed uncertainties.
    se_sigma : float
        The symmetrized uncertainty to be used in commondata,
        sqrt(half_diff**2 + 2 * half_sum**2), where half_diff is
        half the difference of the two signed uncertainties.

    """
    half_sum = (delta_plus + delta_minus) / 2
    half_diff = (delta_plus - delta_minus) / 2
    return half_sum, sqrt(half_diff * half_diff + 2 * half_sum * half_sum)

def percentage_to_absolute(percentage, value):
    r"""Compute the absolute value of uncertainty from percentage.

    Parameters
    ----------
    percentage : string/float
        Experimental datasets can provide the percentage
        uncertainties with a % sign or without one.
        If a string is given (including str subclasses such
        as ``numpy.str_``), any % sign is stripped and the
        remainder is converted to a float. Otherwise the
        value is used directly in the computation.
    value : float
        The data point

    Returns
    -------
    absolute : float
        The absolute value of the uncertainty, i.e.
        ``percentage * value * 0.01``.

    Raises
    ------
    ValueError
        If ``percentage`` is a string that cannot be converted
        to a float once the % sign has been removed.

    """
    # isinstance rather than an exact type comparison, so that str
    # subclasses are also parsed instead of failing on `str * float`.
    if isinstance(percentage, str):
        percentage = float(percentage.replace("%", ""))
    return percentage * value * 0.01

def cormat_to_covmat(err_list, cormat_list):
    r"""Turn the elements of a correlation matrix into the
    elements of the corresponding covariance matrix.

    Parameters
    ----------
    err_list : list
        A one dimensional list holding the uncertainty of
        each data point, in order.
    cormat_list : list
        A one dimensional list holding the correlation matrix
        elements row by row. The matrix is taken to have
        len(err_list) columns.

    Returns
    -------
    covmat_list : list
        A one dimensional list holding the covariance matrix
        elements row by row; each element is the correlation
        multiplied by the uncertainties of its row and column.

    """
    covmat_list = []
    for flat_idx, corr in enumerate(cormat_list):
        # Recover the (row, column) position from the flat index.
        row, col = divmod(flat_idx, len(err_list))
        covmat_list.append(corr * err_list[row] * err_list[col])
    return covmat_list

def covmat_to_artunc(ndata, covmat_list, no_of_norm_mat=0):
    r"""Convert the covariance matrix to a matrix of
    artificial uncertainties.

    The covariance matrix is diagonalised and every eigenvector
    is scaled by the square root of its eigenvalue. The resulting
    matrix ``A`` satisfies ``A @ A.T == covmat`` up to rounding
    (columns belonging to negative eigenvalues are left at zero).

    Parameters
    ----------
    ndata : integer
        Number of data points
    covmat_list : list
        A one dimensional list which contains the elements of
        the covariance matrix row by row. Since experimental
        datasets provide these matrices in a list form, this
        simplifies the implementation for the user. The matrix
        is treated as symmetric: only its lower triangle enters
        the eigen-decomposition.
    no_of_norm_mat : int
        Normalized covariance matrices may have an eigenvalue
        of 0 due to the last data point not being linearly
        independent. To allow for this, the user should input
        the number of normalized matrices that are being treated
        in an instance. For example, if a single covariance matrix
        of a normalized distribution is being processed, the input
        would be 1. If a covariance matrix pertains to
        3 normalized datasets (i.e. cross covmat for 3
        distributions), the input would be 3. The default value is
        0 for when the covariance matrix pertains to an absolute
        distribution.

    Returns
    -------
    artunc : list
        A two dimensional matrix (given as a list of lists)
        which contains artificial uncertainties to be added
        to the commondata. i^th row (or list) contains the
        artificial uncertainties of the i^th data point.

    Raises
    ------
    ValueError
        If any eigenvalue is below -1e-10, or if more than
        ``no_of_norm_mat`` eigenvalues lie in the interval
        (-1e-10, 0].

    """
    epsilon = -0.0000000001
    not_psd_msg = 'The covariance matrix is not positive-semidefinite'

    # Rebuild the square matrix from its row-by-row flattened form.
    covmat = np.zeros((ndata, ndata))
    for flat_idx, element in enumerate(covmat_list):
        covmat[flat_idx // ndata][flat_idx % ndata] = element

    # eigh is the solver dedicated to symmetric matrices: eigenvalues are
    # always real and eigenvectors orthonormal, also for repeated
    # eigenvalues. The general solver gives no such guarantee, in which
    # case the artificial uncertainties would not reproduce the covmat.
    eigval, eigvec = np.linalg.eigh(covmat)

    neg_eval_count = 0
    for val in eigval:
        if val < epsilon:
            # Clearly negative eigenvalue.
            raise ValueError(not_psd_msg)
        if epsilon < val <= 0:
            # Numerically vanishing eigenvalue: tolerated only as many
            # times as there are normalized matrices.
            neg_eval_count += 1
            if neg_eval_count == no_of_norm_mat + 1:
                raise ValueError(not_psd_msg)

    artunc = np.zeros((ndata, ndata))
    for col, val in enumerate(eigval):
        if val < 0:
            # Tiny negative eigenvalues (rounding noise) contribute nothing.
            continue
        artunc[:, col] = eigvec[:, col] * sqrt(val)
    return artunc.tolist()

def cross_cormat_to_covmat(row_err_list, col_err_list, cormat_list):
    r"""Turn the elements of a cross correlation matrix
    (i.e. one between different variables or observables)
    into the elements of the corresponding covariance matrix.

    Parameters
    ----------
    row_err_list : list
        A one dimensional list holding the uncertainty of each
        data point of the variable given on the vertical axis.
    col_err_list : list
        A one dimensional list holding the uncertainty of each
        data point of the variable given on the horizontal axis.
    cormat_list : list
        A one dimensional list holding the correlation matrix
        elements row by row. The matrix is taken to have
        len(col_err_list) columns.

    Returns
    -------
    covmat_list : list
        A one dimensional list holding the covariance matrix
        elements row by row; each element is the correlation
        multiplied by the row uncertainty and the column
        uncertainty of its position.

    """
    covmat_list = []
    for flat_idx, corr in enumerate(cormat_list):
        # Recover the (row, column) position from the flat index.
        row, col = divmod(flat_idx, len(col_err_list))
        covmat_list.append(corr * row_err_list[row] * col_err_list[col])
    return covmat_list

def matlist_to_matrix(rows, columns, mat_list):
    r"""Convert a 1d list to a 2d matrix.

    Note: This utils function is not strictly needed for
    data implementation, however, it is provided for
    the aid of the user due to how matrices are treated
    throughout all the other functions. This function
    allows the user to convert a list that contains the
    elements of matrix row by row to a proper matrix, if
    need be for any reason.

    Parameters
    ----------
    rows : int
        No. of rows in the matrix
    columns : int
        No. of columns in the matrix
    mat_list : list
        A one dimensional list which contains the elements of
        the matrix row by row.

    Returns
    -------
    matrix : numpy.ndarray
        The matrix as a numpy 2d array of floats with shape
        (rows, columns).

    Raises
    ------
    ValueError
        If ``rows * columns`` differs from ``len(mat_list)``.

    """
    if rows * columns != len(mat_list):
        # ValueError is still caught by callers using `except Exception`.
        raise ValueError('rows * columns != len(mat_list)')
    # Row-by-row flattened data is exactly what a C-order reshape expects.
    return np.array(mat_list, dtype=float).reshape(rows, columns)

def concat_matrices(rows, columns, list_of_matrices):
    r"""Join smaller matrices into a large matrix.

    This function aims to simplify the process of joining multiple
    smaller matrices into one large matrix. Such a need could arise,
    for instance, when cross variable covariance matrices are provided
    but the user needs to join all the matrices to generate the full
    covariance matrix corresponding to the entire dataset.

    Parameters
    ----------
    rows : int
        No. of rows of matrices to be concatenated. E.g., if 6
        matrices: A, B, C, D, E, F need to be joined as
        [[A, B, C],
        [D, E, F]],
        the number of rows would be 2.
    columns : int
        No. of columns of matrices to be concatenated. In the
        above example, this would be 3.
    list_of_matrices : list
        A list of the matrices that have to be concatenated row by
        row. In the above example, this would be [A, B, C, D, E, F].
        The matrices themselves need to be provided as a list of lists,
        or a numpy 2d array. If the user has the matrix in a 1d row by
        row form, use matlist_to_matrix() to convert it. It is assumed
        the user verifies that all the input matrices have the correct
        dimensions. Matrices with incompatible dimensions will lead to
        undesired behavior. The input sequence is not modified.

    Returns
    -------
    final_mat_list : list
        A one dimensional list which contains the elements of
        the final, fully concatenated matrix row by row.

    """
    # Convert into a fresh list so the caller's sequence is left untouched
    # (and immutable sequences such as tuples are accepted as well).
    blocks = [np.asarray(matrix) for matrix in list_of_matrices]

    # First glue each row of blocks side by side ...
    block_rows = [
        np.concatenate(
            [blocks[col + row * columns] for col in range(columns)], axis=1
        )
        for row in range(rows)
    ]
    # ... then stack the resulting strips on top of each other.
    final_mat = np.concatenate(block_rows, axis=0)

    return [entry for mat_row in final_mat for entry in mat_row]

def trimat_to_fullmat(mode, tri_mat_list):
    r"""Convert a list of values of a triangular matrix
    to a symmetric matrix.

    Experimental datasets can provide the entries of
    correlation or covariance matrices as a triangular
    matrix, as these matrices are symmetric by their
    very nature. This function can convert these lists to
    a complete symmetric matrix, that can be used for the
    dataset implementation.

    Parameters
    ----------
    mode : bool
        Enter 0 or 1 based on the following scenarios:
        Use mode 0 if matrix entries are given row by
        row such as:
        0 1 2 3
          4 5 6
            7 8
              9
        Use mode 1 if the matrix entries are given column
        by column such as:
        0 1 3 6
          2 4 7
            5 8
              9
        Please note that the numbers above (0-9) are not
        entries of the matrix but rather the index of the
        entries of the list which contains the elements of
        the triangular matrix.
    tri_mat_list : list
        A list containing the elements of the triangular matrix,
        for example, for a 4*4 matrix, the list of
        triangular matrix entries could be:
        [a, b, c, d, e, f, g, h, i, j]

    Returns
    -------
    mat_list : list
        A one dimensional list which contains the elements of
        the fully populated, symmetric matrix row by row.

    Raises
    ------
    ValueError
        If ``mode`` is neither 0 nor 1, or if the length of
        ``tri_mat_list`` is not a triangular number
        (n * (n + 1) / 2 for some integer n), in which case the
        entries cannot fill a triangular matrix.

    """
    if mode != 0 and mode != 1:
        raise ValueError('Mode should be 0 or 1, refer to the function for usage')

    n_entries = len(tri_mat_list)
    # Invert n_entries = dim * (dim + 1) / 2 for the matrix dimension.
    dim = int((np.sqrt(1 + 8 * n_entries) - 1) / 2)
    if dim * (dim + 1) // 2 != n_entries:
        # Without this check the surplus entries would be dropped silently.
        raise ValueError(
            'len(tri_mat_list) is not a triangular number, '
            'the entries cannot fill a triangular matrix'
        )

    values = np.asarray(tri_mat_list, dtype=float)
    matrix = np.zeros((dim, dim))
    if mode == 0:
        # Upper triangle listed row by row.
        first, second = np.triu_indices(dim)
    else:
        # Upper triangle listed column by column, which is the same
        # ordering as the lower triangle listed row by row.
        second, first = np.tril_indices(dim)
    matrix[first, second] = values
    matrix[second, first] = values  # mirror to obtain the symmetric matrix

    return [entry for mat_row in matrix for entry in mat_row]