From 5785b2b03803584f507635b2a2fa42c104f0cea6 Mon Sep 17 00:00:00 2001 From: Rajita Chandak Date: Thu, 7 Nov 2024 09:27:07 +0100 Subject: [PATCH] cov_flag added --- R/lpcde/R/lpbwcde_fns.R | 2 +- R/lpcde/R/lpcde.R | 1 + R/lpcde/R/lpcde_fns.R | 160 ++++++++++++++++++++------ R/lpcde/man/S_exact.Rd | 7 +- R/lpcde/man/cov_hat.Rd | 4 +- R/lpcde/tests/testthat/test-lpbwcde.R | 1 + R/lpcde/tests/testthat/test-lpcde.R | 4 +- todo.org | 2 +- 8 files changed, 138 insertions(+), 43 deletions(-) diff --git a/R/lpcde/R/lpbwcde_fns.R b/R/lpcde/R/lpbwcde_fns.R index b599f61..4785436 100644 --- a/R/lpcde/R/lpbwcde_fns.R +++ b/R/lpcde/R/lpbwcde_fns.R @@ -691,7 +691,7 @@ bw_irot = function(y_data, x_data, y_grid, x, p, q, mu, nu, kernel_type, regular ####################################################################################### -#' @title S matrix +#' S matrix #' Internal Function #' #' Generate Matrix diff --git a/R/lpcde/R/lpcde.R b/R/lpcde/R/lpcde.R index 74bc968..d12eedf 100644 --- a/R/lpcde/R/lpcde.R +++ b/R/lpcde/R/lpcde.R @@ -149,6 +149,7 @@ lpcde = function(x_data, y_data, y_grid=NULL, x=NULL, bw=NULL, p=NULL, q=NULL, } } + #sd_y = stats::sd(y_data) #sd_x = apply(x_data, 2, stats::sd) #mx = apply(x_data, 2, mean) diff --git a/R/lpcde/R/lpcde_fns.R b/R/lpcde/R/lpcde_fns.R index 4da0083..407d8cb 100644 --- a/R/lpcde/R/lpcde_fns.R +++ b/R/lpcde/R/lpcde_fns.R @@ -49,18 +49,16 @@ lpcde_fn = function(y_data, x_data, y_grid, x, p, q, p_RBC, q_RBC, bw, mu, nu, est_flag = f_hat_val$singular_flag # standard errors - if(cov_flag=="full"){ - covmat = cov_hat(x_data=x_data, y_data=y_data, x=x, y_grid=y_grid, p=p, q=q, - mu=mu, nu=nu, h=bw, kernel_type=kernel_type) - covMat = covmat$cov - c_flag = covmat$singular_flag - se = sqrt(abs(diag(covMat)))*sd_y*mean(sd_x) - } else if (cov_flag == "diag"){ - - } else if (cov_flag == "off"){ + if(cov_flag == "off"){ covMat = NA c_flag = FALSE se = rep(NA, length(y_grid)) + } else { + covmat = cov_hat(x_data=x_data, y_data=y_data, x=x, y_grid=y_grid, p=p, q=q, + mu=mu, nu=nu, h=bw, kernel_type=kernel_type, cov_flag=cov_flag) + covMat = covmat$cov + c_flag = covmat$singular_flag + se = sqrt(abs(diag(covMat)))*sd_y*mean(sd_x) } if (rbc){ @@ -83,20 +81,17 @@ lpcde_fn = function(y_data, x_data, y_grid, x, p, q, p_RBC, q_RBC, bw, mu, nu, # covariance matrix # standard errors - if(cov_flag=="full"){ - covmat_rbc = cov_hat(x_data=x_data, y_data=y_data, x=x, y_grid=y_grid, - p=p_RBC, q=q_RBC, mu=mu, nu=nu, h=bw, - kernel_type=kernel_type) - covMat_rbc = covmat_rbc$cov - c_rbc_flag = covmat_rbc$singular_flag - se_rbc = sqrt(abs(diag(covMat_rbc)))*sd_y*mean(sd_x) - } else if (cov_flag == "diag"){ - - } else if (cov_flag == "off"){ + if (cov_flag == "off"){ covMat_rbc = NA c_rbc_flag = FALSE se_rbc = rep(NA, length(y_grid)) - + } else { + covmat_rbc = cov_hat(x_data=x_data, y_data=y_data, x=x, y_grid=y_grid, + p=p_RBC, q=q_RBC, mu=mu, nu=nu, h=bw, + kernel_type=kernel_type, cov_flag=cov_flag) + covMat_rbc = covmat_rbc$cov + c_rbc_flag = covmat_rbc$singular_flag + se_rbc = sqrt(abs(diag(covMat_rbc)))*sd_y*mean(sd_x) } } @@ -353,9 +348,10 @@ fhat = function(x_data, y_data, x, y_grid, p, q, mu, nu, h, kernel_type){ #' @param mu Degree of derivative with respect to y. #' @param nu Degree of derivative with respect to x. #' @param kernel_type Kernel function choice. +#' @param cov_flag Flag for covariance estimation option. #' @return Covariance matrix for all the grid points #' @keywords internal -cov_hat = function(x_data, y_data, x, y_grid, p, q, mu, nu, h, kernel_type){ +cov_hat = function(x_data, y_data, x, y_grid, p, q, mu, nu, h, kernel_type, cov_flag){ # setting constants n = length(y_data) d = ncol(x_data) @@ -401,11 +397,12 @@ cov_hat = function(x_data, y_data, x, y_grid, p, q, mu, nu, h, kernel_type){ # initialize matrix c_hat = matrix(0L, nrow=ng, ncol=ng) - for (i in 1:ng){ - for (j in 1:i){ + if (cov_flag == "diag"){ + c_hat = matrix(NA, nrow=ng, ncol=ng) + for (i in 1:ng){ # relevant entries w.r.t. y and y_prime y = y_grid[i] - y_prime = y_grid[j] + y_prime = y_grid[i] y_scaled = (y_sorted - y)/h yp_scaled = (y_sorted-y_prime)/h y_elems = which(abs(y_scaled)<=1) @@ -413,8 +410,7 @@ cov_hat = function(x_data, y_data, x, y_grid, p, q, mu, nu, h, kernel_type){ elems = intersect(y_elems, yp_elems) if ( length(elems) <= 5){ - c_hat[i, j] = 0 - c_hat[j, i] = 0 + c_hat[i, i] = 0 } else{ if (mu==0){ if(check_inv(S_x(as.matrix(x_scaled[y_elems, ]/(n*h^d)), q, kernel_type))[1]== TRUE){ @@ -476,7 +472,7 @@ cov_hat = function(x_data, y_data, x, y_grid, p, q, mu, nu, h, kernel_type){ t_2 = (n-k) * a_yp[k] * aj[k] t_3 = (n-k) * a_y[k] * ak[k] t_4 = (n-k)^2 * a_y[k] * a_yp[k] - c_hat[i, j] = c_hat[i, j] + bx[1, elems[k]]^2 * (t_1 + t_2 + t_3 + t_4) + c_hat[i, i] = c_hat[i, i] + bx[1, elems[k]]^2 * (t_1 + t_2 + t_3 + t_4) } } # estimated means @@ -494,17 +490,115 @@ cov_hat = function(x_data, y_data, x, y_grid, p, q, mu, nu, h, kernel_type){ } # filling matrix, using symmetry - c_hat[i, j] = c_hat[i, j]/(n*(n-1)^2) - theta_y*theta_yp/n^2 - c_hat[j, i] = c_hat[i, j] + c_hat[i, i] = c_hat[i, i]/(n*(n-1)^2) - theta_y*theta_yp/n^2 } + } else if(cov_flag == "full"){ + for (i in 1:ng){ + for (j in 1:i){ + # relevant entries w.r.t. y and y_prime + y = y_grid[i] + y_prime = y_grid[j] + y_scaled = (y_sorted - y)/h + yp_scaled = (y_sorted-y_prime)/h + y_elems = which(abs(y_scaled)<=1) + yp_elems = which(abs(yp_scaled)<=1) + elems = intersect(y_elems, yp_elems) + + if ( length(elems) <= 5){ + c_hat[i, j] = 0 + c_hat[j, i] = 0 + } else{ + if (mu==0){ + if(check_inv(S_x(as.matrix(x_scaled[y_elems, ]/(n*h^d)), q, kernel_type))[1]== TRUE){ + sx_mat = solve(S_x(as.matrix(x_scaled[y_elems, ]/(n*h^d)), q, kernel_type)) + } else{ + singular_flag = TRUE + sx_mat = matrix(0L, nrow = length(e_nu), ncol = length(e_nu)) + } + bx = b_x(as.matrix(x_scaled), sx_mat, e_nu, q, kernel_type) + # sy matrix + if(check_inv(S_x(as.matrix(y_scaled[y_elems]), p, kernel_type)/(n*h))[1]== TRUE){ + sy_mat = solve(S_x(as.matrix(y_scaled[y_elems]), p, kernel_type)/(n*h)) + } else{ + singular_flag = TRUE + sy_mat = matrix(0L, nrow = length(e_mu), ncol = length(e_mu)) + } + if(check_inv(S_x(as.matrix(yp_scaled[yp_elems]), p, kernel_type)/(n*h))[1]== TRUE){ + syp_mat = solve(S_x(as.matrix(yp_scaled[yp_elems]), p, kernel_type)/(n*h)) + } else{ + singular_flag = TRUE + syp_mat = matrix(0L, nrow = length(e_mu), ncol = length(e_mu)) + } + + # computing y and yprime vectors + a_y = b_x(matrix(y_scaled[elems]), sy_mat, e_mu, p, kernel_type) + a_yp = b_x(matrix(yp_scaled[elems]), syp_mat, e_mu, p, kernel_type) + + # part 1 of the sum + aj = cumsum(a_y) + ak = cumsum(a_yp) + + }else{ + # sy matrix + if(check_inv(S_x(as.matrix(y_scaled[y_elems]), p, kernel_type)/(n*h))[1]== TRUE){ + sy_mat = solve(S_x(as.matrix(y_scaled[y_elems]), p, kernel_type)/(n*h)) + } else{ + singular_flag = TRUE + sy_mat = matrix(0L, nrow = length(e_mu), ncol = length(e_mu)) + } + if(check_inv(S_x(as.matrix(yp_scaled[yp_elems]), p, kernel_type)/(n*h))[1]== TRUE){ + syp_mat = solve(S_x(as.matrix(yp_scaled[yp_elems]), p, kernel_type)/(n*h)) + } else{ + singular_flag = TRUE + syp_mat = matrix(0L, nrow = length(e_mu), ncol = length(e_mu)) + } + + # computing y and yprime vectors + a_y = b_x(as.matrix(y_scaled[y_elems]), sy_mat, e_mu, p, kernel_type) + a_yp = b_x(as.matrix(yp_scaled[elems]), syp_mat, e_mu, p, kernel_type) + + # part 1 of the sum + aj = cumsum(a_y) + ak = cumsum(a_yp) + } + + # populating matrix + for (k in 1:length(elems)){ + t_1 = aj[k] * ak[k] + t_2 = (n-k) * a_yp[k] * aj[k] + t_3 = (n-k) * a_y[k] * ak[k] + t_4 = (n-k)^2 * a_y[k] * a_yp[k] + c_hat[i, j] = c_hat[i, j] + bx[1, elems[k]]^2 * (t_1 + t_2 + t_3 + t_4) + } + } + # estimated means + if (mu==0){ + theta_y = fhat(x_data=x_data, y_data=y_data, x=x, y_grid=y, p=2, + q=1, mu=0, nu=0, h=h, kernel_type=kernel_type)$est + theta_yp = fhat(x_data=x_data, y_data=y_data, x=x, y_grid=y_prime, + p=2, q=1, mu=0, nu=0, h=h, kernel_type=kernel_type)$est + + }else{ + theta_y = fhat(x_data=x_data, y_data=y_data, x=x, y_grid=y, p=p, + q=q, mu=mu, nu=nu, h=h, kernel_type=kernel_type)$est + theta_yp = fhat(x_data=x_data, y_data=y_data, x=x, y_grid=y_prime, + p=p, q=q, mu=mu, nu=nu, h=h, kernel_type=kernel_type)$est + } + + # filling matrix, using symmetry + c_hat[i, j] = c_hat[i, j]/(n*(n-1)^2) - theta_y*theta_yp/n^2 + c_hat[j, i] = c_hat[i, j] + + } + } } if(mu==0){ c_hat = sweep(sweep(c_hat, MARGIN=1, FUN="*", STATS=1/(n*h^(d+mu+nu))), - MARGIN=2, FUN="*", STATS=1/(n*h^(d+mu+nu))) + MARGIN=2, FUN="*", STATS=1/(n*h^(d+mu+nu))) }else{ c_hat = sweep(sweep(c_hat, MARGIN=1, FUN="*", STATS=1/(n*h^(d+mu+nu+1))), - MARGIN=2, FUN="*", STATS=1/(n*h^(d+mu+nu+1))) + MARGIN=2, FUN="*", STATS=1/(n*h^(d+mu+nu+1))) diag(c_hat) = diag(c_hat/2) } @@ -636,10 +730,10 @@ cov_hat = function(x_data, y_data, x, y_grid, p, q, mu, nu, h, kernel_type){ if(mu==0){ c_hat = sweep(sweep(c_hat, MARGIN=1, FUN="*", STATS=1/(n*h^(d+mu+nu))), - MARGIN=2, FUN="*", STATS=1/(n*h^(d+mu+nu))) + MARGIN=2, FUN="*", STATS=1/(n*h^(d+mu+nu))) }else{ c_hat = sweep(sweep(c_hat, MARGIN=1, FUN="*", STATS=1/(n*h^(d+mu+nu+1))), - MARGIN=2, FUN="*", STATS=1/(n*h^(d+mu+nu+1))) + MARGIN=2, FUN="*", STATS=1/(n*h^(d+mu+nu+1))) diag(c_hat) = diag(c_hat/2) } } diff --git a/R/lpcde/man/S_exact.Rd b/R/lpcde/man/S_exact.Rd index 31388c1..4b9f46e 100644 --- a/R/lpcde/man/S_exact.Rd +++ b/R/lpcde/man/S_exact.Rd @@ -3,9 +3,7 @@ \name{S_exact} \alias{S_exact} \title{S matrix -Internal Function - -Generate Matrix} +Internal Function} \usage{ S_exact(lower = -1, upper = 1, eval_pt, p, kernel_type) } @@ -20,9 +18,6 @@ S_exact(lower = -1, upper = 1, eval_pt, p, kernel_type) a (p+1)-by-(p+1) matrix } \description{ -S matrix -Internal Function - Generate Matrix } \keyword{internal} diff --git a/R/lpcde/man/cov_hat.Rd b/R/lpcde/man/cov_hat.Rd index 404521e..553844a 100644 --- a/R/lpcde/man/cov_hat.Rd +++ b/R/lpcde/man/cov_hat.Rd @@ -4,7 +4,7 @@ \alias{cov_hat} \title{cov_hat: covariance estimator} \usage{ -cov_hat(x_data, y_data, x, y_grid, p, q, mu, nu, h, kernel_type) +cov_hat(x_data, y_data, x, y_grid, p, q, mu, nu, h, kernel_type, cov_flag) } \arguments{ \item{x_data}{Covariate dataset, vector or matrix.} @@ -28,6 +28,8 @@ y-direction.} \item{h}{Numeric, bandwidth vector.} \item{kernel_type}{Kernel function choice.} + +\item{cov_flag}{Flag for covariance estimation option.} } \value{ Covariance matrix for all the grid points diff --git a/R/lpcde/tests/testthat/test-lpbwcde.R b/R/lpcde/tests/testthat/test-lpbwcde.R index 960ca6c..198cb05 100644 --- a/R/lpcde/tests/testthat/test-lpbwcde.R +++ b/R/lpcde/tests/testthat/test-lpbwcde.R @@ -42,3 +42,4 @@ test_that("lpbwcde default output", { coef(model1) expect_equal(model1$opt$bw_type, "imse-rot") }) + diff --git a/R/lpcde/tests/testthat/test-lpcde.R b/R/lpcde/tests/testthat/test-lpcde.R index 5f3a7b5..83331b0 100644 --- a/R/lpcde/tests/testthat/test-lpcde.R +++ b/R/lpcde/tests/testthat/test-lpcde.R @@ -57,8 +57,9 @@ test_that("error checking", { # expect_equal(model2$opt$p, model2$opt$p_RBC) # expect_equal(model2$opt$q, model2$opt$q_RBC) - model2 = lpcde(x_data=x_data, y_data=y_data, y_grid=y_grid, x=0, kernel_type="triangular", bw=1.8) + model2 = lpcde(x_data=x_data, y_data=y_data, y_grid=y_grid, x=0, kernel_type="triangular", bw=1.8, cov_flag="diag") expect_equal(model2$opt$kernel, "triangular") + expect_equal(model2$opt$cov_flag, "diag") }) test_that("lpcde multivariate output", { @@ -102,3 +103,4 @@ test_that("Properties of pdf estimator", { #check all probabilities are less than 1 expect_equal(all(model_reg$Estimate[,3]<1), TRUE) }) + diff --git a/todo.org b/todo.org index 3431d42..d0e9655 100644 --- a/todo.org +++ b/todo.org @@ -5,7 +5,7 @@ ** DONE bandwidth selection error example *** DONE typo in documentation ** DONE check grid redundancy -** TODO Implement diag cov estimation +** DONE Implement diag cov estimation ** TODO multiple x inputs ** TODO adding predict method ** DONE add more unit tests