Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge e2e files related to bayesrecon #19

Merged
merged 24 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
d5ed6bd
added bPMF
LorenzoZambon Nov 14, 2024
7f5d65b
added empirical PMF via KDE
LorenzoZambon Nov 14, 2024
27e5b32
added hier_TD + functions for bPMF
LorenzoZambon Nov 26, 2024
c68afe4
Updated PMF.from_samples
LorenzoZambon Dec 16, 2024
6918afb
updated bPMF functions; added copulas
LorenzoZambon Dec 16, 2024
26dd7aa
updated TD functions + speedup
LorenzoZambon Dec 16, 2024
5644448
added .impute_ts and .aggr_ts
LorenzoZambon Dec 16, 2024
034487a
added t copula; fixed bpmf shrink; optimized IPFP
LorenzoZambon Dec 17, 2024
1f83323
minor fix
LorenzoZambon Jan 8, 2025
4f4109d
added hier_TD_Hstep
LorenzoZambon Jan 9, 2025
3b5e7ea
convert sample matrix to integer for memory reasons
LorenzoZambon Jan 9, 2025
cd81c8a
added Frank copula + improved code for copula cdf
LorenzoZambon Jan 9, 2025
3480e16
minor fix
LorenzoZambon Jan 10, 2025
1a3ec75
optimized .check_hierarchical
LorenzoZambon Jan 16, 2025
d0432e2
added hier_TD_e2e + parallelization of top-down algorithm
LorenzoZambon Jan 16, 2025
d739c56
improved managements of supports and smoothing; added parallel comput…
LorenzoZambon Jan 17, 2025
98fff12
fixed minor bug
LorenzoZambon Jan 17, 2025
28fda0d
added time print
LorenzoZambon Jan 20, 2025
7661c67
changed computation of tau
LorenzoZambon Jan 20, 2025
dc0db7e
deleted e2e files
LorenzoZambon Jan 20, 2025
f642d8e
Merge branch 'main' into huge_hier
LorenzoZambon Jan 20, 2025
8336b71
removed e2e functions from utils
LorenzoZambon Jan 20, 2025
858df41
removed e2e functions from PMF
LorenzoZambon Jan 20, 2025
88f7b5e
Fixed test failures.
dazzimonti Jan 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 93 additions & 16 deletions R/PMF.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,97 @@
# A pmf is represented as normalized numeric vector v:
# for each j = 0, ..., M, the probability of j is the value v[[j+1]]

.NEGBIN_TOLL = 1e-6 # used when fitting a Negative Binomial distribution

###

# Compute the empirical pmf from a vector of samples
PMF.from_samples = function(v) {
.check_discrete_samples(v)
pmf = tabulate(v+1) / length(v) # the support starts from 0
# Tabulate only counts values above 1: if sum(tabulate(v+1)) > length(v),
# it means that there were negative samples
if (!isTRUE(all.equal(sum(pmf), 1))) {
stop("Input error: same samples are negative")
# Fit a Negative Binomial distribution on a given vector of samples.
# If data are underdispersed, fit a Poisson.
# If min_supp is specified, the returned pmf must have minimum length of min_supp+1
# Use Rtoll instead of 1e-6?
.fit_static_negbin = function(v_, toll = .NEGBIN_TOLL) {
v = v_[!is.na(v_)] # remove NA
Mu = mean(v)
Var = stats::var(v)
if (Var <= Mu) { # if data are underdispersed, fit Poisson
M = stats::qpois(1-toll, Mu)
pmf = stats::dpois(0:M, Mu)
} else { # else, fit Negative Binomial
size = Mu^2 / (Var - Mu)
M = stats::qnbinom(1-toll, size = size, mu = Mu)
pmf = stats::dnbinom(0:M, size = size, mu = Mu)
}
return(pmf/sum(pmf))
}

# Estimate the pmf from a vector of non-negative discrete samples.
# If there are NA, they are removed before computing the pmf.
# Several estimates are possible: naive empirical pmf, parametric, KDE (...)
PMF.from_samples = function(v_,
estim_type = "naive",
weights_ = NULL,
estim_params = NULL,
min_supp = NULL,
al_smooth = .ALPHA_SMOOTHING,
check_in = TRUE) {

# First, remove NA
v = v_[!is.na(v_)]

if (check_in) {
# Check that samples are discrete and non-negative
.check_discrete_samples(v)
if (any(v<0)) stop("Input error: the are negative samples")
}

if (estim_type == "naive") {
if (!is.null(estim_params)) {
warning("Not yet implemented. Do not specify estim_params if estim_type = 'naive'")
}

# TODO: add possibility of doing a smoothing (e.g. Laplace)
# maybe default = TRUE only if there are holes?
# TODO: implement min_supp

if (is.null(weights_)) {
pmf = tabulate(v+1) / length(v) # the support starts from 0
} else {
weights = weights_[!is.na(v_)]
weights = weights / sum(weights)
pmf = rep(0, max(v)+1)
for (vv in unique(v)) {
pmf[[vv+1]] = sum(weights[v==vv])
}
}

} else if (estim_type == "parametric") {
if (!is.null(estim_params)) {
stop("Not yet implemented. Do not specify estim_params if estim_type = 'parametric'")
}
# TODO: add more flexibility in the parametric estim (add other distr, e.g. for underdispersed data)

pmf = .fit_static_negbin(v)

} else if (estim_type == "kde") {

stop("Kernel density estimation not yet implemented")
# TODO

} else {
stop("The choice of estim_type is not valid")
}

# pad with zeros to reach the specified length
if (!is.null(min_supp) && length(pmf) <= min_supp) {
pmf = c(pmf, rep(0,min_supp-length(pmf)+1))
}

if (!is.null(al_smooth)) pmf = PMF.smoothing(pmf, al_smooth, laplace = T)

return(pmf)
}


# Compute the pmf from a parametric distribution
PMF.from_params = function(params, distr, Rtoll = .RTOLL) {
# Check that the distribution is implemented, and that the params are ok
Expand Down Expand Up @@ -218,20 +295,18 @@ PMF.summary = function(pmf, Ltoll=.TOLL, Rtoll=.RTOLL) {
return(all_summaries)
}

# Apply smoothing to a pmf to "cover the holes" in the support.
# If there is no hole, it doesn't do anything.
# Apply smoothing to a pmf.
# If the smoothing parameter alpha is not specified, it is set to the min of pmf.
# If laplace is set to TRUE, add alpha to all the points.
# Otherwise, add alpha only to points with zero mass.
PMF.smoothing = function(pmf, alpha = 1e-9, laplace=FALSE) {
PMF.smoothing = function(pmf, alpha = .ALPHA_SMOOTHING, laplace=FALSE) {

if (is.null(alpha)) alpha = min(pmf[pmf!=0])

if (laplace) {
pmf = pmf + rep(alpha, length(pmf))
} else pmf[pmf==0] = alpha

# apply smoothing only if there are holes
if (sum(pmf==0)) {
if (laplace) { pmf = pmf + rep(alpha, length(pmf))
} else pmf[pmf==0] = alpha
}

return(pmf / sum(pmf))
}
Expand Down Expand Up @@ -337,3 +412,5 @@ PMF.tempering = function(pmf, temp) {
temp_pmf = pmf**(1/temp)
return(temp_pmf / sum(temp_pmf))
}


11 changes: 6 additions & 5 deletions R/hierarchy.R
Original file line number Diff line number Diff line change
Expand Up @@ -336,11 +336,12 @@ get_reconc_matrices <- function(agg_levels, h) {
for (i in 1:k) {
for (j in 1:k) {
if (i < j) {
cond1 = c(A[i,] %*% A[j,] != 0) # Upper i and j have some common descendants
cond2 = any(A[j,] > A[i,]) # Upper j is not a descendant of upper i
cond3 = any(A[i,] > A[j,]) # Upper i is not a descendant of upper j
if (cond1 & cond2 & cond3) {
return(FALSE)
if (sum(A[i,] * A[j,]) != 0) { # Upper i and j have some common descendants
if (any(A[j,] > A[i,])) { # Upper j is not a descendant of upper i
if (any(A[i,] > A[j,])) { # Upper i is not a descendant of upper j
return(FALSE)
}
}
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion R/reconc_TDcond.R
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,8 @@ reconc_TDcond = function(A, fc_bottom, fc_upper,
U = .MVN_sample(n_samples = num_samples,
mu = rec_gauss_u$bottom_reconciled_mean,
Sigma = rec_gauss_u$bottom_reconciled_covariance)
U = round(U) # round to integer
U = round(U) # round
mode(U) <- "integer" # convert to integer
U_js = asplit(U, MARGIN = 2) # split into list of column vectors
}

Expand Down
Loading