Skip to content

Commit

Permalink
fix and improve
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Dec 23, 2024
1 parent 637cd20 commit 7664c66
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 44 deletions.
21 changes: 10 additions & 11 deletions src/Cmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,20 @@ Param::Param(int argc, char **argv) {
throw std::invalid_argument("please use --USV together with --inbreed");
}

// handle memory and misc options
ncv = 20 > (2 * k + 1) ? 20 : (2 * k + 1);
oversamples = 10 > k ? 10 : k;
oversamples = oversamples > k ? oversamples : k;
if (haploid && (file_t == FileType::PLINK || file_t == FileType::BGEN)) ploidy = 1;
if (memory > 0 && svd_t != SvdType::FULL) out_of_core = true;
if (maf > 0.5) {
std::cerr << "warning: you specify '--maf' a value greater than 0.5.\n";
maf = 1 - maf;
}
keepsnp = maf > 0 ? true : false;
if (maf && out_of_core)
throw std::invalid_argument("does not support --maf filters for out-of-core mode yet! ");

// beagle.gz only represents genotype likelihood for pcangsd algorithm now
if (pca && file_t == FileType::BEAGLE && svd_t == SvdType::PCAoneAlg2)
throw std::invalid_argument("--svd 2 with --pcangsd not supported yet! use --svd 1 or 0 instead");
// handle EM-PCA
if (pca && file_t == FileType::BEAGLE) pcangsd = true;
if (emu || pcangsd) {
impute = true;
Expand All @@ -202,13 +208,6 @@ Param::Param(int argc, char **argv) {
if (bands < 4 || bands % 2 != 0)
throw std::invalid_argument("the -w/--batches must be a power of 2 and the minimun is 4.");

if (maf > 0.5) {
std::cerr << "warning: you specify '--maf' a value greater than 0.5.\n";
maf = 1 - maf;
}
keepsnp = maf > 0 ? true : false;
if (maf && out_of_core)
throw std::invalid_argument("does not support --maf filters for out-of-core mode yet! ");
if (svd_t == SvdType::PCAoneAlg2 && !noshuffle) perm = true;

} catch (const popl::invalid_option &e) {
Expand Down
67 changes: 34 additions & 33 deletions src/Halko.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ void RsvdOpData::computeUSV(int p, double tol) {
}

void NormalRsvdOpData::computeGandH(Mat2D& G, Mat2D& H, int pi) {
if (!data->snpmajor) {
cao.error("only work with snp major input data now.");
}

// reset omg to random
if (pi == 0 && reset) initOmg();

Expand All @@ -107,40 +111,37 @@ void NormalRsvdOpData::computeGandH(Mat2D& G, Mat2D& H, int pi) {
}
}
}
if (data->snpmajor || true) { // only work with snpmajor input data now.
if (pi > 0) {
Eigen::HouseholderQR<Eigen::Ref<Mat2D>> qr(H);
Omg.noalias() = qr.householderQ() * Mat2D::Identity(cols(), size); // hold H in Omega
}
G.noalias() = data->G.transpose() * Omg;
H.noalias() = data->G * G;
if (pi > 0) {
Eigen::HouseholderQR<Eigen::Ref<Mat2D>> qr(H);
Omg.noalias() = qr.householderQ() * Mat2D::Identity(cols(), size); // hold H in Omega
}
} else {
// for block version
// data->G is always nsamples x nsnps;
if (data->snpmajor || true) {
// for nsnps > nsamples
if (pi > 0) {
Eigen::HouseholderQR<Eigen::Ref<Mat2D>> qr(H);
Omg.noalias() = qr.householderQ() * Mat2D::Identity(cols(), size);
}
H = Mat2D::Zero(cols(), size);
data->check_file_offset_first_var();
for (uint i = 0; i < data->nblocks; ++i) {
start_idx = data->start[i];
stop_idx = data->stop[i];
actual_block_size = stop_idx - start_idx + 1;
tick.clock();
if (update) {
data->read_block_update(start_idx, stop_idx, U, S, V.transpose(), standardize);
} else {
data->read_block_initial(start_idx, stop_idx, standardize);
}
data->readtime += tick.reltime();
G.middleRows(start_idx, actual_block_size).noalias() = data->G.transpose() * Omg;
H.noalias() += data->G * G.middleRows(start_idx, actual_block_size);
}
G.noalias() = data->G.transpose() * Omg;
H.noalias() = data->G * G;
return;
}

// for block version
// data->G is always nsamples x nsnps;
// for nsnps > nsamples
if (pi > 0) {
Eigen::HouseholderQR<Eigen::Ref<Mat2D>> qr(H);
Omg.noalias() = qr.householderQ() * Mat2D::Identity(cols(), size);
}
H = Mat2D::Zero(cols(), size);
data->check_file_offset_first_var();
for (uint i = 0; i < data->nblocks; ++i) {
start_idx = data->start[i];
stop_idx = data->stop[i];
actual_block_size = stop_idx - start_idx + 1;
tick.clock();
if (update) {
data->read_block_update(start_idx, stop_idx, U, S, V.transpose(), standardize);
} else {
data->read_block_initial(start_idx, stop_idx, standardize);
}
data->readtime += tick.reltime();
G.middleRows(start_idx, actual_block_size).noalias() = data->G.transpose() * Omg;
H.noalias() += data->G * G.middleRows(start_idx, actual_block_size);
}
}

Expand Down Expand Up @@ -178,12 +179,12 @@ void FancyRsvdOpData::computeGandH(Mat2D& G, Mat2D& H, int pi) {
}
}
// bandsize: how many blocks in each band, 2, 4, 8, 16, 32, 64, ...
bandsize = fmin(bandsize * 2, data->params.bands);
// b: the index of current block
for (uint b = 0, i = 1, j = 1; b < data->params.bands; ++b, ++i, ++j) {
start_idx = b * blocksize;
stop_idx = (b + 1) * blocksize >= data->nsnps ? data->nsnps - 1 : (b + 1) * blocksize - 1;
actual_block_size = stop_idx - start_idx + 1;
if (U.size() > 0) data->predict_missing_E(U, start_idx, stop_idx);
G.middleRows(start_idx, actual_block_size).noalias() =
data->G.middleCols(start_idx, actual_block_size).transpose() * Omg;
if (pi > 0 && j <= std::pow(2, pi - 1) && std::pow(2, pi) < data->params.bands) {
Expand Down

0 comments on commit 7664c66

Please sign in to comment.