Skip to content

Commit

Permalink
improve print-r2
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Sep 27, 2024
1 parent 2c1131e commit 22de913
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 32 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ data:
tar -xzf example.tar.gz && rm -f example.tar.gz

ld_matrix:
./PCAone -b example/plink -k 3 --ld -o adj -d 2 -m 4
./PCAone -b example/plink -k 3 --ld -o pcaone -d 2
./PCAone -b example/plink -k 3 --ld -o adj -d 2
./PCAone -b example/plink -k 3 --ld -o pcaone -d 2 -m 2
diff adj.kept.bim pcaone.kept.bim
cut -f1 adj.kept.bim | sort -cn ## check if sorted
awk '$$1==3' adj.kept.bim | cut -f4 | sort -cn
Expand Down
47 changes: 17 additions & 30 deletions src/LD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <zlib.h>

#include <algorithm>
#include <cstring>
#include <string>

#include "Cmd.hpp"
Expand Down Expand Up @@ -336,9 +337,6 @@ void ld_clump_single_pheno(std::string fileout, const std::string& head,
if (mpp.count(p2) == 0) continue;
double r =
G.col(idx[j]).dot(G.col(idx[k])) * (sds(idx[j]) * sds(idx[k]) * df);
// double r =
// G.col(idx[j]).dot(G.col(idx[k])) * (sds(idx[j]) * sds(idx[k]) *
// df);
if (r * r >= clump_r2) {
clumped.push_back(p2);
mpp.erase(p2);
Expand Down Expand Up @@ -380,9 +378,8 @@ void ld_r2_small(Data* data, const SNPld& snp, const std::string& filebim,
data->read_block_initial(data->start[b], data->stop[b], false);

gzFile gzfp = gzopen(fileout.c_str(), "wb");
const std::string head{"CHR_A\tBP_A\tSNP_A\tCHR_B\tBP_B\tSNP_B\tR2\n"};
gzwrite(gzfp, head.c_str(), head.size());
String1D out;
std::string line{"CHR_A\tBP_A\tSNP_A\tCHR_B\tBP_B\tSNP_B\tR2\n"};
gzwrite(gzfp, line.c_str(), line.size());

for (int w = 0; w < (int)snp.ws.size(); w++) {
int i = snp.ws[w];
Expand All @@ -392,9 +389,7 @@ void ld_r2_small(Data* data, const SNPld& snp, const std::string& filebim,
b++;
data->read_block_initial(data->start[b], data->stop[b], false);
}
out.resize(snp.we[w] - 1);

#pragma omp parallel for

for (int j = 1; j < snp.we[w]; j++) {
int k = i + j;
double r = 0;
Expand All @@ -409,15 +404,12 @@ void ld_r2_small(Data* data, const SNPld& snp, const std::string& filebim,
r = calc_cor(G.col(i - data->start[b - 1]),
data->G.col(k - data->start[b]), df);
}
//// FIXME: this would be slow
line = bims[i] + "\t" + bims[k] + "\t" + std::to_string(r * r) + "\n";

out[j - 1] =
bims[i] + "\t" + bims[k] + "\t" + std::to_string(r * r) + "\n";
}

for (const auto& str : out) {
if (gzwrite(gzfp, str.c_str(), str.size()) !=
static_cast<int>(str.size()))
cao.error("failed to write data in gzip");
if (gzwrite(gzfp, line.c_str(), line.size()) !=
static_cast<int>(line.size()))
cao.error("failed to write data to ld.gz file");
}
}
gzclose(gzfp);
Expand All @@ -431,25 +423,20 @@ void ld_r2_big(const MyMatrix& G, const SNPld& snp, const std::string& filebim,
MyArray sds = 1.0 / calc_sds(G);
const double df = 1.0 / (G.rows() - 1); // N-1
gzFile gzfp = gzopen(fileout.c_str(), "wb");
const std::string head{"CHR_A\tBP_A\tSNP_A\tCHR_B\tBP_B\tSNP_B\tR2\n"};
gzwrite(gzfp, head.c_str(), head.size());
String1D out;
std::string line{"CHR_A\tBP_A\tSNP_A\tCHR_B\tBP_B\tSNP_B\tR2\n"};
gzwrite(gzfp, line.c_str(), line.size());

for (int w = 0; w < (int)snp.ws.size(); w++) {
int i = snp.ws[w];
out.resize(snp.we[w] - 1);
#pragma omp parallel for

for (int j = 1; j < snp.we[w]; j++) {
int k = i + j;
double r = G.col(i).dot(G.col(k)) * (sds(i) * sds(k) * df);
out[j - 1] =
bims[i] + "\t" + bims[k] + "\t" + std::to_string(r * r) + "\n";
}

for (const auto& str : out) {
if (gzwrite(gzfp, str.c_str(), str.size()) !=
static_cast<int>(str.size()))
cao.error("failed to write data in gzip");
//// FIXME: this would be slow
line = bims[i] + "\t" + bims[k] + "\t" + std::to_string(r * r) + "\n";
if (gzwrite(gzfp, line.c_str(), line.size()) !=
static_cast<int>(line.size()))
cao.error("failed to write data to ld.gz file");
}
}
gzclose(gzfp);
Expand Down

0 comments on commit 22de913

Please sign in to comment.