From 1b298ba0bc4c37f09e62ddc646560e1a28aafd94 Mon Sep 17 00:00:00 2001
From: rickyota <22293266+rickyota@users.noreply.github.com>
Date: Thu, 20 Jul 2023 13:30:17 +0900
Subject: [PATCH 1/6] add: docker, singularity
---
README.md | 30 +++++++++--------
docker/Dockerfile | 30 +++++++++--------
docker/genoboost.def | 6 ++++
genoboost.docker.cv.sh | 73 ------------------------------------------
genoboost.docker.sh | 41 +++++++++++-------------
genoboost.sh | 6 ++--
6 files changed, 59 insertions(+), 127 deletions(-)
create mode 100644 docker/genoboost.def
delete mode 100644 genoboost.docker.cv.sh
diff --git a/README.md b/README.md
index 7749fd1..5824507 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# GenoBoost v0.4.0
+# GenoBoost v0.4.1
[![GenoBoost](https://github.com/rickyota/genoboost/actions/workflows/genoboost.yml/badge.svg)](https://github.com/rickyota/genoboost/actions/workflows/genoboost.yml)
[![Release](https://github.com/rickyota/genoboost/actions/workflows/publish.yml/badge.svg)](https://github.com/rickyota/genoboost/actions/workflows/publish.yml)
@@ -288,30 +288,32 @@ $ genoboost score \
## Advanced Guide
-### Docker
-
Using docker or singularity is recommended.
-Run GenoBoost on an example dataset in `./test/data/1kg_n10000` (1000 samples x 10000 SNVs).
+### Docker
```bash
-$ docker run -td \
- -v "$(pwd)/test/data/1kg_n10000":/work/data:ro -v "$(pwd)/result":/work/result \
- rickyota/genoboost:latest \
- bash ./genoboost.docker.cv.sh
+$ docker pull rickyota/genoboost:latest \
+$ docker run -it rickyota/genoboost:latest \
+ train \
+ --dir ./result \
+ --file-genot ./example/genot \
+ --file-phe ./example/genot.cov \
+ --cov age,sex
```
### Singularity
```bash
-$ singularity build geno.sif docker://rickyota/genoboost:latest
-$ singularity exec \
- --bind "$(pwd)/test/data/1kg_n10000":/work/data,"$(pwd)/result":/work/result \
- --no-home --pwd /opt/genoboost geno.sif \
- bash ./genoboost.docker.cv.sh
+$ singularity build genoboost.sif ./docker/genoboost.def
+$ singularity run genoboost.sif \
+ train \
+ --dir ./result \
+ --file-genot ./example/genot \
+ --file-phe ./example/genot.cov \
+ --cov age,sex
```
-Result files are now in `./result/` .
[release]: https://github.com/rickyota/genoboost/releases
diff --git a/docker/Dockerfile b/docker/Dockerfile
index e6ad4de..dc893b9 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,19 +1,23 @@
-FROM --platform=linux/amd64 rust:1.63 AS builder
-
-WORKDIR /opt/genoboost
-# TODO: install from github url
-COPY ../ .
-# export RUST_BACKTRACE=full
-RUN cargo build --release -p boosting_rust &&\
- cp ./target/release/boosting_rust ./genoboost
+FROM --platform=linux/amd64 rust:1.68 AS builder
-
-FROM --platform=linux/amd64 continuumio/miniconda3 AS runner
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends \
+ clang
WORKDIR /opt/genoboost
-# TODO: copy only necessary files
-COPY --from=builder /opt/genoboost ./
-RUN conda env create --force -n genoboost -f ./etc/env.yml
+COPY ../ .
+#export RUSTFLAGS='-C target-cpu=native'
+RUN cargo build \
+ --release \
+ --manifest-path ./projects_rust/Cargo.toml \
+ --bin genoboost
+ENTRYPOINT ["/opt/genoboost/projects_rust/target/release/genoboost"]
+#FROM --platform=linux/amd64 debian:buster-slim AS runner
+#
+#WORKDIR /opt/genoboost
+## TODO: copy only necessary files
+#COPY --from=builder /opt/genoboost/projects_rust/target/release/genoboost ./
+#CMD ["./genoboost"]
diff --git a/docker/genoboost.def b/docker/genoboost.def
new file mode 100644
index 0000000..42369bf
--- /dev/null
+++ b/docker/genoboost.def
@@ -0,0 +1,6 @@
+Bootstrap: docker
+From: rickyota/genoboost:latest
+
+%runscript
+ /opt/genoboost/projects_rust/target/release/genoboost "$@"
+
diff --git a/genoboost.docker.cv.sh b/genoboost.docker.cv.sh
deleted file mode 100644
index 6717045..0000000
--- a/genoboost.docker.cv.sh
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/bin/bash
-#
-# GenoBoost 5-fold cross-validation
-
-set -eux
-
-# mount to this path
-dir_data="/work/data/"
-dir_result="/work/result/"
-# output directory
-dir="${dir_result}"
-# prefix of plink1 file
-file_plink="${dir_data}genot"
-# covariate file
-file_cov="${dir_data}genot.cov"
-# learning rate parameters
-learning_rates="0.1 0.5"
-
-# output directory of samples
-dir_sample="${dir}samples/"
-# output directory of cross-validation
-dir_cv="${dir}cross_validation/"
-
-# create cv dataset
-mkdir -p "$dir_sample"
-eval "$(conda shell.bash hook)"
-conda activate genoboost
-python -m projects.genetics_py.src.dataset \
- --cross_validation \
- --cross_validation_n 5 \
- --dout "${dir_sample}" \
- --fplink "$file_plink"
-
-# train
-for cvi in {0..4}; do
- dir_wgt_cv="${dir_cv}tr.cv${cvi}/"
- fin_sample="${dir_sample}tr.cv${cvi}.samples"
- ./genoboost train \
- --dir "$dir_wgt_cv" \
- --file_plink "$file_plink" \
- --file_cov "$file_cov" \
- --file_sample "$fin_sample" \
- --learning_rates $learning_rates \
- --iter 100 \
- --clip_sample_weight "top0.1" \
- --prune_snv 0.1
-done
-
-# score
-for cvi in {0..4}; do
- dir_wgt_cv="${dir_cv}tr.cv${cvi}/"
- dir_score_cv="${dir_cv}va.cv${cvi}/"
- fin_sample="${dir_sample}va.cv${cvi}.samples"
- ./genoboost score \
- --dir_score "$dir_score_cv" \
- --iters 10 30 50 100 \
- --file_plink "$file_plink" \
- --file_cov "$file_cov" \
- --file_sample "$fin_sample" \
- --dir_wgt "$dir_wgt_cv" \
- --learning_rates $learning_rates
-
- dir_score_cv="${dir_cv}ts.cv${cvi}/"
- fin_sample="${dir_sample}test.samples"
- ./genoboost score \
- --dir_score "$dir_score_cv" \
- --iters 10 30 50 100 \
- --file_plink "$file_plink" \
- --file_cov "$file_cov" \
- --file_sample "$fin_sample" \
- --dir_wgt "$dir_wgt_cv" \
- --learning_rates $learning_rates
-done
diff --git a/genoboost.docker.sh b/genoboost.docker.sh
index 97e18a5..05c4921 100644
--- a/genoboost.docker.sh
+++ b/genoboost.docker.sh
@@ -4,35 +4,30 @@
set -eux
-# mount to this path
-dir_data="/work/data/"
-dir_result="/work/result/"
# output directory of training
-dir_wgt="${dir_result}train/"
+dir_wgt="./result/train/"
# output directory of score
-dir_score="${dir_result}score/"
+dir_score="./result/score/"
# prefix of plink1 file
-file_plink="${dir_data}genot"
+file_plink="./test/data/1kg_maf0.1_m1k/genot"
# covariate file
-file_cov="${dir_data}genot.cov"
-# learning rate parameters
-learning_rates="0.1 0.5"
+file_cov="./test/data/1kg_maf0.1_m1k/genot.cov"
+
+function genoboost-docker() {
+ docker run -it rickyota/genoboost:latest "$@"
+}
# train
-./genoboost train \
+./genoboost-docker train \
--dir "$dir_wgt" \
- --file_plink "$file_plink" \
- --file_cov "$file_cov" \
- --learning_rates $learning_rates \
- --iter 100 \
- --clip_sample_weight "top0.1" \
- --prune_snv 0.1
+ --file-genot "$file_plink" \
+ --file-phe "$file_cov" \
+ --cov age,sex
# score
-./genoboost score \
- --dir_score "$dir_score" \
- --iters 10 30 50 100 \
- --file_plink "$file_plink" \
- --file_cov "$file_cov" \
- --dir_wgt "$dir_wgt" \
- --learning_rates $learning_rates
+./genoboost-docker score \
+ --dir-score "$dir_score" \
+ --dir-wgt "$dir_wgt" \
+ --file-genot "$file_plink" \
+ --file-phe "$file_cov" \
+ --cov age,sex
diff --git a/genoboost.sh b/genoboost.sh
index 6c978f6..f3c95c2 100644
--- a/genoboost.sh
+++ b/genoboost.sh
@@ -25,8 +25,7 @@ cp ./projects_rust/target/release/genoboost ./genoboost
--dir "$dir_wgt" \
--file-genot "$file_plink" \
--file-phe "$file_cov" \
- --cov age,sex \
- --cross-validation 1
+ --cov age,sex
# score
./genoboost score \
@@ -34,5 +33,4 @@ cp ./projects_rust/target/release/genoboost ./genoboost
--dir-wgt "$dir_wgt" \
--file-genot "$file_plink" \
--file-phe "$file_cov" \
- --cov age,sex \
- --cross-validation 1
+ --cov age,sex
From 21296158e1ac0b3a2a99b1ebe6808e65ab38e095 Mon Sep 17 00:00:00 2001
From: rickyota <22293266+rickyota@users.noreply.github.com>
Date: Sun, 8 Oct 2023 13:03:28 +0900
Subject: [PATCH 2/6] add: README
---
README.md | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 5824507..bcf6ff2 100644
--- a/README.md
+++ b/README.md
@@ -38,8 +38,9 @@ $ genoboost train \
- [Cross-validation](#score-cv)
- [Options for Score](#score-option)
- [Advanced Guide](#advanced-guide)
- - [Docker](#docker)
- - [Singularity](#singularity)
+ - [Installation](#advanced-install)
+ - [Docker](#docker)
+ - [Singularity](#singularity)
## Introduction
@@ -79,7 +80,7 @@ cargo build --manifest-path ./projects_rust/Cargo.toml --release --bin genoboost
cp ./projects_rust/target/release/genoboost ./genoboost
```
-and you can use `genoboost` program.
+and you can use the `genoboost` program. This should take less than 5 minutes.
#### Advanced Install
@@ -288,9 +289,10 @@ $ genoboost score \
## Advanced Guide
+### Advanced Installation
Using docker or singularity is recommended.
-### Docker
+#### Docker
```bash
$ docker pull rickyota/genoboost:latest \
@@ -302,7 +304,7 @@ $ docker run -it rickyota/genoboost:latest \
--cov age,sex
```
-### Singularity
+#### Singularity
```bash
$ singularity build genoboost.sif ./docker/genoboost.def
@@ -314,6 +316,9 @@ $ singularity run genoboost.sif \
--cov age,sex
```
+### Computational Time
+
+For ~216 thousand training samples and ~1.1 million SNVs, GenoBoost would take 10 hours for 10,000 unique SNVs.
[release]: https://github.com/rickyota/genoboost/releases
From ff6714d7cda82a07a2775349d8a1fde599ffdfbd Mon Sep 17 00:00:00 2001
From: rickyota <22293266+rickyota@users.noreply.github.com>
Date: Sun, 8 Oct 2023 13:27:31 +0900
Subject: [PATCH 3/6] add: major-a2-train
---
README.md | 2 +-
create.publish.sh | 8 +-
genoboost.cv.sh | 9 +-
genoboost.docker.sh | 4 +-
genoboost.sh | 5 +-
projects_rust/Cargo.toml | 10 -
projects_rust/boosting/benches/common.rs | 4 +-
.../boosting/benches/loss_criterion.rs | 8 +-
.../boosting/src/bin/boosting_res.rs | 35 +-
projects_rust/boosting/src/bin/genoboost.rs | 27 +-
.../boosting/src/bin_old/boosting_research.rs | 16 +-
.../boosting/src/bin_old/genoboost.rs | 5 +-
projects_rust/boosting/src/boosting_param.rs | 36 +-
projects_rust/boosting/src/boosting_score.rs | 49 ++-
.../boosting/src/boosting_score/io.rs | 219 ++++++----
.../boosting/src/boosting_score/run_scores.rs | 23 +-
.../boosting/src/boosting_score/score.rs | 121 +++++-
projects_rust/boosting/src/boosting_train.rs | 83 +++-
.../src/boosting_train/coefficient.rs | 398 +++++++++++++-----
.../boosting_train/coefficient/adjust_coef.rs | 37 +-
.../src/boosting_train/coefficient/calc.rs | 31 ++
.../boosting/src/boosting_train/loss.rs | 150 +++----
.../boosting/src/boosting_train/loss/calc.rs | 364 ++++++++--------
.../src/boosting_train/regression_cov.rs | 3 -
.../boosting/src/boosting_train/table.rs | 38 +-
projects_rust/boosting/src/lib.rs | 109 +++--
projects_rust/boosting/src/wgt_boost/io.rs | 4 +-
projects_rust/boosting/src/wgt_boosts.rs | 14 +-
projects_rust/genetics/Cargo.toml | 1 -
projects_rust/genetics/benches/common.rs | 8 +-
projects_rust/genetics/src/alloc.rs | 5 +
projects_rust/genetics/src/bin/genetics.rs | 28 +-
.../genetics/src/bin/genetics_res.rs | 50 ++-
.../genetics/src/bin/test_pgenlib.rs | 137 ++++++
projects_rust/genetics/src/cov.rs | 24 +-
projects_rust/genetics/src/dataset.rs | 284 +++++++++----
.../genetics/src/dataset/io_genot.rs | 152 ++++++-
.../genetics/src/dataset/io_genot/load.rs | 249 ++++++-----
.../src/dataset/io_genot/load/plink.rs | 78 ++--
.../src/dataset/io_genot/load/plink2.rs | 379 ++++++++++++++---
.../src/dataset/io_genot/load_score.rs | 120 ++++--
projects_rust/genetics/src/dataset/samples.rs | 54 ++-
.../genetics/src/dataset/samples/covs.rs | 5 +-
projects_rust/genetics/src/dataset/snvs.rs | 6 +-
.../genetics/src/genot/base_genot.rs | 72 +++-
.../genetics/src/genot/genot_struct.rs | 64 ++-
projects_rust/genetics/src/lib.rs | 308 ++++++++++++--
projects_rust/genetics/src/regression.rs | 205 +++++----
projects_rust/genetics/src/sample/io.rs | 36 +-
projects_rust/genetics/src/score.rs | 264 +++++++++---
projects_rust/genetics/src/snv/snv_index.rs | 73 +++-
projects_rust/genetics/src/textfile/text.rs | 1 +
projects_rust/genetics/src/wgt/coef.rs | 9 +
projects_rust/genetics/src/wgt/io.rs | 69 ++-
projects_rust/genetics/src/wgts.rs | 15 +-
55 files changed, 3224 insertions(+), 1284 deletions(-)
create mode 100644 projects_rust/genetics/src/bin/test_pgenlib.rs
diff --git a/README.md b/README.md
index c242669..16fd7c9 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# GenoBoost v0.4.1
+# GenoBoost v1.0.0
[![GenoBoost](https://github.com/rickyota/genoboost/actions/workflows/genoboost.yml/badge.svg)](https://github.com/rickyota/genoboost/actions/workflows/genoboost.yml)
[![Release](https://github.com/rickyota/genoboost/actions/workflows/publish.yml/badge.svg)](https://github.com/rickyota/genoboost/actions/workflows/publish.yml)
diff --git a/create.publish.sh b/create.publish.sh
index ef622ea..270d964 100644
--- a/create.publish.sh
+++ b/create.publish.sh
@@ -16,7 +16,7 @@ d_publish="./${artifact_name}/"
cargo build \
--release \
- --target=${target} \
+ --target=${target} \
--manifest-path ./projects_rust/Cargo.toml \
--no-default-features \
--bin genoboost
@@ -27,15 +27,14 @@ cargo build \
mkdir -p ${d_publish}
if [[ ${target} == *"windows"* ]]; then
- cp ./projects_rust/target/${target}/release/genoboost.exe ${d_publish}/
+ cp ./projects_rust/target/${target}/release/genoboost.exe ${d_publish}/
else
- cp ./projects_rust/target/${target}/release/genoboost ${d_publish}/
+ cp ./projects_rust/target/${target}/release/genoboost ${d_publish}/
fi
mkdir -p ${d_publish}/example/
cp ./example/* ${d_publish}/example/
-
zip -r ./${artifact_name}.zip ${d_publish}
#if [[ ${target} == *"windows"* ]]; then
@@ -43,4 +42,3 @@ zip -r ./${artifact_name}.zip ${d_publish}
#else
# zip -r ./${artifact_name}.zip ${d_publish}
#fi
-
diff --git a/genoboost.cv.sh b/genoboost.cv.sh
index 0ba94fe..d307636 100644
--- a/genoboost.cv.sh
+++ b/genoboost.cv.sh
@@ -12,7 +12,6 @@ file_plink="./test/data/1kg_maf0.1_m1k/genot"
# covariate file
file_cov="./test/data/1kg_maf0.1_m1k/genot.cov"
-
# compile
export RUST_BACKTRACE=full
cargo build --manifest-path ./projects_rust/Cargo.toml --release --bin genoboost
@@ -24,14 +23,14 @@ cp ./projects_rust/target/release/genoboost ./genoboost
--file-genot "$file_plink" \
--file-phe "$file_cov" \
--cov age,sex \
- --cross-validation 5
+ --cross-validation 5 \
+ --major-a2-train
# score
./genoboost score \
--dir-score "${dir}/score" \
- --dir-wgt "${dir}/train" \
+ --dir-wgt "${dir}/train" \
--file-genot "$file_plink" \
--file-phe "$file_cov" \
- --cov age,sex \
+ --cov age,sex \
--cross-validation 5
-
diff --git a/genoboost.docker.sh b/genoboost.docker.sh
index 05c4921..b236f51 100644
--- a/genoboost.docker.sh
+++ b/genoboost.docker.sh
@@ -22,7 +22,7 @@ function genoboost-docker() {
--dir "$dir_wgt" \
--file-genot "$file_plink" \
--file-phe "$file_cov" \
- --cov age,sex
+ --cov age,sex
# score
./genoboost-docker score \
@@ -30,4 +30,4 @@ function genoboost-docker() {
--dir-wgt "$dir_wgt" \
--file-genot "$file_plink" \
--file-phe "$file_cov" \
- --cov age,sex
+ --cov age,sex
diff --git a/genoboost.sh b/genoboost.sh
index f3c95c2..84fa5f9 100644
--- a/genoboost.sh
+++ b/genoboost.sh
@@ -25,7 +25,8 @@ cp ./projects_rust/target/release/genoboost ./genoboost
--dir "$dir_wgt" \
--file-genot "$file_plink" \
--file-phe "$file_cov" \
- --cov age,sex
+ --cov age,sex \
+ --major-a2-train
# score
./genoboost score \
@@ -33,4 +34,4 @@ cp ./projects_rust/target/release/genoboost ./genoboost
--dir-wgt "$dir_wgt" \
--file-genot "$file_plink" \
--file-phe "$file_cov" \
- --cov age,sex
+ --cov age,sex
diff --git a/projects_rust/Cargo.toml b/projects_rust/Cargo.toml
index 067eec2..b9e3ae4 100644
--- a/projects_rust/Cargo.toml
+++ b/projects_rust/Cargo.toml
@@ -1,16 +1,6 @@
[workspace]
-
-
members=[
"boosting",
"genetics",
"cmatrix",
- #"playground",
- #"test_pgenlib",
- #"test_rust",
- #"test_pyo3",
]
-
-
-
-
diff --git a/projects_rust/boosting/benches/common.rs b/projects_rust/boosting/benches/common.rs
index 426731c..4621a5a 100644
--- a/projects_rust/boosting/benches/common.rs
+++ b/projects_rust/boosting/benches/common.rs
@@ -13,12 +13,12 @@ fn setup_vars(
let sample_buf = fin_sample.map(|x| genetics::textfile::read_file_to_end(x, None).unwrap());
//) -> (Dataset, Vec, LossStruct) {
- let dataset: Dataset = Dataset::new(
+ let dataset: Dataset = Dataset::new_boost_training(
fin,
GenotFormat::Plink1,
None,
None,
- "",
+ None,
snv_buf.as_deref(),
sample_buf.as_deref(),
//fin_snv,
diff --git a/projects_rust/boosting/benches/loss_criterion.rs b/projects_rust/boosting/benches/loss_criterion.rs
index 3d15e70..6e15b02 100644
--- a/projects_rust/boosting/benches/loss_criterion.rs
+++ b/projects_rust/boosting/benches/loss_criterion.rs
@@ -95,7 +95,7 @@ unsafe fn bench_calculate_loss_gt_comp(c: &mut Criterion) {
&mut losss,
&dataset.genot(),
&sample_weight,
- &dataset.samples().phe(),
+ &dataset.samples().phe_unwrap(),
BoostParam::new_type1(),
&HashSet::new(),
)
@@ -108,7 +108,7 @@ unsafe fn bench_calculate_loss_gt_comp(c: &mut Criterion) {
&mut losss,
&dataset.genot(),
&sample_weight,
- &dataset.samples().phe(),
+ &dataset.samples().phe_unwrap(),
BoostParam::new_type1(),
)
})
@@ -124,7 +124,7 @@ unsafe fn bench_calculate_loss_gt_comp(c: &mut Criterion) {
&mut losss,
&dataset.genot(),
&sample_weight,
- &dataset.samples().phe(),
+ &dataset.samples().phe_unwrap(),
BoostParam::new_type1(),
&HashSet::new(),
)
@@ -137,7 +137,7 @@ unsafe fn bench_calculate_loss_gt_comp(c: &mut Criterion) {
&mut losss,
&dataset.genot(),
&sample_weight,
- &dataset.samples().phe(),
+ &dataset.samples().phe_unwrap(),
BoostParam::new_type1(),
)
})
diff --git a/projects_rust/boosting/src/bin/boosting_res.rs b/projects_rust/boosting/src/bin/boosting_res.rs
index ab05edc..da3497f 100644
--- a/projects_rust/boosting/src/bin/boosting_res.rs
+++ b/projects_rust/boosting/src/bin/boosting_res.rs
@@ -49,8 +49,6 @@ struct Cli {
threads: Option,
#[arg(long, global = true, help = "Verbose")]
verbose: bool,
- #[arg(long, global = true, help = "ccccccc")]
- cdef: bool,
}
#[derive(Debug, Subcommand)]
@@ -86,7 +84,7 @@ struct TrainArgs {
phe: Option,
// parse later
#[arg(long)]
- cov: String,
+ cov: Option,
//#[arg(long)]
//file_cov: Option,
#[arg(long)]
@@ -113,6 +111,11 @@ struct TrainArgs {
//use_adjloss: bool,
//#[arg(long)]
//use_const_for_loss: bool,
+ #[arg(
+ long,
+ help = "Set major allele in training dataset as a2 allele. Otherwise, set ref allele as a2 allele."
+ )]
+ major_a2_train: bool,
#[arg(long)]
resume: bool,
#[arg(long)]
@@ -136,19 +139,22 @@ struct ScoreArgs {
file_sample: Option,
#[arg(long)]
file_phe: Option,
+ //#[arg(long)]
+ //phe: Option,
#[arg(long)]
- phe: Option,
- #[arg(long)]
- cov: String,
+ cov: Option,
//#[arg(long)]
//file_cov: Option,
// if indicated, do not use para_best and calc score of all paras
- #[arg(long)]
+ #[arg(long, value_parser, num_args = 1.., value_delimiter = ' ')]
iters: Option>,
- #[arg(long)]
+ #[arg(long, value_parser, num_args = 1.., value_delimiter = ' ')]
learning_rates: Option>,
#[arg(long)]
use_iter: bool,
+ // TMP: to remove
+ #[arg(long)]
+ use_snv_pos: bool,
}
#[derive(Copy, Clone, PartialEq, Eq, Debug, ValueEnum)]
@@ -181,6 +187,7 @@ impl GenotFormatArg {
fn main() {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
+ // or use _enabled!
if is_x86_feature_detected!("avx2") {
log::info!("Able to use SIMD.")
} else {
@@ -275,6 +282,8 @@ fn main() {
log::info!("file_sample {:?}", fin_sample);
log::info!("boost_params {:?}", boost_params);
+ let make_major_a2_train = args.major_a2_train;
+
let use_adjloss = true;
//let use_adjloss = args.use_adjloss;
let use_const_for_loss = false;
@@ -287,7 +296,7 @@ fn main() {
genot_format,
fin_phe.as_deref(),
phe_name.as_deref(),
- &cov_name,
+ cov_name.as_deref(),
boost_method,
&boost_params,
fin_snv.as_deref(),
@@ -301,6 +310,7 @@ fn main() {
None, //prune_snv,
//&learning_rates,
is_monitor,
+ make_major_a2_train,
);
}
Commands::Score(args) => {
@@ -308,7 +318,7 @@ fn main() {
let fin = PathBuf::from(args.file_genot);
let genot_format = args.genot_format.to_naive();
let fin_phe = args.file_phe.map(|x| PathBuf::from(x));
- let phe_name = args.phe;
+ //let phe_name = args.phe;
let cov_name = args.cov;
let fin_sample = args.file_sample.map(|x| PathBuf::from(x));
//let fin_cov = args.file_cov.map(|x| PathBuf::from(x));
@@ -345,8 +355,8 @@ fn main() {
genot_format,
phe_buf.as_deref(),
//fin_phe.as_deref(),
- phe_name.as_deref(),
- Some(&cov_name),
+ //phe_name.as_deref(),
+ cov_name.as_deref(),
is_every_para,
iterations.as_deref(),
dout_wgt.as_deref(), // use enum?
@@ -357,6 +367,7 @@ fn main() {
//boost_param,
&learning_rates,
use_iter,
+ args.use_snv_pos,
);
}
}
diff --git a/projects_rust/boosting/src/bin/genoboost.rs b/projects_rust/boosting/src/bin/genoboost.rs
index 85c0b6e..99c4de0 100644
--- a/projects_rust/boosting/src/bin/genoboost.rs
+++ b/projects_rust/boosting/src/bin/genoboost.rs
@@ -87,7 +87,7 @@ struct TrainArgs {
phe: Option,
// parse later
#[arg(long)]
- cov: String,
+ cov: Option,
//#[arg(long)]
//file_cov: Option,
#[arg(long)]
@@ -115,6 +115,11 @@ struct TrainArgs {
resume: bool,
#[arg(long)]
write_loss: bool,
+ #[arg(
+ long,
+ help = "Set major allele in training dataset as a2 allele. Otherwise, set ref allele as a2 allele."
+ )]
+ major_a2_train: bool,
// --integrate-only
//#[arg(long, default_value_t = true)]
//integrate: bool,
@@ -147,11 +152,11 @@ struct ScoreArgs {
file_sample: Option,
#[arg(long)]
file_phe: Option,
- #[arg(long)]
- phe: Option,
+ //#[arg(long)]
+ //phe: Option,
// TODO: remove --cov and read from wgt?
#[arg(long)]
- cov: String,
+ cov: Option,
//#[arg(long)]
//file_cov: Option,
// if indicated, do not use para_best and calc score of all paras
@@ -309,7 +314,7 @@ fn main() {
} else if args.iter_snv.is_some() {
boost_params.set_iteration_snv(args.iter_snv.unwrap())
} else {
- if args.train_only{
+ if args.train_only {
panic!("You have to use --iter-snv or --iter with --train-only");
}
// else: integrate
@@ -328,6 +333,8 @@ fn main() {
log::info!("file_sample {:?}", fin_sample);
log::info!("boost_params {:?}", boost_params);
+ let make_major_a2_train = args.major_a2_train;
+
let use_adjloss = true;
//let use_adjloss = args.use_adjloss;
let use_const_for_loss = false;
@@ -347,7 +354,7 @@ fn main() {
genot_format,
fin_phe.as_deref(),
phe_name.as_deref(),
- &cov_name,
+ cov_name.as_deref(),
boost_method,
&boost_params,
fin_snv.as_deref(),
@@ -361,6 +368,7 @@ fn main() {
None, //prune_snv,
//&learning_rates,
is_monitor,
+ make_major_a2_train,
cross_vali,
seed,
);
@@ -370,7 +378,7 @@ fn main() {
let fin = PathBuf::from(args.file_genot);
let genot_format = args.genot_format.to_naive();
let fin_phe = args.file_phe.map(|x| PathBuf::from(x));
- let phe_name = args.phe;
+ //let phe_name = args.phe;
let cov_name = args.cov;
let fin_sample = args.file_sample.map(|x| PathBuf::from(x));
//let fin_cov = args.file_cov.map(|x| PathBuf::from(x));
@@ -407,8 +415,8 @@ fn main() {
&fin,
genot_format,
fin_phe.as_deref(),
- phe_name.as_deref(),
- Some(&cov_name),
+ //phe_name.as_deref(),
+ cov_name.as_deref(),
is_every_para,
iterations.as_deref(),
dout_wgt.as_deref(), // use enum?
@@ -419,6 +427,7 @@ fn main() {
&learning_rates,
use_iter,
cross_vali,
+ false,
);
}
}
diff --git a/projects_rust/boosting/src/bin_old/boosting_research.rs b/projects_rust/boosting/src/bin_old/boosting_research.rs
index 9780e64..aefc802 100644
--- a/projects_rust/boosting/src/bin_old/boosting_research.rs
+++ b/projects_rust/boosting/src/bin_old/boosting_research.rs
@@ -1,11 +1,11 @@
//! Application of **Genoboost**.
//! Input plink file to run Genoboost.
-//!
+//!
//! Logitnomissing
//! When the denominator of s2 is 0.0, (no eps and no samples for minor homozygotes), s2 is set the same as s1 (=dominant model).
//! When the denominator of s1, s0 is 0.0, (no samples for major homozygotes or heterozygotes), s0, s1 is set to 0.0
-//!
-//!
+//!
+//!
// TODO: ensure the same para when resuming
// TODO: (optional) write down extract snvs from top
// TODO: how to get memory?
@@ -189,7 +189,7 @@ fn main() {
let fin_sample_val = matches
.value_of("file_sample_val")
.map(|x| PathBuf::from(x));
- let is_monitor=fin_sample_val.is_some();
+ let is_monitor = fin_sample_val.is_some();
//let boost_type = matches.value_of("boost_type").unwrap();
let learning_rates: Vec