Skip to content

Commit e1ccdb8

Browse files
authored
Merge pull request #7 from eric9n/main
fna
2 parents b3e4340 + 68f1ebb commit e1ccdb8

File tree

8 files changed

+387
-112
lines changed

8 files changed

+387
-112
lines changed

.github/workflows/rust.yml

Lines changed: 119 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,72 +5,144 @@ on:
55
branches:
66
- release
77

8-
# This is the example from the readme.
9-
# On each push to the `release` branch it will create or update a GitHub release, build your app, and upload the artifacts to the release.
108
env:
119
CARGO_TERM_COLOR: always
12-
BINARY_PREFIX: ncbi
1310

1411
jobs:
15-
build-and-release:
16-
permissions:
17-
contents: write
12+
build-cross:
13+
runs-on: ubuntu-latest
14+
env:
15+
RUST_BACKTRACE: full
1816
strategy:
1917
fail-fast: false
2018
matrix:
21-
platform: [macos-latest, ubuntu-20.04, windows-latest]
19+
target:
20+
- i686-unknown-linux-musl
21+
- x86_64-pc-windows-gnu
22+
- x86_64-unknown-linux-gnu
23+
- x86_64-unknown-linux-musl
24+
- armv7-unknown-linux-musleabihf
25+
- armv7-unknown-linux-gnueabihf
26+
- arm-unknown-linux-gnueabi
27+
- arm-unknown-linux-gnueabihf
28+
- arm-unknown-linux-musleabi
29+
- arm-unknown-linux-musleabihf
30+
- aarch64-unknown-linux-gnu
31+
- aarch64-unknown-linux-musl
32+
- mips-unknown-linux-musl
33+
- mipsel-unknown-linux-musl
2234

23-
runs-on: ${{ matrix.platform }}
2435
steps:
25-
- uses: actions/checkout@v4
26-
- name: Build
27-
run: cargo build --release
36+
- uses: actions/checkout@v3
2837

29-
# Set up the GitHub CLI
30-
- name: Install GitHub CLI
38+
- name: Install Rust
3139
run: |
32-
brew install gh
33-
if: matrix.platform == 'macos-latest'
40+
rustup set profile minimal
41+
rustup toolchain install stable
42+
rustup default stable
43+
rustup override set stable
44+
rustup target add --toolchain stable ${{ matrix.target }}
3445
35-
- name: Install GitHub CLI
36-
run: |
37-
sudo apt install -y gh
38-
if: matrix.platform == 'ubuntu-20.04'
46+
- name: Install cross
47+
run: cargo install cross
3948

40-
- name: Install GitHub CLI
49+
- name: Build ${{ matrix.target }}
50+
timeout-minutes: 120
4151
run: |
42-
choco install gh
43-
if: matrix.platform == 'windows-latest'
52+
compile_target=${{ matrix.target }}
53+
54+
if [[ "$compile_target" == *"-linux-"* || "$compile_target" == *"-apple-"* ]]; then
55+
compile_features="-f local-redir -f local-tun"
56+
fi
57+
58+
if [[ "$compile_target" == "mips-"* || "$compile_target" == "mipsel-"* || "$compile_target" == "mips64-"* || "$compile_target" == "mips64el-"* ]]; then
59+
sudo apt-get update -y && sudo apt-get install -y upx;
60+
if [[ "$?" == "0" ]]; then
61+
compile_compress="-u"
62+
fi
63+
fi
64+
65+
cd build
66+
./build-release -t ${{ matrix.target }} $compile_features $compile_compress
67+
68+
- name: Upload Github Assets
69+
uses: softprops/action-gh-release@v1
70+
env:
71+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
72+
with:
73+
files: build/release/*
74+
prerelease: ${{ contains(github.ref_name, '-') }}
75+
tag_name: ${{ inputs.tag || github.ref_name }}
76+
77+
build-unix:
78+
runs-on: ${{ matrix.os }}
79+
env:
80+
BUILD_EXTRA_FEATURES: "local-redir local-tun"
81+
RUST_BACKTRACE: full
82+
strategy:
83+
fail-fast: false
84+
matrix:
85+
# os: [ubuntu-latest, macos-latest]
86+
os: [macos-latest]
87+
target:
88+
- x86_64-apple-darwin
89+
- aarch64-apple-darwin
90+
steps:
91+
- uses: actions/checkout@v3
4492

45-
# Log in to the GitHub CLI
46-
- name: Login to GitHub CLI
47-
run: echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token
93+
- name: Install GNU tar
94+
if: runner.os == 'macOS'
95+
run: |
96+
brew install gnu-tar
97+
# echo "::add-path::/usr/local/opt/gnu-tar/libexec/gnubin"
98+
echo "/usr/local/opt/gnu-tar/libexec/gnubin" >> $GITHUB_PATH
4899
49-
# Create a release
50-
- name: Create Release
51-
id: create_release
100+
- name: Install Rust
52101
run: |
53-
gh release create ${{ github.ref_name }} \
54-
--title "Release ${{ github.ref_name }}" \
55-
--notes "Release notes for ${{ github.ref_name }}" \
56-
--draft
102+
rustup set profile minimal
103+
rustup toolchain install stable
104+
rustup default stable
105+
rustup override set stable
106+
rustup target add --toolchain stable ${{ matrix.target }}
107+
108+
- name: Build release
57109
shell: bash
110+
run: |
111+
./build/build-host-release -t ${{ matrix.target }}
112+
113+
- name: Upload Github Assets
114+
uses: softprops/action-gh-release@v1
115+
env:
116+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
117+
with:
118+
files: build/release/*
119+
prerelease: ${{ contains(github.ref_name, '-') }}
120+
tag_name: ${{ inputs.tag || github.ref_name }}
58121

59-
- name: Rename and prepare binaries for upload
122+
build-windows:
123+
runs-on: windows-latest
124+
env:
125+
RUSTFLAGS: "-C target-feature=+crt-static"
126+
RUST_BACKTRACE: full
127+
steps:
128+
- uses: actions/checkout@v3
129+
130+
- name: Install Rust
60131
run: |
61-
EXT=""
62-
if [ "${{ runner.os }}" = "Windows" ]; then
63-
EXT=".exe"
64-
fi
65-
for file in "${BINARY_PREFIX}"*; do
66-
mv "$file" "${file}-${RUNNER_OS}-${RUNNER_ARCH}${EXT}"
67-
done
68-
echo "Renamed binaries for upload"
69-
shell: bash
132+
rustup set profile minimal
133+
rustup toolchain install stable
134+
rustup default stable
135+
rustup override set stable
136+
137+
- name: Build release
138+
run: |
139+
pwsh ./build/build-host-release.ps1
70140
71-
- name: Upload Artifacts
72-
uses: actions/upload-artifact@v3
141+
- name: Upload Github Assets
142+
uses: softprops/action-gh-release@v1
143+
env:
144+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
73145
with:
74-
name: ${{ env.BINARY_PREFIX }}-${{ runner.os }}-${{ runner.arch }}
75-
path: |
76-
${BINARY_PREFIX}*-${RUNNER_OS}-${RUNNER_ARCH}${EXT}
146+
files: build/release/*
147+
prerelease: ${{ contains(github.ref_name, '-') }}
148+
tag_name: ${{ inputs.tag || github.ref_name }}

ncbi/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ lazy_static = "1.4"
1919
log = "0.4"
2020
env_logger = "0.10.1"
2121
md-5 = "0.10.6"
22+
async-compression = "0.4.5"

ncbi/src/fna.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
use async_compression::tokio::bufread::GzipDecoder;
2+
use regex::Regex;
3+
use std::collections::HashMap;
4+
use tokio::fs::OpenOptions;
5+
use tokio::{
6+
fs::File,
7+
io::{AsyncBufReadExt, AsyncWriteExt, BufReader, BufWriter},
8+
};
9+
10+
use anyhow::Result;
11+
use std::path::PathBuf;
12+
13+
/// Scans `assembly_summary_<site>.txt` inside `data_dir` and returns a map from
/// the expected FASTA archive name (`<last ftp path segment>_genomic.fna.gz`)
/// to the taxid recorded on the same row.
///
/// Rows are tab-separated. Rows starting with `#` and rows with fewer than 20
/// columns are ignored. Columns 5, 11 and 19 (0-based) are read as the taxid,
/// the assembly level and the FTP path respectively. Only rows whose assembly
/// level is `Complete Genome` or `Chromosome` and whose FTP path is not `na`
/// are kept.
///
/// # Errors
///
/// Returns an error if the summary file cannot be opened or a line cannot be
/// read from it.
pub async fn parse_assembly_fna(site: &str, data_dir: &PathBuf) -> Result<HashMap<String, String>> {
    let summary_path = data_dir.join(format!("assembly_summary_{}.txt", site));
    let mut rows = BufReader::new(File::open(&summary_path).await?).lines();
    let mut archives: HashMap<String, String> = HashMap::new();

    while let Some(row) = rows.next_line().await? {
        // Comment/header rows carry no assembly record.
        if row.starts_with('#') {
            continue;
        }

        let columns: Vec<&str> = row.split('\t').collect();
        // The FTP path lives in column 19, so shorter rows cannot be used.
        if columns.len() <= 19 {
            continue;
        }
        let taxid = columns[5];
        let asm_level = columns[11];
        let ftp_path = columns[19];

        let level_selected = matches!(asm_level, "Complete Genome" | "Chromosome");
        if !level_selected || ftp_path == "na" {
            continue;
        }

        // The archive is named after the final segment of the FTP path.
        let assembly_dir = ftp_path.rsplit('/').next().unwrap_or_default();
        archives.insert(
            format!("{}_genomic.fna.gz", assembly_dir),
            taxid.to_string(),
        );
    }

    Ok(archives)
}
43+
44+
pub async fn write_to_fna(site: &str, data_dir: &PathBuf) -> Result<()> {
45+
log::info!("write to fna...");
46+
47+
let gz_files = parse_assembly_fna(site, data_dir).await?;
48+
let library_fna_path = data_dir.join(format!("library_{}.fna", &site));
49+
let prelim_map_path = data_dir.join(format!("prelim_map_{}.txt", &site));
50+
51+
let mut fna_writer = BufWriter::new(
52+
OpenOptions::new()
53+
.create(true)
54+
.write(true)
55+
.open(&library_fna_path)
56+
.await?,
57+
);
58+
let mut map_writer = BufWriter::new(
59+
OpenOptions::new()
60+
.create(true)
61+
.write(true)
62+
.open(&prelim_map_path)
63+
.await?,
64+
);
65+
66+
let re: Regex = Regex::new(r"^>(\S+)").unwrap();
67+
68+
for (gz_path, taxid) in gz_files {
69+
let gz_file = data_dir.join(&site).join(gz_path);
70+
let file = File::open(gz_file).await?;
71+
let decompressor = GzipDecoder::new(BufReader::new(file));
72+
let mut reader = BufReader::new(decompressor);
73+
74+
let mut line = String::new();
75+
while reader.read_line(&mut line).await? != 0 {
76+
if let Some(caps) = re.captures(&line) {
77+
let seqid = &caps[1];
78+
map_writer
79+
.write_all(format!("{}\t{}\n", seqid, taxid).as_bytes())
80+
.await?;
81+
fna_writer
82+
.write_all(format!(">kraken:taxid|{}|{}", taxid, &line[1..]).as_bytes())
83+
.await?;
84+
} else {
85+
fna_writer.write_all(line.as_bytes()).await?;
86+
}
87+
line.clear();
88+
}
89+
}
90+
91+
fna_writer.flush().await?;
92+
map_writer.flush().await?;
93+
94+
log::info!("write to fna finished");
95+
Ok(())
96+
}

ncbi/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
mod client;
22
pub mod down;
3+
pub mod fna;
34
pub mod load;
45
pub mod md5sum;
56
pub mod meta;

0 commit comments

Comments
 (0)