Skip to content

Commit

Permalink
Optional output alignments
Browse files Browse the repository at this point in the history
Co-authored-by: aziele <a.zielezinski@gmail.com>
Co-authored-by: Sebastian Deorowicz <sebastian.deorowicz@polsl.pl>
  • Loading branch information
3 people authored Sep 5, 2024
1 parent 2ee5cc4 commit c29a4a4
Show file tree
Hide file tree
Showing 11 changed files with 6,233 additions and 236 deletions.
92 changes: 92 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Release deployment workflow: builds vclust on every self-hosted runner and
# attaches one vclust-<tag>-<machine>.tar.gz archive per machine to the release.
name: Deploy

on:
  release:
    types:
      - created
      # `edited` is the activity type GitHub emits when an existing release is
      # modified; `updated` is not a valid `release` activity type and never fires.
      # NOTE(review): a re-run on `edited` uploads an asset with the same name
      # again - confirm how the upload action behaves when the asset exists.
      - edited

jobs:

  ########################################################################################
  # Fetches the sources (with submodules) into the workspace of each runner.
  checkout:
    name: Checkout
    strategy:
      matrix:
        machine: [x64_linux, x64_mac, arm64_linux, arm64_mac]
    runs-on: [self-hosted, vclust, '${{ matrix.machine }}']

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      # Best effort: convert the igraph submodule into a full clone so that its
      # tags are available. A failure here must not abort the deployment.
      - name: Get tags
        run: |
          cd ./3rd_party/clusty/libs/igraph
          git fetch --prune --unshallow
          echo exit code $?
          git tag --list
        continue-on-error: true

  ########################################################################################
  # Builds the binaries and packs the release archive. There is no checkout step
  # here: the job relies on the workspace left on the same self-hosted runner by
  # the `checkout` job.
  make:
    name: Make
    needs: checkout
    strategy:
      fail-fast: false
      matrix:
        # Leiden support is enabled for the Linux builds only; the macOS
        # machines are added through `include` with it switched off.
        machine: [x64_linux, arm64_linux]
        leiden: [true]
        include:
          - machine: x64_mac
            leiden: false
          - machine: arm64_mac
            leiden: false

    runs-on: [self-hosted, vclust, '${{ matrix.machine }}']

    steps:
      # Cleaning may fail when there is nothing to clean; that is not an error.
      - name: make clean
        run: make clean
        continue-on-error: true
      - name: make
        run: make -j32 CXX=g++-12 STATIC_LINK=true LEIDEN=${{ matrix.leiden }}
      - name: tar artifacts
        run: tar -cvzf vclust.tar.gz vclust.py test.py LICENSE example bin

  ########################################################################################
  # Smoke test: the freshly built package must at least start and print its usage.
  help:
    name: Print usage
    needs: make
    strategy:
      fail-fast: false
      matrix:
        machine: [x64_linux, x64_mac, arm64_linux, arm64_mac]
    runs-on: [self-hosted, vclust, '${{ matrix.machine }}']

    steps:
      - name: help
        run: python3 vclust.py

  ########################################################################################
  # Attaches the archive produced by the `make` job to the triggering release.
  upload:
    name: Upload
    needs: help
    strategy:
      fail-fast: false
      matrix:
        machine: [x64_linux, x64_mac, arm64_linux, arm64_mac]
    runs-on: [self-hosted, vclust, '${{ matrix.machine }}']

    steps:
      # NOTE(review): actions/upload-release-asset appears to be archived and
      # unmaintained - consider migrating to a maintained alternative.
      - name: deploy
        uses: actions/upload-release-asset@v1.0.1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ github.event.release.upload_url }}
          asset_path: ./vclust.tar.gz
          asset_name: vclust-${{ github.event.release.tag_name }}-${{ matrix.machine }}.tar.gz
          asset_content_type: application/gzip
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
fail-fast: false
matrix:
machine: [ubuntu-latest, macOS-12]
compiler: [g++-11]
compiler: [g++-12]
runs-on: ['${{ matrix.machine }}']

steps:
Expand Down
1 change: 0 additions & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
[submodule "3rd_party/kmer-db"]
path = 3rd_party/kmer-db
url = https://github.com/refresh-bio/kmer-db.git
branch = develop
[submodule "3rd_party/lz-ani"]
path = 3rd_party/lz-ani
url = https://github.com/refresh-bio/LZ-ANI.git
Expand Down
2 changes: 1 addition & 1 deletion 3rd_party/clusty
2 changes: 1 addition & 1 deletion 3rd_party/kmer-db
2 changes: 1 addition & 1 deletion 3rd_party/lz-ani
Submodule lz-ani updated 93 files
+77 −0 .github/workflows/deploy.yml
+62 −0 .github/workflows/main.yml
+83 −0 .github/workflows/self-hosted.yml
+108 −67 README.md
+5,694 −0 example/output/ani.aln.tsv
+133 −133 example/output/ani.tsv
+0 −0 libs/refresh/allocators/lib/memory_monotonic.h
+0 −0 libs/refresh/compression/lib/file_wrapper.h
+0 −304 libs/refresh/conversion.h
+4,199 −0 libs/refresh/conversions/lib/dragonbox.h
+580 −0 libs/refresh/conversions/lib/numeric_conversions.h
+0 −0 libs/refresh/parallel_queues/lib/parallel-queues-common.h
+0 −0 libs/refresh/parallel_queues/lib/parallel-queues.h
+1 −1 makefile
+92 −4 src/defs.h
+5 −14 src/filter.cpp
+25 −3 src/filter.h
+14 −2 src/lz-ani.cpp
+146 −59 src/lz_matcher.cpp
+30 −3 src/lz_matcher.h
+0 −243 src/memory_monotonic.h
+0 −36 src/parallel-queues-common.h
+0 −419 src/parallel-queues.h
+44 −39 src/params.h
+73 −2 src/parser.cpp
+10 −2 src/parser.h
+2 −2 src/seq_reservoir.cpp
+14 −4 src/seq_reservoir.h
+3 −2 src/utils.cpp
+2 −2 src/utils.h
+62 −0 test/vir61.ani.ids.tsv
+3,661 −0 test/vir61.ani.tsv
+568 −0 test/vir61/NC_001271.1.fna
+573 −0 test/vir61/NC_001604.1.fna
+548 −0 test/vir61/NC_003298.1.fna
+536 −0 test/vir61/NC_004665.1.fna
+539 −0 test/vir61/NC_004777.1.fna
+567 −0 test/vir61/NC_007149.1.fna
+570 −0 test/vir61/NC_007456.1.fna
+553 −0 test/vir61/NC_008694.1.fna
+557 −0 test/vir61/NC_010807.1.fna
+555 −0 test/vir61/NC_011038.1.fna
+571 −0 test/vir61/NC_011040.1.fna
+563 −0 test/vir61/NC_011042.1.fna
+591 −0 test/vir61/NC_011043.1.fna
+557 −0 test/vir61/NC_011045.1.fna
+552 −0 test/vir61/NC_011085.3.fna
+566 −0 test/vir61/NC_011534.1.fna
+590 −0 test/vir61/NC_013647.1.fna
+552 −0 test/vir61/NC_013651.1.fna
+562 −0 test/vir61/NC_015159.1.fna
+568 −0 test/vir61/NC_015208.1.fna
+563 −0 test/vir61/NC_015249.1.fna
+588 −0 test/vir61/NC_015264.1.fna
+551 −0 test/vir61/NC_015271.1.fna
+587 −0 test/vir61/NC_015719.1.fna
+553 −0 test/vir61/NC_019416.1.fna
+564 −0 test/vir61/NC_019510.1.fna
+604 −0 test/vir61/NC_020483.1.fna
+577 −0 test/vir61/NC_021062.1.fna
+572 −0 test/vir61/NC_022744.1.fna
+594 −0 test/vir61/NC_023005.1.fna
+569 −0 test/vir61/NC_023548.1.fna
+559 −0 test/vir61/NC_023558.1.fna
+563 −0 test/vir61/NC_023576.1.fna
+554 −0 test/vir61/NC_023715.1.fna
+580 −0 test/vir61/NC_023736.1.fna
+581 −0 test/vir61/NC_024362.1.fna
+561 −0 test/vir61/NC_024379.1.fna
+555 −0 test/vir61/NC_025451.1.fna
+562 −0 test/vir61/NC_027292.1.fna
+557 −0 test/vir61/NC_027387.1.fna
+563 −0 test/vir61/NC_028655.1.fna
+590 −0 test/vir61/NC_028661.1.fna
+576 −0 test/vir61/NC_028688.1.fna
+547 −0 test/vir61/NC_028702.1.fna
+562 −0 test/vir61/NC_028772.1.fna
+453 −0 test/vir61/NC_028795.1.fna
+598 −0 test/vir61/NC_028800.1.fna
+586 −0 test/vir61/NC_028822.1.fna
+581 −0 test/vir61/NC_028863.1.fna
+554 −0 test/vir61/NC_028880.1.fna
+589 −0 test/vir61/NC_028977.1.fna
+518 −0 test/vir61/NC_029102.1.fna
+564 −0 test/vir61/NC_031018.1.fna
+566 −0 test/vir61/NC_031066.1.fna
+562 −0 test/vir61/NC_031092.1.fna
+563 −0 test/vir61/NC_031114.1.fna
+568 −0 test/vir61/NC_031115.1.fna
+566 −0 test/vir61/NC_031123.1.fna
+570 −0 test/vir61/NC_031258.1.fna
+559 −0 test/vir61/NC_031937.1.fna
+581 −0 test/vir61/NC_031943.1.fna
211 changes: 154 additions & 57 deletions README.md

Large diffs are not rendered by default.

5,694 changes: 5,694 additions & 0 deletions example/output/ani.aln.tsv

Large diffs are not rendered by default.

266 changes: 133 additions & 133 deletions example/output/ani.tsv

Large diffs are not rendered by default.

42 changes: 39 additions & 3 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# Test data paths, all resolved relative to DATA_DIR.
# FASTA_DIR and FASTA_FILE are passed to vclust as the `-i` input by the tests;
# the files under 'output' are presumably precomputed example results
# (TODO confirm against the repository's example directory).
FASTA_DIR = DATA_DIR / 'fna'
FASTA_FILE = DATA_DIR / 'multifasta.fna'
ANI_FILE = DATA_DIR / 'output'/ 'ani.tsv'
ALN_FILE = DATA_DIR / 'output' / 'ani.aln.tsv'
IDS_FILE = DATA_DIR / 'output' / 'ani.ids.tsv'
FLTR_FILE = DATA_DIR / 'output' / 'fltr.txt'

Expand Down Expand Up @@ -170,7 +171,7 @@ def test_align_default(test_dir, input, params):
ref_pairs = {
('NC_010807.ref', 'NC_010807.alt1'): 0.99753,
('NC_010807.ref', 'NC_010807.alt2'): 0.98985,
('NC_010807.ref', 'NC_010807.alt3'): 0.98414,
('NC_010807.ref', 'NC_010807.alt3'): 0.98384,
('NC_005091.ref', 'NC_005091.alt1'): 0.97161,
('NC_005091.ref', 'NC_005091.alt2'): 0.96707,
('NC_025457.ref', 'NC_025457.alt1'): 0.80607,
Expand All @@ -188,7 +189,7 @@ def test_align_default(test_dir, input, params):
pairs[(id1, id2)] = tani
for ref_pair, ref_tani in ref_pairs.items():
tani = pairs[ref_pair]
assert abs(tani - ref_tani) < 0.03
assert abs(tani - ref_tani) < 0.007


@pytest.mark.parametrize('outfmt,ref_cols',[
Expand All @@ -214,6 +215,35 @@ def test_align_outfmt(test_dir, outfmt, ref_cols):
assert cols == ref_cols


@pytest.mark.parametrize('input,params',[
    (FASTA_DIR, []),
    (FASTA_FILE, []),
])
def test_align_alignments(test_dir, input, params):
    """Check that `vclust align --out-aln` writes a non-empty alignment table.

    Args:
        test_dir: Directory in which the output files are created.
        input: Path passed to the `-i` option (FASTA directory or file).
        params: Extra command-line options appended to the command.

    The test passes when the command exits with status 0 and the alignment
    file exists, is not empty, has a 10-column header line and at least one
    further line.
    """
    out_file = test_dir.joinpath('ani.tsv')
    out_aln_file = test_dir.joinpath('ani.aln.tsv')
    cmd = [
        f'{VCLUST.resolve()}',
        'align',
        '-i',
        f'{input}',
        '-o',
        f'{out_file}',
        '--out-aln',
        f'{out_aln_file}',
    ]
    # Honour the parametrized extra options (an empty list for every current
    # case), in the same way test_cluster_algorithm_leiden does.
    cmd.extend(params)
    p = subprocess.run(cmd)
    assert p.returncode == 0
    # stderr is not piped by the call above, so CompletedProcess.stderr is
    # None by construction; compare by identity rather than equality.
    assert p.stderr is None
    assert out_aln_file.exists()
    assert out_aln_file.stat().st_size
    with open(out_aln_file) as fh:
        # First line is the header; the remaining lines are the alignments.
        header = fh.readline().split()
        assert len(header) == 10
        assert fh.readlines()


@pytest.mark.parametrize('input,params',[
(FASTA_DIR, []),
(FASTA_FILE, []),
Expand Down Expand Up @@ -288,7 +318,12 @@ def test_cluster_algorithm(test_dir, algorithm):
assert out_file.stat().st_size


def test_cluster_algorithm_leiden(test_dir):
@pytest.mark.parametrize('params',[
([]),
(['--leiden-resolution', '0.8', '--leiden-iterations', '3']),
(['--leiden-resolution', '0.8', '--leiden-beta', '0.001']),
])
def test_cluster_algorithm_leiden(test_dir, params):
out_file = test_dir / 'clusters.tsv'
cmd = [
f'{VCLUST.resolve()}',
Expand All @@ -306,6 +341,7 @@ def test_cluster_algorithm_leiden(test_dir):
'--tani',
'0.95',
]
cmd.extend(params)
p = subprocess.run(cmd,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
Expand Down
Loading

0 comments on commit c29a4a4

Please sign in to comment.