Skip to content

Commit

Permalink
Handle two-part contig names again and put them under test
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Aug 4, 2023
1 parent 40d98fb commit 92d4953
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 3 deletions.
2 changes: 1 addition & 1 deletion deps/libhandlegraph
28 changes: 28 additions & 0 deletions src/unittest/handle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "bdsg/hash_graph.hpp"

#include <handlegraph/util.hpp>
#include <handlegraph/path_metadata.hpp>

#include <iostream>
#include <limits>
Expand Down Expand Up @@ -2542,5 +2543,32 @@ TEST_CASE("handlegraph PathMetadata name format preserves ranges on generic path
REQUIRE(subrange.second == PathMetadata::NO_END_POSITION);
}

// Checks that a two-part "sample#locus" path name (one with no haplotype
// field) is split into its sample and locus by parse_path_name(), and that
// the single-field helpers parse_sample_name() / parse_locus_name() agree
// with the full parse.
TEST_CASE("handlegraph PathMetadata name format can parse two-part names", "[handle]") {
    std::string path_name = "GRCh38#chr1";

    // Out-parameters filled in by parse_path_name().
    PathSense sense;
    string sample;
    string locus;
    size_t haplotype;
    size_t phase_block;
    subrange_t subrange;
    PathMetadata::parse_path_name(path_name,
                                  sense,
                                  sample,
                                  locus,
                                  haplotype,
                                  phase_block,
                                  subrange);

    // The single-field helpers must report the same values as the full parse.
    REQUIRE(PathMetadata::parse_sample_name(path_name) == sample);
    REQUIRE(PathMetadata::parse_locus_name(path_name) == locus);

    REQUIRE(sense == PathSense::REFERENCE);
    REQUIRE(sample == "GRCh38");
    REQUIRE(locus == "chr1");
    // The name has no haplotype field, so the parsed haplotype is expected to
    // be the "unset" sentinel; this out-parameter was previously parsed but
    // never checked.
    REQUIRE(haplotype == PathMetadata::NO_HAPLOTYPE);
    REQUIRE(phase_block == PathMetadata::NO_PHASE_BLOCK);
    REQUIRE(subrange == PathMetadata::NO_SUBRANGE);
}

}
}
20 changes: 20 additions & 0 deletions test/graphs/gfa_two_part_reference.gfa
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
H VN:Z:1.1 RS:Z:GRCh37 GRCh38
S 1 G
S 2 A
S 4 GGG
S 5 T
S 6 A
S 7 C
S 8 A
S 9 A
L 1 + 2 + 0M
L 1 + 4 + 0M
L 2 + 4 + 0M
L 4 + 5 + 0M
L 5 + 6 + 0M
L 6 + 7 + 0M
L 6 + 8 + 0M
L 7 + 9 + 0M
L 8 + 9 + 0M
P GRCh38#chr1 1+,4+,5+,6+,7+,9+ *,*,*,*,*
P GRCh37#chr1 1+,2+,4+,5+,6+,8+,9+ *,*,*,*,*,*
9 changes: 7 additions & 2 deletions test/t/48_vg_convert.t
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ PATH=../bin:$PATH # for vg

export LC_ALL="C" # force a consistent sort order

plan tests 100
plan tests 102

vg construct -r complex/c.fa -v complex/c.vcf.gz > c.vg
cat <(vg view c.vg | grep ^S | sort) <(vg view c.vg | grep L | uniq | wc -l) <(vg paths -v c.vg -E) > c.info
Expand Down Expand Up @@ -411,8 +411,13 @@ vg convert -a graphs/components_paths_rgfa.gfa > components_paths_rgfa.hg
is "${?}" "0" "GFA -> HashGraph conversion works with redundant paths"
is "$(vg paths --list -x components_paths_rgfa.hg | wc -l)" "1" "GFA -> HashGraph conversion with redundant paths keeps one copy of the redundant path"

# We should be able to handle pseudo-PanSN paths where there is no haplotype,
# i.e. two-part "sample#contig" names such as GRCh38#chr1.
vg convert -a graphs/gfa_two_part_reference.gfa > gfa_two_part_reference.hg
is "${?}" "0" "GFA -> HashGraph conversion works with two-part reference path names"
# Both paths in the fixture are expected to be listed as REFERENCE.
is "$(vg paths -M -x gfa_two_part_reference.hg | grep REFERENCE | wc -l)" "2" "GFA -> HashGraph conversion with two-part reference path names gets the right paths"

rm -f paths.truth.txt paths.gbz.txt paths.gfa.txt paths.hg.txt
rm -f gfa_with_reference.gbz rgfa_with_reference.gbz gfa_with_reference.hg components_paths_rgfa.hg rgfa_with_reference.hg extracted.gfa
rm -f gfa_with_reference.gbz rgfa_with_reference.gbz gfa_with_reference.hg components_paths_rgfa.hg gfa_two_part_reference.hg rgfa_with_reference.hg extracted.gfa

#####
# GFA Streaming
Expand Down

1 comment on commit 92d4953

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch fix-two-part-names. View the full report here.

15 tests passed, 1 test failed and 0 tests skipped in 18663 seconds

Failed tests:

  • test_sim_chr21_snp1kg_trained (2561 seconds)

Please sign in to comment.