Skip to content

Commit

Permalink
Merge pull request #4045 from vgteam/fix-two-part-names
Browse files Browse the repository at this point in the history
Handle two-part contig names again and put them under test
  • Loading branch information
adamnovak authored Aug 7, 2023
2 parents 40d98fb + 92d4953 commit 9d5db27
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 3 deletions.
2 changes: 1 addition & 1 deletion deps/libhandlegraph
28 changes: 28 additions & 0 deletions src/unittest/handle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "bdsg/hash_graph.hpp"

#include <handlegraph/util.hpp>
#include <handlegraph/path_metadata.hpp>

#include <iostream>
#include <limits>
Expand Down Expand Up @@ -2542,5 +2543,32 @@ TEST_CASE("handlegraph PathMetadata name format preserves ranges on generic path
REQUIRE(subrange.second == PathMetadata::NO_END_POSITION);
}

// Regression test for two-part "sample#contig" path names (PanSN-like, but
// with no haplotype field). Parses "GRCh38#chr1" and checks every output of
// PathMetadata::parse_path_name, as well as agreement with the single-field
// parse_sample_name / parse_locus_name accessors.
TEST_CASE("handlegraph PathMetadata name format can parse two-part names", "[handle]") {
    std::string path_name = "GRCh38#chr1";

    // Output slots filled in by parse_path_name.
    PathSense sense;
    string sample;
    string locus;
    size_t haplotype;
    size_t phase_block;
    subrange_t subrange;
    PathMetadata::parse_path_name(path_name,
                                  sense,
                                  sample,
                                  locus,
                                  haplotype,
                                  phase_block,
                                  subrange);

    // The single-field parsers must agree with the full parse.
    REQUIRE(PathMetadata::parse_sample_name(path_name) == sample);
    REQUIRE(PathMetadata::parse_locus_name(path_name) == locus);

    REQUIRE(sense == PathSense::REFERENCE);
    REQUIRE(sample == "GRCh38");
    REQUIRE(locus == "chr1");
    // The name has no haplotype component, so the parsed haplotype is expected
    // to be the "unset" sentinel rather than a number or a stale value.
    REQUIRE(haplotype == PathMetadata::NO_HAPLOTYPE);
    REQUIRE(phase_block == PathMetadata::NO_PHASE_BLOCK);
    REQUIRE(subrange == PathMetadata::NO_SUBRANGE);
}

}
}
20 changes: 20 additions & 0 deletions test/graphs/gfa_two_part_reference.gfa
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
H VN:Z:1.1 RS:Z:GRCh37 GRCh38
S 1 G
S 2 A
S 4 GGG
S 5 T
S 6 A
S 7 C
S 8 A
S 9 A
L 1 + 2 + 0M
L 1 + 4 + 0M
L 2 + 4 + 0M
L 4 + 5 + 0M
L 5 + 6 + 0M
L 6 + 7 + 0M
L 6 + 8 + 0M
L 7 + 9 + 0M
L 8 + 9 + 0M
P GRCh38#chr1 1+,4+,5+,6+,7+,9+ *,*,*,*,*
P GRCh37#chr1 1+,2+,4+,5+,6+,8+,9+ *,*,*,*,*,*
9 changes: 7 additions & 2 deletions test/t/48_vg_convert.t
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ PATH=../bin:$PATH # for vg

export LC_ALL="C" # force a consistent sort order

plan tests 100
plan tests 102

vg construct -r complex/c.fa -v complex/c.vcf.gz > c.vg
cat <(vg view c.vg | grep ^S | sort) <(vg view c.vg | grep L | uniq | wc -l) <(vg paths -v c.vg -E) > c.info
Expand Down Expand Up @@ -411,8 +411,13 @@ vg convert -a graphs/components_paths_rgfa.gfa > components_paths_rgfa.hg
is "${?}" "0" "GFA -> HashGraph conversion works with redundant paths"
is "$(vg paths --list -x components_paths_rgfa.hg | wc -l)" "1" "GFA -> HashGraph conversion with redundant paths keeps one copy of the redundant path"

# We should be able to handle pseudo-PanSN paths where there is no haplotype
# (two-part "sample#contig" names such as GRCh38#chr1).
vg convert -a graphs/gfa_two_part_reference.gfa > gfa_two_part_reference.hg
is "${?}" "0" "GFA -> HashGraph conversion works with two-part reference path names"
# Both two-part paths in the input graph should be reported with REFERENCE sense.
is "$(vg paths -M -x gfa_two_part_reference.hg | grep REFERENCE | wc -l)" "2" "GFA -> HashGraph conversion with two-part reference path names gets the right paths"

rm -f paths.truth.txt paths.gbz.txt paths.gfa.txt paths.hg.txt
rm -f gfa_with_reference.gbz rgfa_with_reference.gbz gfa_with_reference.hg components_paths_rgfa.hg rgfa_with_reference.hg extracted.gfa
rm -f gfa_with_reference.gbz rgfa_with_reference.gbz gfa_with_reference.hg components_paths_rgfa.hg gfa_two_part_reference.hg rgfa_with_reference.hg extracted.gfa

#####
# GFA Streaming
Expand Down

1 comment on commit 9d5db27

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 18177 seconds

Please sign in to comment.