Skip to content

Commit

Permalink
Merge pull request #5153 from solgenomics/fix-vcf-upload2
Browse files Browse the repository at this point in the history
add protocol checks for vcf upload using scripts
  • Loading branch information
lukasmueller authored Oct 21, 2024
2 parents 6f0e62e + 08a1881 commit 9b9062b
Showing 1 changed file with 57 additions and 5 deletions.
62 changes: 57 additions & 5 deletions bin/load_genotypes_vcf_cxgn_postgres.pl
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ =head1 AUTHOR
=cut

use strict;
use warnings;

use Getopt::Std;
use Data::Dumper;
Expand Down Expand Up @@ -171,7 +172,7 @@ =head1 AUTHOR
open (my $Fout, ">", $opt_o) || die "Can't open file $opt_o\n";
open (my $F, "<", $file) or die "Can't open file $file \n";
my @outline;
my $lastcol;
my $lastcol = 0;
while (<$F>) {
if ($_ =~ m/^\##/) {
print $Fout $_;
Expand Down Expand Up @@ -245,7 +246,7 @@ =head1 AUTHOR

my $organism_q = "SELECT organism_id FROM organism WHERE species = ?";
my @found_organisms;
my $h = $schema->storage->dbh()->prepare($organism_q);
$h = $schema->storage->dbh()->prepare($organism_q);
$h->execute($organism_species);
while (my ($organism_id) = $h->fetchrow_array()){
push @found_organisms, $organism_id;
Expand All @@ -270,15 +271,19 @@ =head1 AUTHOR
});

$parser->load_plugin($opt_c);
$parser->parse_with_iterator();
my $parser_return = $parser->parse_with_iterator();
if ($parser->get_parse_errors()) {
my $parse_errors = $parser->get_parse_errors();
print STDERR Dumper $parse_errors;
die("parse errors");
}

my $project_id;
my $protocol = $parser->protocol_data();
my $observation_unit_names_all = $parser->observation_unit_names();
$protocol->{'reference_genome_name'} = $reference_genome_name;
$protocol->{'species_name'} = $organism_species;


my $vcf_genotyping_type = $opt_T ? $opt_T : 'vcf_snp_genotyping';
# my $genotyping_type;
my $genotype_data_type;
Expand Down Expand Up @@ -337,14 +342,61 @@ =head1 AUTHOR
die;
}
if (scalar(@{$verified_errors->{warning_messages}}) > 0){
my $warning_string = join ', ', @{$verified_errors->{warning_messages}};
my $warning_string = join "\n", @{$verified_errors->{warning_messages}};
if (!$opt_A){
print STDERR Dumper $warning_string;
print STDERR "You can accept these warnings and continue with store if you use -A\n";
die;
}
}

my @protocol_match_errors;
if ($protocol_id) {
my $new_marker_data = $protocol->{markers};
my $stored_protocol = CXGN::Genotype::Protocol->new({
bcs_schema => $schema,
nd_protocol_id => $protocol_id
});
my $stored_markers = $stored_protocol->markers();

my @all_stored_markers = keys %$stored_markers;
my %compare_marker_names = map {$_ => 1} @all_stored_markers;
my @mismatch_marker_names;
while (my ($chrom, $new_marker_data_1) = each %$new_marker_data) {
while (my ($marker_name, $new_marker_details) = each %$new_marker_data_1) {
if (exists($compare_marker_names{$marker_name})) {
while (my ($key, $value) = each %$new_marker_details) {
if ($value ne ($stored_markers->{$marker_name}->{$key})) {
push @protocol_match_errors, "Marker $marker_name in your file has $value for $key, but in the previously stored protocol shows ".$stored_markers->{$marker_name}->{$key};
}
}
} else {
push @mismatch_marker_names, $marker_name;
}
}
}

if (scalar(@mismatch_marker_names) > 0){
foreach my $error ( sort @mismatch_marker_names) {
print STDERR "$error\n";
}
print STDERR "These marker names in your file are not in the selected protocol.\n";
die;
}

if (scalar(@protocol_match_errors) > 0){
my $protocol_warning;
foreach my $match_error (@protocol_match_errors) {
$protocol_warning .= "$match_error\n";
}
if (!$opt_A){
print STDERR Dumper $protocol_warning;
print STDERR "Protocol match error\n";
die;
}
}
}

$store_genotypes->store_metadata();
my $result = $store_genotypes->store_identifiers();
$protocol_id = $result->{nd_protocol_id};
Expand Down

0 comments on commit 9b9062b

Please sign in to comment.