Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix reading VCF with no header #5206

Merged
merged 5 commits into from
Dec 7, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
fix code that removes header
ClayBirkett committed Nov 20, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit 3c9c0ab2c95cc9f9a31fc5546594ab13ec504043
46 changes: 15 additions & 31 deletions lib/CXGN/Genotype/ParseUpload/Plugin/transposedVCF.pm
Original file line number Diff line number Diff line change
@@ -60,6 +60,7 @@ sub _validate_with_plugin {
my @fields;

open($F, "<", $filename) || die "Can't open file $filename\n";

my @header_info;

my $chroms;
@@ -179,7 +180,8 @@ sub _validate_with_plugin {
#print STDERR "Scanning file for observation unit names... \n";
my $lines = 0;
while (<$F>) {
s/[\r\n]//sg;
chomp;

my @fields = split /\t/;
#print "Parsing line $fields[0]\n";
push @observation_unit_names, $fields[0];
@@ -202,12 +204,12 @@ sub _validate_with_plugin {
}
} else {
foreach (@observation_unit_names) {
s/[\r\n]//sg;
my ($observation_unit_name, $accession_name) = split(/\|\|\|/, $_);
push @observation_units_names_trim, $observation_unit_name;
}
}
my $observation_unit_names = \@observation_units_names_trim;

my $organism_id = $self->get_organism_id;
my $accession_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type')->cvterm_id();

@@ -276,11 +278,10 @@ sub _parse_with_plugin {

my $F;
open($F, "<", $filename) || die "Can't open file $filename\n";

foreach (1..9) { my $trash = <$F>; } # remove first 9 lines
while (<$F> =~ m/^\##/) {
#Trash header lines
}
$self->_fh($F);


}

sub extract_protocol_data {
@@ -343,37 +344,20 @@ sub next_genotype {
print STDERR "No next genotype... Done!\n";
close($F);
return ( [$observation_unit_name], $genotypeprop );
}
else {
} else {
$line =~ s/\r//g;
chomp($line);

LABEL: if ($line =~ m/^\#/) {
#print STDERR "Skipping header line: $line\n";
$line = <$F>;
goto LABEL;
}

if ($self->_is_first_line()) {
print STDERR "Skipping non data lines\n";

# Check if the first line matches the pattern
if ($line !~ m/\d[\/\|]\d/) {
$line = substr($line,0,20);
print STDERR "Skipping $line\n";

# Enter loop to skip lines until a matching line is found
while ($line = <$F>) {
if ($line =~ m/\d[\/\|]\d/) {
last;
} else {
$line = substr($line,0,20);
print STDERR "Skipping $line\n";
}
}
print STDERR "Skipping 7 more lines... ";
for (0..6) {
$line = <$F>;
#print STDERR Dumper $line;
}
}
$line =~ s/[\r\n]//sg;
$line =~ s/\r//g;
chomp($line);

my @fields = split /\t/, $line;
#print STDERR Dumper \@fields;