Fix the code that skips header lines in the transposed VCF parser

solgenomics · lukasmueller · Dec 7, 2024 · Nov 14, 2024 · Nov 20, 2024 · Dec 5, 2024
commit 3c9c0ab2c95cc9f9a31fc5546594ab13ec504043
diff --git a/lib/CXGN/Genotype/ParseUpload/Plugin/transposedVCF.pm b/lib/CXGN/Genotype/ParseUpload/Plugin/transposedVCF.pm
@@ -60,6 +60,7 @@ sub _validate_with_plugin {
     my @fields;
 
     open($F, "<", $filename) || die "Can't open file $filename\n";
+
     my @header_info;
 
     my $chroms;
@@ -179,7 +180,8 @@ sub _validate_with_plugin {
     #print STDERR "Scanning file for observation unit names... \n";
     my $lines = 0;
     while (<$F>) {
-        s/[\r\n]//sg;
+	chomp;
+
 	my @fields = split /\t/;
 	#print "Parsing line $fields[0]\n";
 	push @observation_unit_names, $fields[0];
@@ -202,12 +204,12 @@ sub _validate_with_plugin {
         }
     } else {
         foreach (@observation_unit_names) {
-	    s/[\r\n]//sg;
             my ($observation_unit_name, $accession_name) = split(/\|\|\|/, $_);
             push @observation_units_names_trim, $observation_unit_name;
         }
     }
     my $observation_unit_names = \@observation_units_names_trim;
+
     my $organism_id = $self->get_organism_id;
     my $accession_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type')->cvterm_id();
 
@@ -276,11 +278,10 @@ sub _parse_with_plugin {
 
     my $F;
     open($F, "<", $filename) || die "Can't open file $filename\n";
-
-    foreach (1..9) { my $trash = <$F>; } # remove first 9 lines
+    while (<$F> =~ m/^\##/) {
+        #Trash header lines
+    }
     $self->_fh($F);
-
-
 }
 
 sub extract_protocol_data {
@@ -343,37 +344,20 @@ sub next_genotype {
         print STDERR "No next genotype... Done!\n";
         close($F);
         return ( [$observation_unit_name], $genotypeprop );
-    }
-    else {
+    } else {
 	$line =~ s/\r//g;
         chomp($line);
 
-        LABEL: if ($line =~ m/^\#/) {
-	    #print STDERR "Skipping header line: $line\n";
-            $line = <$F>;
-            goto LABEL;
-        }
-
         if ($self->_is_first_line()) {
-            print STDERR "Skipping non data lines\n";
-
-	    # Check if the first line matches the pattern
-	    if ($line !~ m/\d[\/\|]\d/) {
-		$line = substr($line,0,20);
-		print STDERR "Skipping $line\n";
-
-		# Enter loop to skip lines until a matching line is found
-	        while ($line = <$F>) {
-	            if ($line =~ m/\d[\/\|]\d/) {
-	                last;
-		    } else {
-			$line = substr($line,0,20);
-                        print STDERR "Skipping $line\n";
-		    }
-		}
+            print STDERR "Skipping 7 more lines... ";
+            for (0..6) {
+                $line = <$F>;
+		#print STDERR Dumper $line;
             }
         }
-        $line =~ s/[\r\n]//sg;
+	$line =~ s/\r//g;
+        chomp($line);
+
         my @fields = split /\t/, $line;
         #print STDERR Dumper \@fields;