diff --git a/lib/SGN/Controller/AJAX/Genefamily.pm b/lib/SGN/Controller/AJAX/Genefamily.pm
new file mode 100644
index 0000000000..d42357e37b
--- /dev/null
+++ b/lib/SGN/Controller/AJAX/Genefamily.pm
@@ -0,0 +1,50 @@
+
+package SGN::Controller::AJAX::Genefamily;
+
+use Moose;
+use SGN::Genefamily;
+
+BEGIN { extends 'Catalyst::Controller::REST'; }
+
+=head2 browse_families_table
+
+  Usage:        GET /ajax/tools/genefamily/table?build=<build name>
+  Desc:         builds the gene family table for one build, using the plugin
+                named by the genefamily_format config key
+  Ret:          stashes { data => $table } under the "rest" key, or
+                { error => $message } if the build name is unusable or the
+                table cannot be generated
+  Args:         request parameter "build"
+
+=cut
+
+sub browse_families_table :Path('/ajax/tools/genefamily/table') Args(0) {
+    my $self = shift;
+    my $c = shift;
+
+    my $build = $c->req->param("build");
+
+    # The build name becomes a directory name below genefamily_dir, so
+    # refuse values that are empty or could point outside that directory.
+    if (!defined($build) || $build eq '' || $build =~ m{[/\\\0]} || $build =~ m/\A\.\.?\z/) {
+        $c->stash->{rest} = { error => "Invalid or missing build name." };
+        return;
+    }
+
+    my $genefamily_dir = $c->config->{genefamily_dir};
+    my $genefamily_format = $c->config->{genefamily_format};
+
+    my $gf = SGN::Genefamily->new( { files_dir => $genefamily_dir, genefamily_format => $genefamily_format, build => $build });
+
+    # table() dies when the plugin or the definition file cannot be loaded;
+    # report that to the client instead of letting the request fail.
+    my $data_ref = eval { $gf->table() };
+    if ($@) {
+        $c->stash->{rest} = { error => "Could not retrieve gene family data for this build." };
+        return;
+    }
+
+    $c->stash->{rest} = { data => $data_ref };
+}
+
+1;
diff --git a/lib/SGN/Controller/Genefamily.pm b/lib/SGN/Controller/Genefamily.pm
new file mode 100644
index 0000000000..054c0c0b20
--- /dev/null
+++ b/lib/SGN/Controller/Genefamily.pm
@@ -0,0 +1,206 @@
+
+package SGN::Controller::Genefamily;
+
+use Moose;
+
+use Data::Dumper;
+use SGN::Genefamily;
+
+BEGIN { extends 'Catalyst::Controller'; }
+
+# Landing page of the gene family tool.
+sub genefamily_index :Path('/tools/genefamily') Args(0) {
+    my $self = shift;
+    my $c = shift;
+
+    $c->stash->{template} = '/tools/genefamily/index.mas';
+}
+
+
+# Search page: copies the genefamily_id, build, member_id and action request
+# parameters into the stash (empty string when absent). The login/role
+# check below is commented out, so no access check is performed here.
+sub search : Path('/tools/genefamily/search') Args(0) {
+    my $self = shift;
+    my $c = shift;
+
+#    if ($c->user()) {
+#        if (grep(/curator|genefamily_editor/, $c->user->get_object()->get_roles() )) {
+
+    $c->stash->{genefamily_id} = $c->req->param("genefamily_id") || '';
+    $c->stash->{build} = $c->req->param("build") || '';
+    $c->stash->{member_id} = $c->req->param("member_id") || '';
+    $c->stash->{action} = $c->req->param("action") || '';
+
+    $c->stash->{template} = '/tools/genefamily/search.mas';
+#        }
+#        else {
+#            $c->stash->{message} = "You do not have the necessary privileges to access this page.";
+#            $c->stash->{template} = "/generic_message.mas";
+#        }
+
+#    }
+#    else {
+#        $c->stash->{message} = "You need to be logged in to access this page.";
+#        $c->stash->{template} = "/generic_message.mas";
+#    }
+
+}
+
+# Sequence page for one member of a family. Path arguments are
+# build/family/sequence id.
+sub sequence_details :Path('/tools/genefamily/seq') Args(3) {
+    my $self = shift;
+    my $c = shift;
+    my $build = shift;
+    my $family = shift;
+    my $sequence = shift;
+
+    my $gf = SGN::Genefamily->new(
+        name => $family,
+        build => $build,
+        files_dir => $c->config()->{genefamily_dir},
+    );
+
+    print STDERR "Trying to locate sequence $sequence\n";
+
+    # get_sequence() returns an empty array ref when the id is not part of
+    # the family; treat that, and any error while reading, as "not found".
+    my $seq_info = eval { $gf->get_sequence($sequence) };
+    if ($@ || !$seq_info || !@$seq_info) {
+        $c->stash->{template} = '/generic_message.mas';
+        $c->stash->{message} = 'The requested sequence could not be found in this family.';
+        return;
+    }
+
+    my ($seq_id, $desc, $seq) = @$seq_info;
+    $c->stash->{build} = $build;
+    $c->stash->{family} = $family;
+    $c->stash->{seq_id} = $seq_id;
+    $c->stash->{desc} = $desc || "[ no description provided ]";
+    $c->stash->{seq} = $seq;
+
+    $c->stash->{template} = '/tools/genefamily/sequence.mas';
+}
+
+# FASTA page for a whole family. Path arguments are build/family.
+sub get_family_fasta :Path('/tools/genefamily/fasta/') Args(2) {
+    my $self = shift;
+    my $c = shift;
+    my $build = shift;
+    my $family = shift;
+
+    my $gf = SGN::Genefamily->new(
+        name => $family,
+        build => $build,
+        files_dir => $c->config()->{genefamily_dir},
+    );
+
+    # get_fasta() dies when the family has no fasta file.
+    my $fasta_seq = eval { $gf->get_fasta() };
+    if ($@) {
+        $c->stash->{template} = '/generic_message.mas';
+        $c->stash->{message} = 'Sequence data is not available for this family.';
+        return;
+    }
+
+    $c->stash->{build} = $build;
+    $c->stash->{family} = $family;
+    $c->stash->{fasta} = $fasta_seq;
+
+    $c->stash->{template} = '/tools/genefamily/fasta.mas';
+}
+
+# Detail page for one family. Path arguments are build/family. Each kind
+# of data (members, alignment, fasta, tree, annotation, expression) is
+# fetched independently; a failure is recorded in the "errors" stash entry
+# and, for data that has a link flag, sets that flag to "disabled".
+sub genefamily_details :Path('/tools/genefamily/details') Args(2) {
+    my $self = shift;
+    my $c = shift;
+    my $build = shift;
+    my $family = shift;
+
+    if (!$family) {
+        $c->stash->{template} = '/generic_message.mas';
+        $c->stash->{message} = 'Need a family to display!';
+        return;
+    }
+
+    my $gf = SGN::Genefamily->new(
+        name => $family,
+        build => $build,
+        files_dir => $c->config()->{genefamily_dir},
+    );
+
+    my $errors = "";
+    my $big_errors = 0;
+
+    $c->stash->{genefamily_id} = $family;
+
+    # get_members() dies when the build's definition file cannot be opened.
+    my $members = eval { $gf->get_members($family) };
+    if ($@) {
+        $errors .= "Member data not available. ";
+        $big_errors++;
+        $members = [];
+    }
+
+    print STDERR "Members: ".Dumper($members);
+
+    $c->stash->{member_count} = scalar(@$members);
+    $c->stash->{members} = join(", ", @$members);
+
+    eval {
+        $c->stash->{seq_data} = $gf->get_alignment();
+    };
+    if ($@) {
+        $errors .= "Alignment data not available. ";
+        $big_errors++;
+        $c->stash->{align_link_disabled} = "disabled";
+    }
+
+    eval {
+        $c->stash->{fasta_data} = $gf->get_fasta();
+    };
+    if ($@) {
+        $errors .= "Sequence data not available. ";
+        $big_errors++;
+        $c->stash->{fasta_link_disabled} = "disabled";
+    }
+
+    eval {
+        $c->stash->{tree_data} = $gf->get_tree();
+    };
+    if ($@) {
+        $errors .= "Tree data not available. ";
+        $c->stash->{tree_link_disabled} = "disabled";
+    }
+
+    eval {
+        $c->stash->{annot_data} = $gf->get_annotation();
+    };
+    if ($@) {
+        $errors .= "Annotation data not available. ";
+        $c->stash->{annot_data} = "(No annotation data available)";
+    }
+
+    eval {
+        $c->stash->{exp_data} = $gf->get_expression();
+    };
+    if ($@) {
+        # Only report missing expression data when the lookup really failed.
+        $errors .= "Expression data not available. ";
+        $c->stash->{exp_link_disabled} = "disabled";
+    }
+
+    if ($big_errors > 0) {
+        $errors = "This family does not seem to exist!\n";
+    }
+
+    # details.mas declares an $errors argument; hand the collected text over.
+    $c->stash->{errors} = $errors;
+    $c->stash->{template} = '/tools/genefamily/details.mas';
+}
+
+1;
diff --git a/lib/SGN/Genefamily.pm b/lib/SGN/Genefamily.pm
index 1588ffca1a..9dcb32675e 100644
--- a/lib/SGN/Genefamily.pm
+++ b/lib/SGN/Genefamily.pm
@@ -20,11 +20,38 @@ Methods in this class include:
 package SGN::Genefamily;
 
 use Moose;
+
+with 'MooseX::Object::Pluggable';
+
 use namespace::autoclean;
+use Data::Dumper;
 use File::Slurp qw/slurp/;
+use File::Spec;
 use File::Spec::Functions;
 use File::Basename qw/basename/;
 
+=head2 accessors genefamily_method()
+
+=cut
+
+has 'genefamily_format' => (
+    is => 'rw',
+    isa => 'Str',
+    );
+
+has 'genefamily_defs_file' => (
+    is => 'rw',
+    isa => 'Str',
+    default => sub { return 'genefamily_defs.txt' },
+    );
+
+has 'sequence_link' => (
+    is => 'rw',
+    isa => 'Str',
+    default => sub { return '/tools/genefamily/seq/'; } # add /$build/$family/$seq_id
+    );
+
+
 =head2 accessors name()
 
   Usage:        $gf->name()
@@ -39,17 +66,17 @@ has 'name' => (
 #    required => 1,
 );
 
-=head2 members
+# =head2 members
 
-  Usage:        my @members = $gf->members()
-  Desc:         retrieves the members of a genefamily. Read only.
-  Property:     the members of the gene family
-  Side Effects:
-  Example:
+#  Usage:        my @members = $gf->members()
+#  Desc:         retrieves the members of a genefamily. Read only.
+#  Property:     the members of the gene family
+#  Side Effects:
+#  Example:
 
-=cut
+# =cut
 
-has 'members' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
+# has 'members' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
 
 =head2 files_dir
 
@@ -66,20 +93,20 @@ has 'files_dir' => (
     required => 1,
 );
 
-=head2 dataset
+=head2 build
 
-  Usage:        my $d = $gf->dataset()
+  Usage:        my $d = $gf->build()
   Desc:         under the genefamily dir (files_dir), a number of sub-dirs should be
                 present, each of which represents a separate gene family clustering
                 (for example, based on different species or different clustering
                 parameters).
-  Property:     the dataset name [string]
+  Property:     the build name [string]
   Side Effects:
   Example:
 
 =cut
 
-has 'dataset' => (
+has 'build' => (
     is => 'rw',
     required => 1,
 );
@@ -121,6 +148,8 @@ sub get_alignment {
 sub get_fasta {
     my $self = shift;
     my $file = catfile( $self->get_path(), "fasta", $self->name() . ".fa" );
+
+    print STDERR "Retrieving fasta file $file for family ".$self->name()."\n";
 
     unless( -f $file ) {
         die "The fasta information for family " . $self->name()
@@ -180,32 +209,102 @@ sub get_tree {
     return slurp($file);
 }
 
-sub get_member_ids {
+=head2 get_sequence
+
+  Usage:        my $info = $gf->get_sequence($seq_id)
+  Desc:         looks up one sequence in the fasta file of this family
+  Ret:          [ $id, $description, $sequence ], or [] if the id is not found
+  Args:         the sequence id, compared to the fasta record ids with eq
+
+=cut
+
+sub get_sequence {
     my $self = shift;
+    my $sequence = shift;
+
+    # No use line for Bio::SeqIO is visible in this module, so load it here.
+    require Bio::SeqIO;
+
+    my $file = File::Spec->catfile($self->get_path(), 'fasta', $self->name().".fa");
+    my $io = Bio::SeqIO->new( -format => 'fasta', -file => $file );
+
+    while (my $seq = $io->next_seq()) {
+        if ($seq->id() eq $sequence) {
+            return [ $seq->id(), $seq->desc(), $seq->seq() ];
+        }
+    }
+    return [];
 }
 
 
-=head2 get_available_datasets
 
-  Usage:        my @ds = SGN::Genefamily->get_available_datasets($DIR)
-  Desc:         a class function that returns the available datasets
-  Ret:          a list of dataset names
-  Args:         the $DIR where the datasets are located.
+=head2 get_members
+
+  Usage:        my $members = $gf->get_members($family)
+  Desc:         reads the members of $family from the build's definition file
+  Ret:          array ref of member ids, each wrapped in a link built from sequence_link
+
+=cut
+
+sub get_members {
+    my $self = shift;
+    my $family = shift;
+
+    my $defs = File::Spec->catfile($self->get_path(), $self->genefamily_defs_file());
+    print STDERR "Getting member info for family $family from file $defs\n";
+    open(my $F, "<", $defs) || die "Can't open gene families definition file at $defs";
+    my @all_members;
+    while(<$F>) {
+        chomp;
+        my ($family_name, @members) = split/\t/;
+        if ($family_name eq $family) {
+            foreach my $m (@members) {
+                my @species_members = split/\,/, $m;
+                foreach my $id (@species_members) {
+                    # NOTE(review): link markup reconstructed from sequence_link/build/family/id - confirm
+                    $id = '<a href="'.$self->sequence_link().$self->build()."/$family/$id".'">'.$id."</a>";
+                }
+                push @all_members, @species_members;
+            }
+        }
+    }
+    close($F);
+    return \@all_members;
+}
+
+=head2 get_available_builds
+
+  Usage:        my @ds = SGN::Genefamily->get_available_builds($DIR)
+  Desc:         a class function that returns the available builds
+  Ret:          a list of build names
+  Args:         the $DIR where the builds are located.
   Side Effects:
   Example:
 
 =cut
 
-sub get_available_datasets {
+sub get_available_builds {
     my $class = shift;
     my $path = shift;
-    my @dirs = map { basename($_) } grep -d, glob "$path/*";
+    my @dirs = map { basename($_) } grep -d, glob $path."/*";
     return @dirs;
 }
 
 sub get_path {
     my $self = shift;
-    return catfile( $self->files_dir(), $self->dataset() );
+    return catfile( $self->files_dir(), $self->build() );
+}
+
+sub table {
+    my $self = shift;
+
+    my $plugin = $self->genefamily_format();
+    $self->load_plugin($plugin);
+    my $table = $self->get_data($self->build());
+
+    return $table;
 }
+
+
 
 1;
diff --git a/lib/SGN/Genefamily/Plugin/Orthofinder.pm b/lib/SGN/Genefamily/Plugin/Orthofinder.pm
new file mode 100644
index 0000000000..3d2b670ed7
--- /dev/null
+++ b/lib/SGN/Genefamily/Plugin/Orthofinder.pm
@@ -0,0 +1,65 @@
+
+package SGN::Genefamily::Plugin::Orthofinder;
+
+use Moose::Role;
+
+=head2 get_data
+
+  Usage:        my $table = $gf->get_data($build)
+  Desc:         reads <files_dir>/<build>/genefamily_defs.txt (first line is
+                skipped as a header; then tab separated: orthogroup id
+                followed by columns of comma separated member ids) and builds
+                one table row per orthogroup
+  Ret:          array ref of rows [ orthogroup, sequence link, alignment link,
+                tree link, "<n> members", comma separated member ids ]
+  Args:         the build name
+
+=cut
+
+sub get_data {
+    my $self = shift;
+    my $build = shift;
+
+    my $genefamily_definition_file = $self->files_dir()."/$build/genefamily_defs.txt";
+
+    print STDERR "Working with definition file at $genefamily_definition_file\n";
+    open(my $F, "<", $genefamily_definition_file) || die "Can't find gene family definition file";
+
+    # The first line is a header (presumably the species names), not a family.
+    my $header = <$F>;
+
+    my @table;
+    while (<$F>) {
+        chomp;
+        my ($orthogroup, @per_species_members) = split/\t/;
+        next if !defined($orthogroup) || $orthogroup eq '';
+
+        # NOTE(review): the qq strings below look like links whose markup
+        # is missing; confirm the intended targets before relying on them.
+        my $orthogroup_link = qq | $orthogroup | ;
+        my $sequence_link = qq | seqs |;
+
+        my $alignment_link = "alignment";
+        if ( -e $self->files_dir()."/$build/alignments/$orthogroup.aln" ) {
+            $alignment_link = qq | alignment |;
+        }
+
+        my $tree_link = "tree";
+        if ( -e $self->files_dir()."/$build/trees/$orthogroup.tree") {
+            $tree_link = qq | tree |;
+        }
+
+        # The orthogroup id has already been split off, so every remaining
+        # column holds members (SGN::Genefamily::get_members reads it the same way).
+        my @all_members = map { split(/\,/, $_) } @per_species_members;
+
+        my $members = join(",", @all_members);
+        push @table, [$orthogroup_link, $sequence_link, $alignment_link, $tree_link, scalar(@all_members)." members", $members];
+    }
+    close($F);
+
+    return \@table;
+}
+
+
+1;
diff --git a/lib/SGN/Genefamily/Plugin/Orthomcl.pm b/lib/SGN/Genefamily/Plugin/Orthomcl.pm
new file mode 100644
index 0000000000..f4c193b7a6
--- /dev/null
+++ b/lib/SGN/Genefamily/Plugin/Orthomcl.pm
@@ -0,0 +1,51 @@
+
+package SGN::Genefamily::Plugin::Orthomcl;
+
+use Moose::Role;
+
+=head2 get_data
+
+  Usage:        my $table = $gf->get_data()
+  Desc:         reads <files_dir>/<build>/genefamily_defs (first line is
+                skipped as a header; then tab separated: orthogroup id and
+                whitespace separated member ids), one table row per orthogroup
+  Ret:          array ref of rows [ orthogroup, sequence link, alignment link, tree link, members ]
+  Args:         none; the build is taken from $self->build()
+
+=cut
+
+sub get_data {
+    my $self = shift;
+
+    my $build = $self->build();
+
+    # NOTE(review): this opens "genefamily_defs" while genefamily_defs_file
+    # defaults to "genefamily_defs.txt" - confirm which name is intended.
+    open(my $F, "<", $self->files_dir()."/$build/genefamily_defs") || die "Can't find gene family definition file";
+
+    # The first line is treated as a header and skipped.
+    my $header = <$F>;
+
+    my @table;
+    while (<$F>) {
+        chomp;
+        my ($orthogroup, $per_species_members) = split/\t/;
+        next if !defined($orthogroup) || $orthogroup eq '';
+
+        my $sequence_link = qq | seqs |;
+        my $alignment_link = qq | alignment |;
+        my $tree = qq | tree |;
+
+        # A line without a tab has no member column at all.
+        my @all_members = defined($per_species_members) ? split(/\s+/, $per_species_members) : ();
+        my $members = join(",", @all_members);
+
+        push @table, [$orthogroup, $sequence_link, $alignment_link, $tree, $members];
+    }
+    close($F);
+
+    return \@table;
+}
+
+
+1;
diff --git a/mason/tools/genefamily/details.mas b/mason/tools/genefamily/details.mas
new file mode 100644
index 0000000000..3ae2e2a160
--- /dev/null
+++ b/mason/tools/genefamily/details.mas
@@ -0,0 +1,63 @@
+
+<%args>
+$genefamily_id
+$members => undef
+$member_count => undef
+$annot_data => undef
+$errors => undef
+$tree_data => undef
+$tree_link_disabled => undef
+$fasta_data => undef
+$seq_data => undef
+$align_link_disabled => undef
+
+</%args>
+
+
Family: <% $genefamily_id %> | <% $annot_data %> |
Note: <% $errors %> |
View | ++ + | + ++ + | + + + +
+Sequences in fasta: | + + + |
+ <% $fasta_data %>+ |
+<% $fasta %> +diff --git a/mason/tools/genefamily/index.mas b/mason/tools/genefamily/index.mas index 30eb7e6474..07440a6c51 100644 --- a/mason/tools/genefamily/index.mas +++ b/mason/tools/genefamily/index.mas @@ -3,18 +3,14 @@ %doc> -<& /page/page_title.mas, title=>"Tomato Genefamily Annotation Resources" &> - -
This is a temporary interface to make the gene families from the tomato genome sequencing project available to gene family curators.
-You can search in different gene family builds, both for a specific family, and for a gene family member. You can then view the fasta, alignments, and trees, if available.
-More functionality will be added later.
-To continue, you need to log in with a special username and password that was provided to you.
+<& /page/page_title.mas, title=>"Genefamily Annotation" &> +Here you can search in different gene family builds, both for a specific family, and for a gene family member. You can then view the fasta, alignments, and trees, if available.
Family: <% $genefamily_id %> | <% $annot_data %> |
Note: <% $errors %> |
View | -- - | - -- - | - - - -
-Sequences in fasta: | - - - |
- <% $fasta_data %>- |