diff --git a/lib/SGN/Controller/AJAX/Genefamily.pm b/lib/SGN/Controller/AJAX/Genefamily.pm new file mode 100644 index 0000000000..d42357e37b --- /dev/null +++ b/lib/SGN/Controller/AJAX/Genefamily.pm @@ -0,0 +1,35 @@ + +package SGN::Controller::AJAX::Genefamily; + +use Moose; +use SGN::Genefamily; + +BEGIN { extends 'Catalyst::Controller::REST'; } + +# Serves the gene family table of one build as { data => [ rows ] } in the REST stash. +# The "build" request parameter must name an existing build directory under genefamily_dir. +sub browse_families_table :Path('/ajax/tools/genefamily/table') Args(0) { + my $self = shift; + my $c = shift; + + my $build = $c->req->param("build"); + + my $genefamily_dir = $c->config->{genefamily_dir}; + my $genefamily_format = $c->config->{genefamily_format}; + + # "build" is user input that ends up in a file path, so only known build directories are accepted + my @known_builds = SGN::Genefamily->get_available_builds($genefamily_dir); + if (!defined($build) || !grep { $_ eq $build } @known_builds) { + $c->stash->{rest} = { error => "Unknown gene family build." }; + return; + } + + my $gf = SGN::Genefamily->new( { files_dir => $genefamily_dir, genefamily_format => $genefamily_format, build => $build }); + + my $data_ref = $gf -> table(); + + $c->stash->{rest} = { data => $data_ref }; + +} + +1; diff --git a/lib/SGN/Controller/Genefamily.pm b/lib/SGN/Controller/Genefamily.pm new file mode 100644 index 0000000000..054c0c0b20 --- /dev/null +++ b/lib/SGN/Controller/Genefamily.pm @@ -0,0 +1,190 @@ + +package SGN::Controller::Genefamily; + +use Moose; + +use Data::Dumper; +use SGN::Genefamily; + +BEGIN { extends 'Catalyst::Controller'; } + +# Landing page of the gene family tool. +sub genefamily_index :Path('/tools/genefamily') Args(0) { + my $self = shift; + my $c = shift; + + $c->stash->{template} = '/tools/genefamily/index.mas'; +} + + +# Search page: copies the request parameters into the stash for search.mas. +sub search : Path('/tools/genefamily/search') Args(0) { + my $self = shift; + my $c = shift; + +# if ($c->user()) { +# if (grep(/curator|genefamily_editor/, $c->user->get_object()->get_roles() )) { + + $c->stash->{genefamily_id} = $c->req->param("genefamily_id") || ''; + $c->stash->{build} = $c->req->param("build") || ''; + $c->stash->{member_id} = $c->req->param("member_id") || ''; + $c->stash->{action} = $c->req->param("action") || ''; + + $c->stash->{template} = '/tools/genefamily/search.mas'; +# } +# else { +# $c->stash->{message} = "You do not have the necessary privileges to access this page."; +# $c->stash->{template} = "/generic_message.mas"; +# } + + # } + 
# else { +# $c->stash->{message} = "You need to be logged in to access this page."; +# $c->stash->{template} = "/generic_message.mas"; + # } + +} + +# Detail page for one sequence of a family; the three path arguments are build, family and sequence id. +sub sequence_details :Path('/tools/genefamily/seq') Args(3) { + my $self = shift; + my $c = shift; + my $build = shift; + my $family = shift; + my $sequence = shift; + + my $gf = SGN::Genefamily->new( + name => $family, + build => $build, + files_dir => $c->config()->{genefamily_dir}, + ); + + print STDERR "Trying to locate sequence $sequence\n"; + + my $seq_info = $gf->get_sequence($sequence); + + my ($seq_id, $desc, $seq) = @$seq_info; + if (!defined($seq_id)) { # get_sequence() returns an empty array ref when no sequence has that id + $c->stash->{template} = '/generic_message.mas'; + $c->stash->{message} = 'The requested sequence was not found in this family.'; + return; + } + $c->stash->{build} = $build; + $c->stash->{family} = $family; + $c->stash->{seq_id} = $seq_id; + $c->stash->{desc} = $desc || "[ no description provided ]"; + $c->stash->{seq} = $seq; + + $c->stash->{template} = '/tools/genefamily/sequence.mas'; + + +} + +# Fasta page for a family; the two path arguments are build and family. +sub get_family_fasta :Path('/tools/genefamily/fasta/') Args(2) { + my $self = shift; + my $c = shift; + my $build = shift; + my $family = shift; + + my $gf = SGN::Genefamily->new( + name => $family, + build => $build, + files_dir => $c->config()->{genefamily_dir}, + ); + + my $fasta_seq = $gf -> get_fasta(); + + $c->stash->{build} = $build; + $c->stash->{family} = $family; + $c->stash->{fasta} = $fasta_seq; + + $c->stash->{template} = '/tools/genefamily/fasta.mas'; +} + +# Detail page for one family; the two path arguments are build and family. +# Each data type (alignment, fasta, tree, annotation, expression) is fetched in its own eval, and a failure +# is reported through the stashed errors text and the link-disabled flags instead of aborting the page. +sub genefamily_details :Path('/tools/genefamily/details') Args(2) { + my $self = shift; + my $c = shift; + my $build = shift; + my $family = shift; + + my $gf = SGN::Genefamily->new( + name => $family, + build => $build, + files_dir => $c->config()->{genefamily_dir}, + ); + + my $errors = ""; + my $big_errors = 0; + + if (!$family) { + $c->stash->{template} = '/generic_message.mas'; + $c->stash->{message} = 'Need a family to 
display!'; + return; + } + + $c->stash->{genefamily_id} = $family; + + my $members = $gf->get_members($family); + + print STDERR "Members: ".Dumper($members); + + $c->stash->{member_count} = scalar(@$members); + $c->stash->{members} = join(", ", @$members); + + + eval { + $c->stash->{seq_data} = $gf->get_alignment(); + }; + + if ($@) { + $errors .= "Alignment data not available. "; + $big_errors++; + $c->stash->{align_link_disabled} = "disabled"; # the template receives this flag through the stash + } + + eval { + $c->stash->{fasta_data} = $gf->get_fasta(); + + }; + + if ($@) { + $errors .= "Sequence data not available. "; + $big_errors++; + $c->stash->{fasta_link_disabled} = "disabled"; + } + eval { + $c->stash->{tree_data} = $gf->get_tree(); + }; + if ($@) { + $errors .= "Tree data not available. "; + $c->stash->{tree_link_disabled} = "disabled"; + } + eval { + $c->stash->{annot_data} = $gf->get_annotation(); + }; + if ($@) { + $errors .= "Annotation data not available. "; + $c->stash->{annot_data} = "(No annotation data available)"; + } + eval { 
+ $c->stash->{exp_data} = $gf->get_expression(); + }; + if ($@) { + $errors .= "Expression data not available. "; # only reported when the lookup really failed + $c->stash->{exp_link_disabled} = "disabled"; + } + + if ($big_errors > 0) { + $errors = "This family does not seem to exist!\n"; + } + + $c->stash->{errors} = $errors; # details.mas prints this text as its note + $c->stash->{template} = '/tools/genefamily/details.mas'; + +} + +1; diff --git a/lib/SGN/Genefamily.pm b/lib/SGN/Genefamily.pm index 1588ffca1a..9dcb32675e 100644 --- a/lib/SGN/Genefamily.pm +++ b/lib/SGN/Genefamily.pm @@ -20,11 +20,38 @@ Methods in this class include: package SGN::Genefamily; use Moose; + +with 'MooseX::Object::Pluggable'; + use namespace::autoclean; +use Data::Dumper; use File::Slurp qw/slurp/; +use File::Spec; use File::Spec::Functions; use File::Basename qw/basename/; +=head2 accessors genefamily_format(), genefamily_defs_file(), sequence_link() + +=cut + +has 'genefamily_format' => ( + is => 'rw', + isa => 'Str', + ); + +has 'genefamily_defs_file' => ( + is => 'rw', + isa => 'Str', + default => sub { return 'genefamily_defs.txt' }, + ); + +has 'sequence_link' => ( + is => 'rw', + isa => 'Str', + default => sub { return '/tools/genefamily/seq/'; } # add /$build/$family/$seq_id + ); + + =head2 accessors name() Usage: $gf->name() @@ -39,17 +66,17 @@ has 'name' => ( # required => 1, ); -=head2 members +# =head2 members - Usage: my @members = $gf->members() - Desc: retrieves the members of a genefamily. Read only. - Property: the members of the gene family - Side Effects: - Example: +# Usage: my @members = $gf->members() +# Desc: retrieves the members of a genefamily. Read only. 
+# Property: the members of the gene family +# Side Effects: +# Example: -=cut +# =cut -has 'members' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } ); +# has 'members' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } ); =head2 files_dir @@ -66,20 +93,20 @@ has 'files_dir' => ( required => 1, ); -=head2 dataset +=head2 build - Usage: my $d = $gf->dataset() + Usage: my $d = $gf->build() Desc: under the genefamily dir (files_dir), a number of sub-dirs should be present, each of which represents a separate gene family clustering (for example, based on different species or different clustering parameters). - Property: the dataset name [string] + Property: the build name [string] Side Effects: Example: =cut -has 'dataset' => ( +has 'build' => ( is => 'rw', required => 1, ); @@ -121,6 +148,8 @@ sub get_alignment { sub get_fasta { my $self = shift; my $file = catfile( $self->get_path(), "fasta", $self->name() . ".fa" ); + + print STDERR "Retrieving fasta file $file for family ".$self->name()."\n"; unless( -f $file ) { die "The fasta information for family " . $self->name() @@ -180,32 +209,109 @@ sub get_tree { return slurp($file); } -sub get_member_ids { +=head2 get_sequence + + Usage: my $seq_info = $gf->get_sequence($seq_id) + Desc: looks up one sequence in the fasta file of this family + Ret: [ $id, $description, $sequence ], or [] if no sequence has that id + Args: the sequence identifier to look for + +=cut + +sub get_sequence { my $self = shift; + my $sequence = shift; + my $file = File::Spec->catfile($self->get_path(), 'fasta', $self->name().".fa"); + + require Bio::SeqIO; # Bio::SeqIO is not among the imports visible in this patch, so load it here + my $io = Bio::SeqIO->new( -format => 'fasta', -file => $file ); + + while (my $seq = $io->next_seq()) { + print STDERR "Now checking id ".$seq->id()." against search term $sequence\n"; + if ($seq->id() eq $sequence) { + return [ $seq->id(), $seq->desc(), $seq->seq() ]; + } + } + return []; } -=head2 get_available_datasets - Usage: my @ds = SGN::Genefamily->get_available_datasets($DIR) - Desc: a class function that returns the available datasets - Ret: a list of dataset names - Args: the $DIR where the datasets are located. 
+=head2 get_members + + Usage: my $members = $gf->get_members($family) + Desc: collects the members of a family from the definitions file of this build + Ret: arrayref with one link to the sequence detail page per member id + Args: the family name as written in the first column of the definitions file + +=cut + +sub get_members { + my $self = shift; + my $family = shift; + + my $defs = File::Spec->catfile($self->get_path(), $self->genefamily_defs_file()); + + print STDERR "Getting member info for family $family from file $defs\n"; + + open(my $F, "<", $defs) || die "Can't open gene families definition file at $defs: $!"; + + my @all_members; + while(<$F>) { + chomp; + + my ($family_name, @members) = split/\t/; + + if (defined($family_name) && $family_name eq $family) { # an empty line yields no family name + foreach my $m (@members) { + + my @species_members = split /\s*,\s*/, $m; + foreach my $id (@species_members) { + $id = '<a href="'.$self->sequence_link().$self->build()."/$family/$id".'">'.$id."</a>"; # target is sequence_link/build/family/id + } + push @all_members, @species_members; + } + } + } + close($F); + return \@all_members; +} + +=head2 get_available_builds + + Usage: my @ds = SGN::Genefamily->get_available_builds($DIR) + Desc: a class function that returns the available builds + Ret: a list of build names + Args: the $DIR where the builds are located. Side Effects: Example: =cut -sub get_available_datasets { +sub get_available_builds { my $class = shift; my $path = shift; - my @dirs = map { basename($_) } grep -d, glob "$path/*"; + my @dirs = map { basename($_) } grep -d, glob $path."/*"; return @dirs; } sub get_path { my $self = shift; - return catfile( $self->files_dir(), $self->dataset() ); + return catfile( $self->files_dir(), $self->build() ); +} + +# Loads the plugin named by genefamily_format() and returns the table rows it builds for this build. +sub table { + my $self = shift; + + my $plugin = $self->genefamily_format(); + die "No genefamily_format configured, cannot select a gene family plugin" unless $plugin; + $self->load_plugin($plugin); + my $table = $self->get_data($self->build()); + + return $table; } + + 1; diff --git a/lib/SGN/Genefamily/Plugin/Orthofinder.pm b/lib/SGN/Genefamily/Plugin/Orthofinder.pm new file mode 100644 index 0000000000..3d2b670ed7 --- /dev/null +++ b/lib/SGN/Genefamily/Plugin/Orthofinder.pm @@ -0,0 +1,61 @@ + +package SGN::Genefamily::Plugin::Orthofinder; + +use Moose::Role; + +# Builds the table rows for a build: one row per orthogroup holding the link cells, the member count +# and the comma separated member ids. Reads a header line followed by tab separated rows. +sub get_data { + my $self = shift; + + my $build = shift; + + my $genefamily_definition_file = $self->files_dir()."/$build/".$self->genefamily_defs_file(); + + print STDERR 
"Working with definition file at $genefamily_definition_file\n"; + open(my $F, "<", $genefamily_definition_file) || die "Can't find gene family definition file $genefamily_definition_file: $!"; + + my $header = <$F>; + chomp($header); + + my @species = split /\t/, $header; + + my @table; + while (<$F>) { + chomp; + my ($orthogroup, @per_species_members) = split/\t/; + + + + + my $orthogroup_link = qq | $orthogroup | ; + my $sequence_link = qq | seqs |; + + my $alignment_link = "alignment"; + if ( -e $self->files_dir()."/$build/alignments/$orthogroup.aln" ) { + $alignment_link = qq | alignment |; + } + + my $tree_link = "tree"; + + if ( -e $self->files_dir()."/$build/trees/$orthogroup.tree") { + $tree_link = qq | tree |; + } + + + my @all_members; + foreach my $species_members (@per_species_members) { # every species column; the orthogroup id is already split off + my @members = split /\s*,\s*/, $species_members; + ## maybe add a link here later for each member + push @all_members, @members; + } + my $members = join(",", @all_members); + push @table, [$orthogroup_link, $sequence_link, $alignment_link, $tree_link, scalar(@all_members)." 
members", $members]; + } + close($F); + + return \@table; +} + + +1; diff --git a/lib/SGN/Genefamily/Plugin/Orthomcl.pm b/lib/SGN/Genefamily/Plugin/Orthomcl.pm new file mode 100644 index 0000000000..f4c193b7a6 --- /dev/null +++ b/lib/SGN/Genefamily/Plugin/Orthomcl.pm @@ -0,0 +1,37 @@ + +package SGN::Genefamily::Plugin::Orthomcl; + +use Moose::Role; + +# Builds the table rows for the build of this object: one row per family with the same six cells +# as the Orthofinder plugin. Reads a header line, then rows of family name, tab, whitespace separated members. +sub get_data { + my $self = shift; + + my $build = $self->build(); + + open(my $F, "<", $self->files_dir()."/$build/".$self->genefamily_defs_file()) || die "Can't find gene family definition file for build $build: $!"; + + my $header = <$F>; + chomp($header); + + my @table; + while (<$F>) { + chomp; + my ($orthogroup, $per_species_members) = split/\t/; + my $sequence_link = qq | seqs |; + my $alignment_link = qq | alignment |; + my $tree = qq | tree |; + + my @all_members = defined($per_species_members) ? split(/\s+/, $per_species_members) : (); + + my $members = join(",", @all_members); + + push @table, [$orthogroup, $sequence_link, $alignment_link, $tree, scalar(@all_members)." members", $members]; + } + close($F); + return \@table; +} + + +1; diff --git a/mason/tools/genefamily/details.mas b/mason/tools/genefamily/details.mas new file mode 100644 index 0000000000..3ae2e2a160 --- /dev/null +++ b/mason/tools/genefamily/details.mas @@ -0,0 +1,63 @@ + +<%args> +$genefamily_id +$members => undef +$member_count => undef +$annot_data => undef +$errors => undef +$tree_data => undef +$tree_link_disabled => undef +$fasta_data => undef +$seq_data => undef +$align_link_disabled => undef + + + + +

Family detail for family <% $genefamily_id %>

+ + +
Family: <% $genefamily_id %><% $annot_data %>
Note: <% $errors %>
+ +

Gene family members

+Member count: <% $member_count %>
+
+ <% $members %> +
+ + +
+ + + + + + + + +
View +
+ + /> + +
+
+
+ + /> +
+
+
+ +
+Sequences in fasta: +
+ + + +
+ +
+
<% $fasta_data %>
+
+ diff --git a/mason/tools/genefamily/fasta.mas b/mason/tools/genefamily/fasta.mas new file mode 100644 index 0000000000..6bed465fff --- /dev/null +++ b/mason/tools/genefamily/fasta.mas @@ -0,0 +1,12 @@ + +<%args> + +$build +$family +$fasta + + + +
+<% $fasta %>
+
diff --git a/mason/tools/genefamily/index.mas b/mason/tools/genefamily/index.mas index 30eb7e6474..07440a6c51 100644 --- a/mason/tools/genefamily/index.mas +++ b/mason/tools/genefamily/index.mas @@ -3,18 +3,14 @@ -<& /page/page_title.mas, title=>"Tomato Genefamily Annotation Resources" &> - -

This is a temporary interface to make the gene families from the tomato genome sequencing project available to gene family curators.

-

You can search in different gene family builds, both for a specific family, and for a gene family member. You can then view the fasta, alignments, and trees, if available.

-

More functionality will be added later.

-

To continue, you need to log in with a special username and password that was provided to you.

+<& /page/page_title.mas, title=>"Genefamily Annotation" &> +

Here you can search in different gene family builds, both for a specific family, and for a gene family member. You can then view the fasta, alignments, and trees, if available.


-Search gene families

+Search gene families

diff --git a/mason/tools/genefamily/search.mas b/mason/tools/genefamily/search.mas index 6e2c0d49c5..756be8743a 100644 --- a/mason/tools/genefamily/search.mas +++ b/mason/tools/genefamily/search.mas @@ -4,7 +4,7 @@ <%args> -$dataset +$build => "dummy_build_test" $genefamily_id => undef $member_id => undef $action => undef @@ -12,6 +12,9 @@ $action => undef <& /page/page_title.mas, title=>'Gene family search' &> + +

Browse Genefamilies

+ <%perl> use SGN::Genefamily; @@ -21,188 +24,57 @@ my $DIR = $c->get_conf('genefamily_dir'); # '/home/mueller/dutch_tomato_assembl if (!$action) { $action = "input"; } if ($genefamily_id eq '') { $genefamily_id=0; } -if ($action eq 'detail') { - my $gf = SGN::Genefamily->new( - name => "ORTHOMCL$genefamily_id", - dataset => $dataset, - files_dir => $DIR, - ); - - my $seq_data =""; - my $fasta_data = ""; - my $tree_data = ""; - my $annot_data = ""; - my $exp_data = ""; - - my $align_link_disabled = ""; - my $fasta_link_disabled = ""; - my $tree_link_disabled = ""; - my $exp_link_disabled = ""; - - my $errors = ""; - my $big_errors = 0; - eval { - $seq_data = $gf->get_alignment(); - }; - if ($@) { - $errors .= "Alignment data not available. "; - $big_errors++; - $align_link_disabled="disabled"; - } - eval { - $fasta_data = $gf->get_fasta(); - - }; - if ($@) { - $errors .= "Sequence data not available. "; - $big_errors++; - $fasta_link_disabled = "disabled"; - } - eval { - $tree_data = $gf->get_tree(); - }; - if ($@) { - $errors .= "Tree data not available. "; - $tree_link_disabled = "disabled" - } - eval { - $annot_data = $gf->get_annotation(); - }; - if ($@) { - $errors .= "Annotation data not available. "; - $annot_data = "(No annotation data available)"; - } - eval { - $errors .= "Expression data not available. "; - $exp_data = $gf->get_expression(); - }; - if ($@) { - $exp_link_disabled = "disabled"; - } - - if ($big_errors > 0) { - $errors = "This family does not seem to exist!\n"; - } - - -

Family detail for family <% $genefamily_id %>

- - -
Family: <% $genefamily_id %><% $annot_data %>
Note: <% $errors %>
- -
- - - - - - - - -
View -
- - /> - -
-
-
- - /> -
-
-
- -
-Sequences in fasta: -
- - - -
- -
-
<% $fasta_data %>
-
- -<%perl> -} +my @builds = SGN::Genefamily->get_available_builds($DIR); -if ($action eq 'search' && $member_id) { - die 'must provide dataset' unless $dataset; - my $member_file = catfile($DIR,$dataset,'genefamily_defs.txt'); - open (my $F, "<", $member_file) || die "can't open family file $member_file"; - my $family_nr = 0; - my $found = 0; - while(<$F>) { - - if ($_=~/\b$member_id\b/i) { - $found=1; - last(); - } - $family_nr++; - } - - - if ($found) { - - -
- In dataset <% $dataset %>, sequence <% $member_id %> is in family <% $family_nr %>. - - - - -
- -% } -% else { - <% $member_id %> was not found in any family. -% return; -% } -%} - -<%perl> -my @datasets = SGN::Genefamily->get_available_datasets($DIR); - -my $select = ''; my $selected = ""; -foreach my $d (@datasets) { - if ($d eq $dataset) { $selected="selected=\"selected\" "; } +foreach my $d (@builds) { + if ($d eq $build) { $selected="selected=\"selected\" "; } else { $selected= ""; } $select .= qq | |; } $select .= ""; -if ($action eq 'input') { +print $select."

"; - - - - - - - -
If you know the ID of your gene family, enter it below.
Genefamily id -
- (a number) - - <% $select %> - -
-
-OR-
if you know a member ID, search for the corresponding family:
Member id -
- (e.g. At1g01060) - - <% $select %> - -
-
- -% } + + + + + + + +
Genefamily IDSequencesAlignmentsTreesMember CountMembers
+ + + + + + + + + + <%init> use File::Spec::Functions; diff --git a/mason/tools/genefamily/sequence.mas b/mason/tools/genefamily/sequence.mas new file mode 100644 index 0000000000..5b84b7363a --- /dev/null +++ b/mason/tools/genefamily/sequence.mas @@ -0,0 +1,37 @@ + +<%args> +$seq_id +$build +$family +$desc => undef +$seq => undef + + + + +

Sequence Details

+ +

Identifier

+ +<% $seq_id %> + +

Viewing Build

+ +<% $build %> + +

Family

+ +<% $family %> + +

Description

+ +<% $desc %> + +

Sequence

+ +Length: <% length($seq) %> + +
+% $seq =~ s/(.{60})/$1\n/g;
+<% $seq %>
+
diff --git a/sgn.conf b/sgn.conf index 8b2837c05d..7a7932ea99 100644 --- a/sgn.conf +++ b/sgn.conf @@ -271,6 +271,7 @@ cookie_encryption_key bo9yie2JeeVee6ouAhch9aomeesieJ3iShae8aa8 # # where the genefamily info is stored genefamily_dir /export/prod/private/genomes/genefamily/ +genefamily_format Orthomcl # or Orthofinder # ## diff --git a/t/unit_fixture/SGN/genefamily.t b/t/unit_fixture/SGN/genefamily.t index 26627d112d..85e1ee6f62 100644 --- a/t/unit_fixture/SGN/genefamily.t +++ b/t/unit_fixture/SGN/genefamily.t @@ -8,13 +8,14 @@ use File::Spec::Functions; use_ok('SGN::Genefamily'); my $test_dir = 't/data/genefamily_data'; + my $gf = SGN::Genefamily->new( files_dir => $test_dir, - dataset => 'test', + build => 'test', name => 'family_0' ); is($gf->name(), "family_0", "name test"); -is($gf->dataset(), "test", "dataset test"); +is($gf->build(), "test", "dataset test"); is($gf->files_dir(), $test_dir, "files_dir test"); is($gf->get_path(), catdir($test_dir,'test'), "get_path test"); my $aln = $gf->get_alignment();