diff --git a/src/main/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotation.java b/src/main/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotation.java index bf8ef2ed..32a1a363 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotation.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotation.java @@ -6,6 +6,7 @@ import edu.ucsd.sbrg.bigg.polishing.PolishingUtils; import edu.ucsd.sbrg.db.BiGGDB; import edu.ucsd.sbrg.db.QueryOnce; +import edu.ucsd.sbrg.miriam.Registry; import edu.ucsd.sbrg.util.GPRParser; import edu.ucsd.sbrg.util.SBMLUtils; import org.sbml.jsbml.CVTerm.Qualifier; @@ -20,9 +21,6 @@ import java.util.logging.Logger; import java.util.stream.Collectors; -import static edu.ucsd.sbrg.bigg.annotation.BiGGAnnotation.getBiGGIdFromResources; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.TYPE_REACTION; - public class ReactionAnnotation extends CVTermAnnotation { /** @@ -64,7 +62,6 @@ public void annotate() { }); } - /** * Checks if {@link Species#getId()} returns a correct {@link BiGGId} and tries to retrieve a corresponding * {@link BiGGId} based on annotations present. @@ -77,20 +74,52 @@ public Optional checkId() { String id = reaction.getId(); // extracting BiGGId if not present for species boolean isBiGGid = id.matches("^(R_)?([a-zA-Z][a-zA-Z0-9_]+)(?:_([a-z][a-z0-9]?))?(?:_([A-Z][A-Z0-9]?))?$") - && QueryOnce.isReaction(id); + && QueryOnce.isReaction(id); if (!isBiGGid) { // Flatten all resources for all CVTerms into a list - List resources = - reaction.getAnnotation().getListOfCVTerms().stream().filter(cvTerm -> cvTerm.getQualifier() == Qualifier.BQB_IS) - .flatMap(term -> term.getResources().stream()).collect(Collectors.toList()); - if (!resources.isEmpty()) { - // update id if we found something - id = getBiGGIdFromResources(resources, TYPE_REACTION).orElse(id); - } + Set ids = reaction.getAnnotation().getListOfCVTerms() + .stream() + .filter(cvTerm -> cvTerm.getQualifier() == Qualifier.BQB_IS) + .flatMap(term -> term.getResources().stream()) + .map(Registry::checkResourceUrl) + .flatMap(Optional::stream) + .map(Registry::getPartsFromIdentifiersURI) + .map(parts -> { + String prefix = parts.get(0); + String synonymId = parts.get(1); + return BiGGDB.getBiggIdsForReactionForeignId(prefix, synonymId); + }) + .flatMap(Collection::stream) + .filter(this::matchingCompartments) + .map(fr -> fr.reactionId) + .collect(Collectors.toSet()); + id = ids.stream() + .findFirst() + .orElse(id); } return BiGGId.createReactionId(id); } + private boolean matchingCompartments(BiGGDB.ForeignReaction foreignReaction) { + if (!reaction.isSetCompartment() + && null == foreignReaction.compartmentId + && null == foreignReaction.compartmentName) { + return true; + } else if (!reaction.isSetCompartment() + && (null != foreignReaction.compartmentId + || null != foreignReaction.compartmentName)) { + return false; + } else if (reaction.isSetCompartment()) { + return reaction.getCompartment() + .equals(foreignReaction.compartmentId); + } else if (reaction.isSetCompartmentInstance() + && reaction.getCompartmentInstance().isSetName()) { + return reaction.getCompartmentInstance().getName() + .equals(foreignReaction.compartmentName); + } else + return false; + } + /** * @param biggId diff --git a/src/main/java/edu/ucsd/sbrg/db/BiGGDB.java b/src/main/java/edu/ucsd/sbrg/db/BiGGDB.java index a7857f01..464e8185 100644 --- a/src/main/java/edu/ucsd/sbrg/db/BiGGDB.java +++ b/src/main/java/edu/ucsd/sbrg/db/BiGGDB.java @@ -14,81 +14,23 @@ */ package edu.ucsd.sbrg.db; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.TYPE_GENE_PRODUCT; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.TYPE_REACTION; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.TYPE_SPECIES; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.ACCESSION_VALUE; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.BIGG_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.CHARGE; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.COMPARTMENTALIZED_COMPONENT_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.COMPARTMENT_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.COMPONENT_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.DATA_SOURCE_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.DATE_TIME; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.FORMULA; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.GENE_REACTION_RULE; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.GENOME_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.LOCUS_TAG; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.MODEL_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.NAME; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.OME_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.ORGANISM; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.PSEUDOREACTION; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.PUBLICATION_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.REACTION_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.REFERENCE_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.REFERENCE_TYPE; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.SUBSYSTEM; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.SYNONYM_COL; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.TAXON_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.TYPE; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.COMPARTMENT; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.COMPARTMENTALIZED_COMPONENT; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.COMPONENT; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.DATABASE_VERSION; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.DATA_SOURCE; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.GENE; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.GENOME; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.GENOME_REGION; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.MCC; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.MODEL; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.MODEL_REACTION; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.OLD_BIGG_ID; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.PUBLICATION; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.PUBLICATION_MODEL; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.REACTION; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.REFSEQ_NAME; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.REFSEQ_PATTERN; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.SYNONYM; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.URL; -import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.URL_PREFIX; -import static java.text.MessageFormat.format; -import static org.sbml.jsbml.util.Pair.pairOf; +import de.zbit.util.ResourceManager; +import de.zbit.util.Utils; +import edu.ucsd.sbrg.bigg.BiGGId; +import edu.ucsd.sbrg.miriam.Registry; +import org.sbml.jsbml.util.Pair; -import java.sql.Connection; import java.sql.Date; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.LinkedHashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Optional; -import java.util.ResourceBundle; -import java.util.Set; -import java.util.TreeSet; +import java.sql.*; +import java.util.*; import java.util.logging.Logger; import java.util.stream.Collectors; -import org.sbml.jsbml.util.Pair; - -import de.zbit.util.ResourceManager; -import de.zbit.util.Utils; -import edu.ucsd.sbrg.bigg.BiGGId; -import edu.ucsd.sbrg.miriam.Registry; +import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Column.*; +import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.*; +import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.Table.*; +import static java.text.MessageFormat.format; +import static org.sbml.jsbml.util.Pair.pairOf; /** * @author Andreas Dräger @@ -757,4 +699,59 @@ public static Optional getBiggIdFromSynonym(String dataSourceId, String return Optional.empty(); } } + + + public static class ForeignReaction { + public final String reactionId; + public final String compartmentId; + public final String compartmentName; + + public ForeignReaction(String reactionId, String compartmentId, String compartmentName) { + this.reactionId = reactionId; + this.compartmentId = compartmentId; + this.compartmentName = compartmentName; + } + } + + /** + * @param synonym + * @param dataSourceId + * @return String + */ + public static Collection getBiggIdsForReactionForeignId(String dataSourceId, + String synonym) { + Set results = new HashSet<>(); + + var query = "SELECT R.BIGG_ID AS REACTION_BIGG_ID, " + + "C.BIGG_ID AS COMPARTMENT_BIGG_ID, " + + "C.NAME AS COMPARTMENT_NAME " + + "FROM REACTION R " + + "left join REACTION_MATRIX RM " + + "on RM.REACTION_ID = R.ID " + + "left join COMPARTMENTALIZED_COMPONENT CC " + + "on RM.COMPARTMENTALIZED_COMPONENT_ID = CC.ID " + + "left join COMPARTMENT C " + + "on CC.COMPARTMENT_ID = C.ID " + + "join synonym s " + + "on synonym = ? and r.id = s.ome_id " + + "join data_source d " + + "on s.data_source_id = d.id and d.bigg_id = ?"; + + try (var connection = connector.getConnection()) { + var pStatement = connection.prepareStatement(query); + pStatement.setString(1, synonym); + pStatement.setString(2, dataSourceId); + var resultSet = pStatement.executeQuery(); + while (resultSet.next()) { + var reactionBiggId = resultSet.getString(1); + var compartmentBiggId = resultSet.getString(2); + var compartmentName = resultSet.getString(3); + var r = new ForeignReaction(reactionBiggId, compartmentBiggId, compartmentName); + results.add(r); + } + } catch (SQLException exc) { + logger.warning(Utils.getMessage(exc)); + } + return results; + } } diff --git a/src/test/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotationTest.java b/src/test/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotationTest.java index b761a7f9..ad4e264d 100644 --- a/src/test/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotationTest.java +++ b/src/test/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotationTest.java @@ -1,5 +1,6 @@ package edu.ucsd.sbrg.bigg.annotation; +import edu.ucsd.sbrg.bigg.ModelPolisherOptions; import org.junit.jupiter.api.Test; import org.sbml.jsbml.CVTerm; import org.sbml.jsbml.Model; @@ -10,6 +11,7 @@ import org.sbml.jsbml.ext.groups.GroupsModelPlugin; import org.sbml.jsbml.ext.groups.Member; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -20,22 +22,26 @@ public class ReactionAnnotationTest extends BiGGDBTest { @Test public void getBiGGIdFromResourcesTest() { - initParameters(); + initParameters(Map.of(ModelPolisherOptions.INCLUDE_ANY_URI.getOptionName(), + "true")); var m = new Model("iJO1366", 3, 2); var r1 = m.createReaction("some_name"); var r2 = m.createReaction("some_other_name"); var r3 = m.createReaction("some_third_name"); + r1.setCompartment("m"); r1.addCVTerm(new CVTerm( CVTerm.Type.BIOLOGICAL_QUALIFIER, CVTerm.Qualifier.BQB_IS, "http://identifiers.org/biocyc/META:ACETATEKIN-RXN")); + r2.setCompartment("e"); r2.addCVTerm(new CVTerm( CVTerm.Type.BIOLOGICAL_QUALIFIER, CVTerm.Qualifier.BQB_IS, "http://identifiers.org/metanetx.reaction/MNXR103371")); + r3.setCompartment("c"); r3.addCVTerm(new CVTerm( CVTerm.Type.BIOLOGICAL_QUALIFIER, CVTerm.Qualifier.BQB_IS, @@ -44,18 +50,21 @@ public void getBiGGIdFromResourcesTest() { var gPlugin = (GroupsModelPlugin) m.getPlugin(GroupsConstants.shortLabel); assertEquals(0, gPlugin.getGroupCount()); - new ReactionAnnotation(r1).annotate(); + new ReactionAnnotation(r1).annotate(); new ReactionAnnotation(r2).annotate(); new ReactionAnnotation(r3).annotate(); var r1FbcPlugin = (FBCReactionPlugin) r1.getPlugin(FBCConstants.shortLabel); var gpa1 = r1FbcPlugin.getGeneProductAssociation(); assertNull(gpa1); - assertEquals(false, r1.isSetCompartment()); - assertEquals("", r1.getName()); + assertEquals("Acetate kinase, mitochondrial", r1.getName()); assertEquals(1, r1.getCVTermCount()); - assertEquals(1, r1.getCVTerm(0).getNumResources()); + assertEquals(11, r1.getCVTerm(0).getNumResources()); + var r2FbcPlugin = (FBCReactionPlugin) r2.getPlugin(FBCConstants.shortLabel); + var gpa2 = r2FbcPlugin.getGeneProductAssociation(); + assertNull(gpa2); + assertEquals("", r2.getName()); assertEquals(1, r2.getCVTermCount()); assertEquals(1, r2.getCVTerm(0).getNumResources()); @@ -63,8 +72,7 @@ public void getBiGGIdFromResourcesTest() { var gpa3 = r3FbcPlugin.getGeneProductAssociation(); assertNotNull(gpa3); assertEquals("G_b2388", ((GeneProductRef) gpa3.getAssociation()).getGeneProduct()); - assertEquals(false, r1.isSetCompartment()); - assertEquals("", r1.getName()); + assertEquals("Hexokinase (D-glucose:ATP)", r3.getName()); assertEquals(1, r3.getCVTermCount()); assertEquals(11, r3.getCVTerm(0).getNumResources());