From e65d3a49c952aaf4d6534e0d6aebbb014c6a6a3b Mon Sep 17 00:00:00 2001 From: Marcello Costamagna Date: Thu, 3 Nov 2022 15:56:15 +0100 Subject: [PATCH 1/5] adds SMILESListFormat reading functionality --- .../ParallelFragmentationAlgorithm.java | 13 +- .../io/IteractingAtomContainerReader.java | 19 ++- .../java/denoptim/io/SMILESListFormat.java | 80 ++++++++++++ .../denoptim/io/SMILESListFormatTest.java | 119 ++++++++++++++++++ 4 files changed, 220 insertions(+), 11 deletions(-) create mode 100644 src/main/java/denoptim/io/SMILESListFormat.java create mode 100644 src/test/java/denoptim/io/SMILESListFormatTest.java diff --git a/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java b/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java index bb6760852..53602395c 100644 --- a/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java +++ b/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java @@ -20,7 +20,6 @@ import java.io.File; import java.io.FileFilter; -import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; @@ -34,16 +33,15 @@ import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; -import org.openscience.cdk.DefaultChemObjectBuilder; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.io.iterator.IteratingSDFReader; import denoptim.constants.DENOPTIMConstants; import denoptim.exception.DENOPTIMException; import denoptim.files.FileFormat; import denoptim.io.DenoptimIO; +import denoptim.io.IteractingAtomContainerReader; import denoptim.programs.RunTimeParameters.ParametersType; import denoptim.programs.fragmenter.FragmenterParameters; import denoptim.task.ParallelAsynchronousTaskExecutor; @@ -296,13 +294,12 @@ static void splitInputForThreads(FragmenterParameters settings) int maxBuffersSize = 
50000; int numBatches = settings.getNumTasks(); - IteratingSDFReader reader; + IteractingAtomContainerReader reader; try { - reader = new IteratingSDFReader( - new FileInputStream(settings.getStructuresFile()), - DefaultChemObjectBuilder.getInstance()); - } catch (FileNotFoundException e1) + reader = new IteractingAtomContainerReader( + new File(settings.getStructuresFile())); + } catch (IOException | CDKException e1) { // Cannot happen: we ensured the file exist, but it might have been // removed after the check diff --git a/src/main/java/denoptim/io/IteractingAtomContainerReader.java b/src/main/java/denoptim/io/IteractingAtomContainerReader.java index 5d10e2cf0..864a0203f 100644 --- a/src/main/java/denoptim/io/IteractingAtomContainerReader.java +++ b/src/main/java/denoptim/io/IteractingAtomContainerReader.java @@ -14,10 +14,14 @@ import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.io.FormatFactory; import org.openscience.cdk.io.formats.IChemFormat; +import org.openscience.cdk.io.formats.INChIPlainTextFormat; import org.openscience.cdk.io.formats.MDLV2000Format; import org.openscience.cdk.io.formats.MDLV3000Format; +import org.openscience.cdk.io.formats.SMILESFIXFormat; +import org.openscience.cdk.io.formats.SMILESFormat; import org.openscience.cdk.io.iterator.DefaultIteratingChemObjectReader; import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.io.iterator.IteratingSMILESReader; /** * An iterator that take {@link IAtomContainer}s from a file, possibly using @@ -69,7 +73,10 @@ public class IteractingAtomContainerReader implements Iterator public IteractingAtomContainerReader(File input) throws FileNotFoundException, IOException, CDKException { - IChemFormat chemFormat = new FormatFactory().guessFormat( + FormatFactory factory = new FormatFactory(); + factory.registerFormat(new SMILESListFormat()); + + IChemFormat chemFormat = factory.guessFormat( new BufferedReader(new FileReader(input))); if 
(chemFormat instanceof MDLV2000Format || chemFormat instanceof MDLV3000Format) @@ -78,10 +85,16 @@ public IteractingAtomContainerReader(File input) fileIterator = new IteratingSDFReader(fis, DefaultChemObjectBuilder.getInstance()); usingIteratingReader = true; - } else { + } else if (chemFormat instanceof SMILESListFormat) { + + FileInputStream fis = new FileInputStream(input); + fileIterator = new IteratingSMILESReader(fis, + DefaultChemObjectBuilder.getInstance()); + usingIteratingReader = true; + } else { results = DenoptimIO.readAllAtomContainers(input); listIterator = results.iterator(); - } + } } //------------------------------------------------------------------------------ diff --git a/src/main/java/denoptim/io/SMILESListFormat.java b/src/main/java/denoptim/io/SMILESListFormat.java new file mode 100644 index 000000000..55e394237 --- /dev/null +++ b/src/main/java/denoptim/io/SMILESListFormat.java @@ -0,0 +1,80 @@ +package denoptim.io; + +import java.util.List; +import org.openscience.cdk.io.formats.IChemFormatMatcher; +import org.openscience.cdk.io.formats.IChemFormatMatcher.MatchResult; +import org.openscience.cdk.tools.DataFeatures; + +/** + * Class for recognizing file containing a list of SMILES . + * One SMILES string in each line. 
Since SMILES do not contain spaces, absence + * of spaces in each line is the condition identifying a list of SMILES + * + * @author marcellocostamagna + * +*/ +public class SMILESListFormat implements IChemFormatMatcher +{ + @Override + public String getReaderClassName() + { + return null; + } + + @Override + public String getWriterClassName() + { + return null; + } + + @Override + public int getSupportedDataFeatures() + { + return DataFeatures.NONE; + } + + @Override + public int getRequiredDataFeatures() + { + return DataFeatures.NONE; + } + + @Override + public String getFormatName() + { + return "SMILES List"; + } + + @Override + public String getPreferredNameExtension() + { + return null; + } + + @Override + public String[] getNameExtensions() + { + return new String[0]; + } + + @Override + public String getMIMEType() + { + return "chemical/smiles"; + } + + @Override + public boolean isXMLBased() + { + return false; + } + + @Override + public final MatchResult matches(final List lines) { + for (int i = 0; i < Math.min(lines.size(), 100); i++) + { + if (lines.get(i).contains(" ")) return NO_MATCH; + } + return new MatchResult(true, this, lines.size()); + } +} diff --git a/src/test/java/denoptim/io/SMILESListFormatTest.java b/src/test/java/denoptim/io/SMILESListFormatTest.java new file mode 100644 index 000000000..65b11306b --- /dev/null +++ b/src/test/java/denoptim/io/SMILESListFormatTest.java @@ -0,0 +1,119 @@ +package denoptim.io; + +/* + * DENOPTIM + * Copyright (C) 2019 Vishwesh Venkatraman + * and Marco Foscato + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; + +import java.io.BufferedReader; +import java.io.File; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.vecmath.Point3d; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.openscience.cdk.Atom; +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IChemObjectBuilder; +import org.openscience.cdk.io.formats.IChemFormatMatcher.MatchResult; +import org.openscience.cdk.silent.SilentChemObjectBuilder; + +import denoptim.constants.DENOPTIMConstants; +import denoptim.exception.DENOPTIMException; +import denoptim.files.FileFormat; +import denoptim.files.FileUtils; +import denoptim.files.UndetectedFileFormatException; +import denoptim.ga.EAUtils; +import denoptim.ga.Population; +import denoptim.graph.APClass; +import denoptim.graph.Candidate; +import denoptim.graph.CandidateLW; +import denoptim.graph.DGraph; +import denoptim.graph.Edge; +import denoptim.graph.Edge.BondType; +import denoptim.graph.EmptyVertex; +import denoptim.graph.Fragment; +import denoptim.graph.FragmentTest; +import 
denoptim.graph.Ring; +import denoptim.graph.SymmetricSet; +import denoptim.graph.Template; +import denoptim.graph.TemplateTest; +import denoptim.graph.Vertex; +import denoptim.graph.Vertex.BBType; +import denoptim.programs.RunTimeParameters.ParametersType; +import denoptim.programs.denovo.GAParameters; +import denoptim.programs.fragmenter.CuttingRule; + +/** + * Unit test for SMILES List Format. + * + * @author Marcello Costamagna + */ + +public class SMILESListFormatTest +{ + +//------------------------------------------------------------------------------ + + @Test + public void testMatches() throws Exception { + List lines = new ArrayList(); + lines.add("blabla"); + lines.add("blabla"); + lines.add("blabla"); + + SMILESListFormat lsf = new SMILESListFormat(); + MatchResult result = lsf.matches(lines); + assertTrue(result.matched()); + + lines.add("bla bla"); + result = lsf.matches(lines); + assertFalse(result.matched()); + + lines = new ArrayList(); + lines.add(" blabla"); + lines.add("blabla"); + lines.add("blabla"); + + result = lsf.matches(lines); + assertFalse(result.matched()); + + lines = new ArrayList(); + lines.add("blabla"); + lines.add("blabla "); + lines.add("blabla"); + + result = lsf.matches(lines); + assertFalse(result.matched()); + } +} + +//------------------------------------------------------------------------------ From ed6b71aaf80459db4985c9de6f9474c8a2af47f3 Mon Sep 17 00:00:00 2001 From: Marcello Costamagna Date: Thu, 3 Nov 2022 16:06:29 +0100 Subject: [PATCH 2/5] fixes functional test t27 --- test/functional_tests/t27/data/fitness_provider.sh | 2 +- test/functional_tests/t27/runt27.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/functional_tests/t27/data/fitness_provider.sh b/test/functional_tests/t27/data/fitness_provider.sh index ad93c0275..1c90ed065 100755 --- a/test/functional_tests/t27/data/fitness_provider.sh +++ b/test/functional_tests/t27/data/fitness_provider.sh @@ -116,7 +116,7 @@ 
candIdTo40="M00000025" # defined further below (i.e., see $newCandSrc) triggerRemovalAndAddition="Gen10" # This is the generation where we'll ask denoptim to stop. -triggerStop="Gen15" +triggerStop="Gen20" # NB: you might need to change these numbers as a consequence of a change in # the algorithms that makes the exact sequence of candidates not reproducible diff --git a/test/functional_tests/t27/runt27.sh b/test/functional_tests/t27/runt27.sh index f3708c90f..9da7fcbc0 100755 --- a/test/functional_tests/t27/runt27.sh +++ b/test/functional_tests/t27/runt27.sh @@ -46,12 +46,12 @@ runFolder=$(basename $(ls -lrtd "$wrkDir"/RUN*/ | tail -n 1 | awk '{print $NF}') n50=$(grep -l "$candIdTo50" "$wrkDir/$runFolder/"*/Gen*.txt | wc -l | awk '{print $1}') n40=$(grep -l "$candIdTo40" "$wrkDir/$runFolder/"*/Gen*.txt | wc -l | awk '{print $1}') nMAX28=$(grep -l "MAX: *28.000" "$wrkDir/$runFolder/"*/Gen*.txt | wc -l | awk '{print $1}') -# Difficolt to get the execution of independent threads. The following checks may fail because of an eccessive loading on the cpus which retard the execution of some tasks resulting in widely different results. Usually, re-running the test leads to its successfull completion. -if [ "$n50" -lt 5 ] || [ "$n50" -gt 12 ]; then +# Difficult to get the execution of independent threads. The following checks may fail because of an excessive load on the CPUs, which delays the execution of some tasks, resulting in widely different results. Usually, re-running the test leads to its successful completion. +if [ "$n50" -lt 5 ] || [ "$n50" -gt 15 ]; then echo " " echo "Test 't27' WARNING: unreproducibile behavior: wrong number of populations including $candIdTo50 ($n50). Try re-running t27" fi -if [ "$n40" -lt 5 ] || [ "$n40" -gt 13 ]; then +if [ "$n40" -lt 5 ] || [ "$n40" -gt 15 ]; then echo " " echo "Test 't27' WARNING: unreproducibile behavior: wrong number of populations including "$candIdTo40" ($n40). 
Try re-running t27" fi From 372bccf30bef6f76e189360f3dea64a33f74d6c0 Mon Sep 17 00:00:00 2001 From: Marcello Costamagna Date: Thu, 3 Nov 2022 16:56:07 +0100 Subject: [PATCH 3/5] enabling 0D structures fragmenter --- .../denoptim/fragmenter/FragmenterTools.java | 38 +++++++++------- .../ParallelFragmentationAlgorithm.java | 45 +++++++++++-------- ...java => IteratingAtomContainerReader.java} | 14 +++++- .../fragmenter/FragmenterParameters.java | 23 ++++++++++ .../ParallelFragmentationAlgorithmTest.java | 5 ++- test/functional_tests/t30/data/mols-20.smi | 10 +++++ test/functional_tests/t30/data/mols-21.smi | 5 +++ test/functional_tests/t30/runt30.sh | 12 ++--- test/functional_tests/t30/t30-20.params | 5 +++ test/functional_tests/t30/t30-21.params | 7 +++ 10 files changed, 119 insertions(+), 45 deletions(-) rename src/main/java/denoptim/io/{IteractingAtomContainerReader.java => IteratingAtomContainerReader.java} (92%) create mode 100644 test/functional_tests/t30/data/mols-20.smi create mode 100644 test/functional_tests/t30/data/mols-21.smi create mode 100644 test/functional_tests/t30/t30-20.params create mode 100644 test/functional_tests/t30/t30-21.params diff --git a/src/main/java/denoptim/fragmenter/FragmenterTools.java b/src/main/java/denoptim/fragmenter/FragmenterTools.java index 3a85a2ce8..e1a7b5349 100644 --- a/src/main/java/denoptim/fragmenter/FragmenterTools.java +++ b/src/main/java/denoptim/fragmenter/FragmenterTools.java @@ -37,7 +37,7 @@ import denoptim.graph.Vertex; import denoptim.graph.Vertex.BBType; import denoptim.io.DenoptimIO; -import denoptim.io.IteractingAtomContainerReader; +import denoptim.io.IteratingAtomContainerReader; import denoptim.programs.fragmenter.CuttingRule; import denoptim.programs.fragmenter.FragmenterParameters; import denoptim.programs.fragmenter.MatchedBond; @@ -239,8 +239,8 @@ public static boolean fragmentation(File input, FragmenterParameters settings, File output, Logger logger) throws CDKException, IOException, 
DENOPTIMException, IllegalArgumentException, UndetectedFileFormatException { - IteractingAtomContainerReader iterator = - new IteractingAtomContainerReader(input); + IteratingAtomContainerReader iterator = + new IteratingAtomContainerReader(input); int totalProd = 0; int totalKept = 0; @@ -927,23 +927,27 @@ public static boolean filterFragment(Fragment frag, + smb + "'"); return false; } - // Incomplete fragmentation: an atom has the same coords of an AP. - for (AttachmentPoint ap : frag.getAttachmentPoints()) - { - Point3d ap3d = ap.getDirectionVector(); - if (ap3d!=null) + + if (settings.isWorkingIn3D()) + { + // Incomplete 3D fragmentation: an atom has the same coords of an AP. + for (AttachmentPoint ap : frag.getAttachmentPoints()) { - for (IAtom atm : frag.atoms()) + Point3d ap3d = ap.getDirectionVector(); + if (ap3d!=null) { - Point3d atm3d = MoleculeUtils.getPoint3d(atm); - double dist = ap3d.distance(atm3d); - if (dist < 0.0002) + for (IAtom atm : frag.atoms()) { - logger.log(Level.FINE,"Removing fragment with AP" - + frag.getIAtomContainer().indexOf(atm) - + " and atom " + MoleculeUtils.getSymbolOrLabel(atm) - + " coincide."); - return false; + Point3d atm3d = MoleculeUtils.getPoint3d(atm); + double dist = ap3d.distance(atm3d); + if (dist < 0.0002) + { + logger.log(Level.FINE,"Removing fragment with AP" + + frag.getIAtomContainer().indexOf(atm) + + " and atom " + MoleculeUtils.getSymbolOrLabel(atm) + + " coincide."); + return false; + } } } } diff --git a/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java b/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java index 53602395c..19625108f 100644 --- a/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java +++ b/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java @@ -36,12 +36,12 @@ import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; - +import 
org.openscience.cdk.io.iterator.IteratingSMILESReader; import denoptim.constants.DENOPTIMConstants; import denoptim.exception.DENOPTIMException; import denoptim.files.FileFormat; import denoptim.io.DenoptimIO; -import denoptim.io.IteractingAtomContainerReader; +import denoptim.io.IteratingAtomContainerReader; import denoptim.programs.RunTimeParameters.ParametersType; import denoptim.programs.fragmenter.FragmenterParameters; import denoptim.task.ParallelAsynchronousTaskExecutor; @@ -84,6 +84,22 @@ public ParallelFragmentationAlgorithm(FragmenterParameters settings) protected boolean doPreFlightOperations() { + IteratingAtomContainerReader reader; + try + { + reader = new IteratingAtomContainerReader + (new File(settings.getStructuresFile())); + + } catch (IOException | CDKException e1) + { + throw new Error("Error reading file '" + settings.getStructuresFile() + + "'. " + e1.getMessage()); + } + // Detect dimensionality of the molecules + if (reader.getIteratorType().equals(IteratingSMILESReader.class)) + { + settings.setWorkingIn3D(false); + } // Split data in batches for parallelization // This is the collector of the mutating pathname to the file collecting @@ -92,9 +108,7 @@ protected boolean doPreFlightOperations() structures[0] = new File(settings.getStructuresFile()); if (settings.getNumTasks()>1 || settings.doCheckFormula()) { - settings.getLogger().log(Level.INFO, "Combining structures and " - + "formulae..."); - splitInputForThreads(settings); + splitInputForThreads(settings, reader); for (int i=0; i formulae = settings.getFormulae(); + if (settings.doCheckFormula()) + { + settings.getLogger().log(Level.INFO, "Combining structures and " + + "formulae..."); + } int index = -1; int batchId = 0; int buffersSize = 0; diff --git a/src/main/java/denoptim/io/IteractingAtomContainerReader.java b/src/main/java/denoptim/io/IteratingAtomContainerReader.java similarity index 92% rename from src/main/java/denoptim/io/IteractingAtomContainerReader.java rename to 
src/main/java/denoptim/io/IteratingAtomContainerReader.java index 864a0203f..54c7bb74a 100644 --- a/src/main/java/denoptim/io/IteractingAtomContainerReader.java +++ b/src/main/java/denoptim/io/IteratingAtomContainerReader.java @@ -34,7 +34,7 @@ * * @author Marco Foscato */ -public class IteractingAtomContainerReader implements Iterator +public class IteratingAtomContainerReader implements Iterator { /** @@ -70,7 +70,7 @@ public class IteractingAtomContainerReader implements Iterator * @throws IOException * @throws CDKException */ - public IteractingAtomContainerReader(File input) + public IteratingAtomContainerReader(File input) throws FileNotFoundException, IOException, CDKException { FormatFactory factory = new FormatFactory(); @@ -134,4 +134,14 @@ public void close() throws IOException //------------------------------------------------------------------------------ + /** + * @return the class of the iterator defined upon creating a reader + */ + public Class getIteratorType() + { + if (usingIteratingReader) + return fileIterator.getClass(); + else + return listIterator.getClass(); + } } diff --git a/src/main/java/denoptim/programs/fragmenter/FragmenterParameters.java b/src/main/java/denoptim/programs/fragmenter/FragmenterParameters.java index a85748691..134605b41 100644 --- a/src/main/java/denoptim/programs/fragmenter/FragmenterParameters.java +++ b/src/main/java/denoptim/programs/fragmenter/FragmenterParameters.java @@ -350,6 +350,11 @@ public class FragmenterParameters extends RunTimeParameters */ private boolean isStandaloneFragmentClustering = false; + /** + * Flag activating operations depending on 3D structure + */ + private boolean workingIn3D = true; + //------------------------------------------------------------------------------ @@ -1430,5 +1435,23 @@ public boolean isStandaloneFragmentClustering() } //------------------------------------------------------------------------------ + + /** + * + * @return true if we are dealing with 3D structures + */ + 
public boolean isWorkingIn3D() + { + return workingIn3D; + } +//------------------------------------------------------------------------------ + /** + * Sets boolean variable workingIn3D + * @param workingIn3D + */ + public void setWorkingIn3D(boolean workingIn3D) + { + this.workingIn3D = workingIn3D; + } } diff --git a/src/test/java/denoptim/fragmenter/ParallelFragmentationAlgorithmTest.java b/src/test/java/denoptim/fragmenter/ParallelFragmentationAlgorithmTest.java index d170e2d86..de88d0985 100644 --- a/src/test/java/denoptim/fragmenter/ParallelFragmentationAlgorithmTest.java +++ b/src/test/java/denoptim/fragmenter/ParallelFragmentationAlgorithmTest.java @@ -33,6 +33,7 @@ import denoptim.constants.DENOPTIMConstants; import denoptim.io.DenoptimIO; +import denoptim.io.IteratingAtomContainerReader; import denoptim.programs.fragmenter.FragmenterParameters; /** @@ -89,7 +90,9 @@ public void testSplitInputForThreads() throws Exception settings.checkParameters(); settings.processParameters(); - ParallelFragmentationAlgorithm.splitInputForThreads(settings); + IteratingAtomContainerReader reader = new IteratingAtomContainerReader + (new File(settings.getStructuresFile())); + ParallelFragmentationAlgorithm.splitInputForThreads(settings, reader); int[] expectedEntries = {3, 2, 2}; for (int i=0; i<3; i++) diff --git a/test/functional_tests/t30/data/mols-20.smi b/test/functional_tests/t30/data/mols-20.smi new file mode 100644 index 000000000..001306e87 --- /dev/null +++ b/test/functional_tests/t30/data/mols-20.smi @@ -0,0 +1,10 @@ +CCCC(=O)NNC(=O)Nc1ccccc1 +CC(=O)NC1CCC2(C)C(CCC3(C)C2C(=O)C=C2C4C(C)C(C)CCC4(C)CCC23C)C1(C)C(=O)O +CC(=O)NC(C)Cc1ccc(C#Cc2ccnc(N3CCCC(F)C3)n2)cc1 +Cc1cccc(CCNC(=O)C2CCC(=O)N(Cc3ccc(Cl)cc3)C2)n1 +CC1C=CN(N(C)C)C2=C1C(=O)c1cnccc1C2=O +COc1ccc2c(-c3ccncc3)c(-c3ccc(F)cc3)[nH]c2n1 +Nc1nnc(CCNC(=O)c2cccc(C3CCCNC3)c2)s1 +CN(C)c1ccc(C(C(=O)NC2CCCC2)N(C(=O)c2ccco2)C2CC2)cc1 +CCOC(=O)C1CCN(C(=O)c2cccc(S(=O)(=O)NCc3ccccc3)c2)CC1 
+CN1CCC2(NC(=O)NC2=O)c2ccccc21 diff --git a/test/functional_tests/t30/data/mols-21.smi b/test/functional_tests/t30/data/mols-21.smi new file mode 100644 index 000000000..36c489307 --- /dev/null +++ b/test/functional_tests/t30/data/mols-21.smi @@ -0,0 +1,5 @@ +C([H])([H])([H])C(=O)OC([H])([H])C([H])([H])([H]) +C([H])([H])([H])C(=O)OC([H])([H])C([H])([H])([H]) +C([H])([H])([H])C(=O)OC([H])([H])C([H])([H])([H]) +C([H])([H])([H])C(=O)OC([H])([H])C([H])([H])([H]) +C([H])([H])([H])C(=O)OC([H])([H])C([H])([H])([H]) diff --git a/test/functional_tests/t30/runt30.sh b/test/functional_tests/t30/runt30.sh index 4cbdf9004..c1d0f06d8 100755 --- a/test/functional_tests/t30/runt30.sh +++ b/test/functional_tests/t30/runt30.sh @@ -8,12 +8,12 @@ mv data/* "$wrkDir" rm -rf data # Here we define the expected results -# sub test ID: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 -expctdNoMissingAtomMols=(9 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0) - expectedPreFiltered=(0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0) - expectedFragments=(0 0 0 20 12 8 6 11 70 0 0 0 0 0 9 0 4 2 7) -expectedIsomorphicFrags=(0 0 0 0 0 0 30 11 0 0 0 0 0 0 96 0 0 0 0) - expectedResults=(9 1 2 0 0 0 0 0 0 4 2 2 2 3 0 2 0 0 0) +# sub test ID: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 +expctdNoMissingAtomMols=(9 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0) + expectedPreFiltered=(0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0) + expectedFragments=(0 0 0 20 12 8 6 11 70 0 0 0 0 0 9 0 4 2 7 7 3) +expectedIsomorphicFrags=(0 0 0 0 0 0 30 11 0 0 0 0 0 0 96 0 0 0 0 0 0) + expectedResults=(9 1 2 0 0 0 0 0 0 4 2 2 2 3 0 2 0 0 0 0 0) nSubTests=${#expctdNoMissingAtomMols[@]} diff --git a/test/functional_tests/t30/t30-20.params b/test/functional_tests/t30/t30-20.params new file mode 100644 index 000000000..96f5a771a --- /dev/null +++ b/test/functional_tests/t30/t30-20.params @@ -0,0 +1,5 @@ +# +# This is the parameter file for test t30-20 +# +FRG-STRUCTURESFILE=mols-20.smi +FRG-CUTTINGRULESFILE=cutting_rules diff --git 
a/test/functional_tests/t30/t30-21.params b/test/functional_tests/t30/t30-21.params new file mode 100644 index 000000000..810d8c975 --- /dev/null +++ b/test/functional_tests/t30/t30-21.params @@ -0,0 +1,7 @@ +# +# This is the parameter file for test t30-21 +# +FRG-STRUCTURESFILE=mols-21.smi +FRG-CUTTINGRULESFILE=cutting_rules +FRG-REMOVEDUPLICATES +FRG-VERBOSITY=4 From e68f1eb2e8d290efcd910c521ecc0aa5e07ac2b2 Mon Sep 17 00:00:00 2001 From: Marcello Costamagna Date: Thu, 3 Nov 2022 17:04:02 +0100 Subject: [PATCH 4/5] adds explicitation of implicit hydrogens --- .../fragmenter/ParallelFragmentationAlgorithm.java | 3 ++- src/main/java/denoptim/utils/MoleculeUtils.java | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java b/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java index 19625108f..88d5ec09c 100644 --- a/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java +++ b/src/main/java/denoptim/fragmenter/ParallelFragmentationAlgorithm.java @@ -340,7 +340,8 @@ static void splitInputForThreads(FragmenterParameters settings, // expected to be found (but CSD uses them...) 
try { - MoleculeUtils.setZeroImplicitHydrogensToAllAtoms(mol); + // MoleculeUtils.setZeroImplicitHydrogensToAllAtoms(mol); + MoleculeUtils.explicitHydrogens(mol); MoleculeUtils.ensureNoUnsetBondOrders(mol); } catch (CDKException e) { diff --git a/src/main/java/denoptim/utils/MoleculeUtils.java b/src/main/java/denoptim/utils/MoleculeUtils.java index 4fd6e843c..e72b2c9fb 100644 --- a/src/main/java/denoptim/utils/MoleculeUtils.java +++ b/src/main/java/denoptim/utils/MoleculeUtils.java @@ -702,6 +702,17 @@ public static void setZeroImplicitHydrogensToAllAtoms(IAtomContainer iac) } //------------------------------------------------------------------------------ + + /** + * Converts all the implicit hydrogens to explicit + */ + public static void explicitHydrogens(IAtomContainer mol) + { + AtomContainerManipulator.convertImplicitToExplicitHydrogens(mol); + + } + +//------------------------------------------------------------------------------ /** * Sets bond order = single to all otherwise unset bonds. 
In case of failed From 0ee985e319b72058eb0c68a4595dfc0a15f57936 Mon Sep 17 00:00:00 2001 From: Marco Foscato Date: Fri, 11 Nov 2022 10:32:06 +0100 Subject: [PATCH 5/5] resolves error from unit testing on Windows --- src/main/java/denoptim/io/IteratingAtomContainerReader.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/denoptim/io/IteratingAtomContainerReader.java b/src/main/java/denoptim/io/IteratingAtomContainerReader.java index 54c7bb74a..054fc46ac 100644 --- a/src/main/java/denoptim/io/IteratingAtomContainerReader.java +++ b/src/main/java/denoptim/io/IteratingAtomContainerReader.java @@ -76,8 +76,10 @@ public IteratingAtomContainerReader(File input) FormatFactory factory = new FormatFactory(); factory.registerFormat(new SMILESListFormat()); - IChemFormat chemFormat = factory.guessFormat( - new BufferedReader(new FileReader(input))); + BufferedReader headReader = new BufferedReader(new FileReader(input)); + IChemFormat chemFormat = factory.guessFormat(headReader); + headReader.close(); + if (chemFormat instanceof MDLV2000Format || chemFormat instanceof MDLV3000Format) {