diff --git a/package/MDAnalysis/converters/OpenMM.py b/package/MDAnalysis/converters/OpenMM.py index 227a99ebe5..76a743b773 100644 --- a/package/MDAnalysis/converters/OpenMM.py +++ b/package/MDAnalysis/converters/OpenMM.py @@ -77,13 +77,17 @@ class OpenMMSimulationReader(base.SingleFrameReaderBase): """ format = "OPENMMSIMULATION" - units = {"time": "ps", "length": "nm", "velocity": "nm/ps", - "force": "kJ/(mol*nm)", "energy": "kJ/mol"} + units = { + "time": "ps", + "length": "nm", + "velocity": "nm/ps", + "force": "kJ/(mol*nm)", + "energy": "kJ/mol", + } @staticmethod def _format_hint(thing): - """Can this reader read *thing*? - """ + """Can this reader read *thing*?""" try: from openmm.app import Simulation except ImportError: @@ -104,20 +108,23 @@ def _read_first_frame(self): self.ts.triclinic_dimensions = self.convert_pos_from_native( self.ts.triclinic_dimensions, inplace=False ) - self.ts.dimensions[3:] = _sanitize_box_angles(self.ts.dimensions[3:]) + self.ts.dimensions[3:] = _sanitize_box_angles( + self.ts.dimensions[3:] + ) self.convert_velocities_from_native(self.ts._velocities) self.convert_forces_from_native(self.ts._forces) self.convert_time_from_native(self.ts.dt) def _mda_timestep_from_omm_context(self): - """ Construct Timestep object from OpenMM context """ + """Construct Timestep object from OpenMM context""" try: import openmm.unit as u except ImportError: # pragma: no cover import simtk.unit as u - state = self.filename.context.getState(-1, getVelocities=True, - getForces=True, getEnergy=True) + state = self.filename.context.getState( + -1, getVelocities=True, getForces=True, getEnergy=True + ) n_atoms = self.filename.context.getSystem().getNumParticles() @@ -125,13 +132,14 @@ def _mda_timestep_from_omm_context(self): ts.frame = 0 ts.data["time"] = state.getTime()._value ts.data["potential_energy"] = ( - state.getPotentialEnergy().in_units_of(u.kilojoule/u.mole)._value + state.getPotentialEnergy().in_units_of(u.kilojoule / u.mole)._value ) ts.data["kinetic_energy"] = ( - state.getKineticEnergy().in_units_of(u.kilojoule/u.mole)._value + state.getKineticEnergy().in_units_of(u.kilojoule / u.mole)._value ) ts.triclinic_dimensions = state.getPeriodicBoxVectors( - asNumpy=True)._value + asNumpy=True + )._value ts.dimensions[3:] = _sanitize_box_angles(ts.dimensions[3:]) ts.positions = state.getPositions(asNumpy=True)._value ts.velocities = state.getVelocities(asNumpy=True)._value @@ -153,8 +161,7 @@ class OpenMMAppReader(base.SingleFrameReaderBase): @staticmethod def _format_hint(thing): - """Can this reader read *thing*? - """ + """Can this reader read *thing*?""" try: from openmm import app except ImportError: @@ -163,8 +170,7 @@ def _format_hint(thing): except ImportError: return False else: - return isinstance(thing, (app.PDBFile, app.Modeller, - app.PDBxFile)) + return isinstance(thing, (app.PDBFile, app.Modeller, app.PDBxFile)) def _read_first_frame(self): self.n_atoms = self.filename.topology.getNumAtoms() @@ -177,10 +183,12 @@ def _read_first_frame(self): self.ts.triclinic_dimensions = self.convert_pos_from_native( self.ts.triclinic_dimensions, inplace=False ) - self.ts.dimensions[3:] = _sanitize_box_angles(self.ts.dimensions[3:]) + self.ts.dimensions[3:] = _sanitize_box_angles( + self.ts.dimensions[3:] + ) def _mda_timestep_from_omm_app(self): - """ Construct Timestep object from OpenMM Application object """ + """Construct Timestep object from OpenMM Application object""" omm_object = self.filename n_atoms = omm_object.topology.getNumAtoms() @@ -198,7 +206,7 @@ def _mda_timestep_from_omm_app(self): def _sanitize_box_angles(angles): - """ Ensure box angles correspond to first quadrant + """Ensure box angles correspond to first quadrant See `discussion on unitcell angles `_ """ diff --git a/package/MDAnalysis/converters/OpenMMParser.py b/package/MDAnalysis/converters/OpenMMParser.py index a8b2866085..03fa06a60b 100644 --- a/package/MDAnalysis/converters/OpenMMParser.py +++ b/package/MDAnalysis/converters/OpenMMParser.py @@ -84,9 +84,7 @@ class OpenMMTopologyParser(TopologyReaderBase): @staticmethod def _format_hint(thing): - """Can this Parser read object *thing*? - - """ + """Can this Parser read object *thing*?""" try: from openmm import app except ImportError: @@ -98,7 +96,7 @@ def _format_hint(thing): return isinstance(thing, app.Topology) def _mda_topology_from_omm_topology(self, omm_topology): - """ Construct mda topology from omm topology + """Construct mda topology from omm topology Can be used for any openmm object that contains a topology object @@ -130,9 +128,11 @@ def _mda_topology_from_omm_topology(self, omm_topology): try: from simtk.unit import daltons except ImportError: - msg = ("OpenMM is required for the OpenMMParser but " - "it's not installed. Try installing it with \n" - "conda install -c conda-forge openmm") + msg = ( + "OpenMM is required for the OpenMMParser but " + "it's not installed. Try installing it with \n" + "conda install -c conda-forge openmm" + ) raise ImportError(msg) atom_resindex = [a.residue.index for a in omm_topology.atoms()] @@ -168,19 +168,21 @@ def _mda_topology_from_omm_topology(self, omm_topology): if elem.symbol.capitalize() in SYMB2Z: validated_elements.append(elem.symbol) else: - validated_elements.append('') + validated_elements.append("") atomtypes.append(elem.symbol) masses.append(elem.mass.value_in_unit(daltons)) else: - validated_elements.append('') + validated_elements.append("") masses.append(0.0) - atomtypes.append('X') + atomtypes.append("X") if not all(validated_elements): if any(validated_elements): - warnings.warn("Element information missing for some atoms. " - "These have been given an empty element record ") - if any(i == 'X' for i in atomtypes): + warnings.warn( + "Element information missing for some atoms. " + "These have been given an empty element record " + ) + if any(i == "X" for i in atomtypes): warnings.warn( "For absent elements, atomtype has been " "set to 'X' and mass has been set to 0.0. " @@ -189,10 +191,12 @@ def _mda_topology_from_omm_topology(self, omm_topology): "to_guess=['masses', 'types']). " "(for MDAnalysis version 2.x " "this is done automatically," - " but it will be removed in 3.0).") + " but it will be removed in 3.0)." + ) - attrs.append(Elements(np.array(validated_elements, - dtype=object))) + attrs.append( + Elements(np.array(validated_elements, dtype=object)) + ) else: wmsg = ( @@ -205,7 +209,8 @@ def _mda_topology_from_omm_topology(self, omm_topology): "These can be guessed using " "universe.guess_TopologyAttrs(" "to_guess=['masses', 'types']) " - "See MDAnalysis.guessers.") + "See MDAnalysis.guessers." + ) warnings.warn(wmsg) else: @@ -239,9 +244,7 @@ class OpenMMAppTopologyParser(OpenMMTopologyParser): @staticmethod def _format_hint(thing): - """Can this Parser read object *thing*? - - """ + """Can this Parser read object *thing*?""" try: from openmm import app except ImportError: @@ -252,10 +255,7 @@ def _format_hint(thing): else: return isinstance( thing, - ( - app.PDBFile, app.Modeller, - app.Simulation, app.PDBxFile - ) + (app.PDBFile, app.Modeller, app.Simulation, app.PDBxFile), ) def parse(self, **kwargs): diff --git a/package/MDAnalysis/converters/ParmEd.py b/package/MDAnalysis/converters/ParmEd.py index b808f6b148..69b0c0364e 100644 --- a/package/MDAnalysis/converters/ParmEd.py +++ b/package/MDAnalysis/converters/ParmEd.py @@ -93,10 +93,11 @@ class ParmEdReader(SingleFrameReaderBase): """Coordinate reader for ParmEd.""" - format = 'PARMED' + + format = "PARMED" # Structure.coordinates always in Angstrom - units = {'time': None, 'length': 'Angstrom'} + units = {"time": None, "length": "Angstrom"} @staticmethod def _format_hint(thing): @@ -115,8 +116,7 @@ def _format_hint(thing): def _read_first_frame(self): self.n_atoms = len(self.filename.atoms) - self.ts = ts = self._Timestep(self.n_atoms, - **self._ts_kwargs) + self.ts = ts = self._Timestep(self.n_atoms, **self._ts_kwargs) if self.filename.coordinates is not None: ts._pos = self.filename.coordinates @@ -129,12 +129,12 @@ def _read_first_frame(self): MDA2PMD = { - 'tempfactor': 'bfactor', - 'gbscreen': 'screen', - 'altLoc': 'altloc', - 'nbindex': 'nb_idx', - 'solventradius': 'solvent_radius', - 'id': 'number' + "tempfactor": "bfactor", + "gbscreen": "screen", + "altLoc": "altloc", + "nbindex": "nb_idx", + "solventradius": "solvent_radius", + "id": "number", } @@ -160,8 +160,8 @@ class ParmEdConverter(base.ConverterBase): """ - lib = 'PARMED' - units = {'time': None, 'length': 'Angstrom'} + lib = "PARMED" + units = {"time": None, "length": "Angstrom"} def convert(self, obj): """Write selection at current trajectory frame to :class:`~parmed.structure.Structure`. @@ -181,9 +181,11 @@ def convert(self, obj): if NumpyVersion(np.__version__) >= "2.0.0": ermsg = "ParmEd is not compatible with NumPy 2.0+" else: - ermsg = ("ParmEd is required for ParmEdConverter but is not " - "installed. Try installing it with \n" - "pip install parmed") + ermsg = ( + "ParmEd is required for ParmEdConverter but is not " + "installed. Try installing it with \n" + "pip install parmed" + ) raise ImportError(errmsg) try: # make sure to use atoms (Issue 46) @@ -196,63 +198,80 @@ def convert(self, obj): try: names = ag_or_ts.names except (AttributeError, NoDataError): - names = itertools.cycle(('X',)) - missing_topology.append('names') + names = itertools.cycle(("X",)) + missing_topology.append("names") try: resnames = ag_or_ts.resnames except (AttributeError, NoDataError): - resnames = itertools.cycle(('UNK',)) - missing_topology.append('resnames') + resnames = itertools.cycle(("UNK",)) + missing_topology.append("resnames") if missing_topology: warnings.warn( "Supplied AtomGroup was missing the following attributes: " "{miss}. These will be written with default values. " "Alternatively these can be supplied as keyword arguments." - "".format(miss=', '.join(missing_topology))) + "".format(miss=", ".join(missing_topology)) + ) try: positions = ag_or_ts.positions except (AttributeError, NoDataError): - positions = [None]*ag_or_ts.n_atoms + positions = [None] * ag_or_ts.n_atoms try: velocities = ag_or_ts.velocities except (AttributeError, NoDataError): - velocities = [None]*ag_or_ts.n_atoms + velocities = [None] * ag_or_ts.n_atoms atom_kwargs = [] - for atom, name, resname, xyz, vel in zip(ag_or_ts, names, resnames, - positions, velocities): - akwargs = {'name': name} - chain_seg = {'segid': atom.segid} - for attrname in ('mass', 'charge', 'type', - 'altLoc', 'tempfactor', - 'occupancy', 'gbscreen', 'solventradius', - 'nbindex', 'rmin', 'epsilon', 'rmin14', - 'epsilon14', 'id'): + for atom, name, resname, xyz, vel in zip( + ag_or_ts, names, resnames, positions, velocities + ): + akwargs = {"name": name} + chain_seg = {"segid": atom.segid} + for attrname in ( + "mass", + "charge", + "type", + "altLoc", + "tempfactor", + "occupancy", + "gbscreen", + "solventradius", + "nbindex", + "rmin", + "epsilon", + "rmin14", + "epsilon14", + "id", + ): try: - akwargs[MDA2PMD.get(attrname, attrname)] = getattr(atom, attrname) + akwargs[MDA2PMD.get(attrname, attrname)] = getattr( + atom, attrname + ) except AttributeError: pass try: el = atom.element.lower().capitalize() - akwargs['atomic_number'] = SYMB2Z[el] + akwargs["atomic_number"] = SYMB2Z[el] except (KeyError, AttributeError): try: tp = atom.type.lower().capitalize() - akwargs['atomic_number'] = SYMB2Z[tp] + akwargs["atomic_number"] = SYMB2Z[tp] except (KeyError, AttributeError): pass try: - chain_seg['chain'] = atom.chainID + chain_seg["chain"] = atom.chainID except AttributeError: pass try: - chain_seg['inscode'] = atom.icode + chain_seg["inscode"] = atom.icode except AttributeError: pass - atom_kwargs.append((akwargs, resname, atom.resid, chain_seg, xyz, vel)) + atom_kwargs.append( + (akwargs, resname, atom.resid, chain_seg, xyz, vel) + ) struct = pmd.Structure() @@ -264,9 +283,12 @@ def convert(self, obj): if vel is not None: atom.vx, atom.vy, atom.vz = vel - atom.atom_type = pmd.AtomType(akwarg['name'], None, - akwarg['mass'], - atomic_number=akwargs.get('atomic_number')) + atom.atom_type = pmd.AtomType( + akwarg["name"], + None, + akwarg["mass"], + atomic_number=akwargs.get("atomic_number"), + ) struct.add_atom(atom, resname, resid, **kw) try: @@ -274,12 +296,15 @@ def convert(self, obj): except AttributeError: struct.box = None - if hasattr(ag_or_ts, 'universe'): - atomgroup = {atom: index for index, - atom in enumerate(list(ag_or_ts))} - get_atom_indices = functools.partial(get_indices_from_subset, - atomgroup=atomgroup, - universe=ag_or_ts.universe) + if hasattr(ag_or_ts, "universe"): + atomgroup = { + atom: index for index, atom in enumerate(list(ag_or_ts)) + } + get_atom_indices = functools.partial( + get_indices_from_subset, + atomgroup=atomgroup, + universe=ag_or_ts.universe, + ) else: get_atom_indices = lambda x: x @@ -290,8 +315,9 @@ def convert(self, obj): pass else: for p in params: - atoms = [struct.atoms[i] for i in map(get_atom_indices, - p.indices)] + atoms = [ + struct.atoms[i] for i in map(get_atom_indices, p.indices) + ] try: for obj in p.type: bond = pmd.Bond(*atoms, type=obj.type, order=obj.order) @@ -301,7 +327,7 @@ def convert(self, obj): bond.type.list = struct.bond_types except (TypeError, AttributeError): order = p.order if p.order is not None else 1 - btype = getattr(p.type, 'type', None) + btype = getattr(p.type, "type", None) bond = pmd.Bond(*atoms, type=btype, order=order) struct.bonds.append(bond) @@ -311,40 +337,62 @@ def convert(self, obj): # dihedrals try: - params = ag_or_ts.dihedrals.atomgroup_intersection(ag_or_ts, - strict=True) + params = ag_or_ts.dihedrals.atomgroup_intersection( + ag_or_ts, strict=True + ) except AttributeError: pass else: for p in params: - atoms = [struct.atoms[i] for i in map(get_atom_indices, - p.indices)] + atoms = [ + struct.atoms[i] for i in map(get_atom_indices, p.indices) + ] try: for obj in p.type: - imp = getattr(obj, 'improper', False) - ign = getattr(obj, 'ignore_end', False) - dih = pmd.Dihedral(*atoms, type=obj.type, - ignore_end=ign, improper=imp) + imp = getattr(obj, "improper", False) + ign = getattr(obj, "ignore_end", False) + dih = pmd.Dihedral( + *atoms, type=obj.type, ignore_end=ign, improper=imp + ) struct.dihedrals.append(dih) if isinstance(dih.type, pmd.DihedralType): struct.dihedral_types.append(dih.type) dih.type.list = struct.dihedral_types except (TypeError, AttributeError): - btype = getattr(p.type, 'type', None) - imp = getattr(p.type, 'improper', False) - ign = getattr(p.type, 'ignore_end', False) - dih = pmd.Dihedral(*atoms, type=btype, - improper=imp, ignore_end=ign) + btype = getattr(p.type, "type", None) + imp = getattr(p.type, "improper", False) + ign = getattr(p.type, "ignore_end", False) + dih = pmd.Dihedral( + *atoms, type=btype, improper=imp, ignore_end=ign + ) struct.dihedrals.append(dih) if isinstance(dih.type, pmd.DihedralType): struct.dihedral_types.append(dih.type) dih.type.list = struct.dihedral_types for param, pmdtype, trackedlist, typelist, clstype in ( - ('ureybradleys', pmd.UreyBradley, struct.urey_bradleys, struct.urey_bradley_types, pmd.BondType), - ('angles', pmd.Angle, struct.angles, struct.angle_types, pmd.AngleType), - ('impropers', pmd.Improper, struct.impropers, struct.improper_types, pmd.ImproperType), - ('cmaps', pmd.Cmap, struct.cmaps, struct.cmap_types, pmd.CmapType) + ( + "ureybradleys", + pmd.UreyBradley, + struct.urey_bradleys, + struct.urey_bradley_types, + pmd.BondType, + ), + ( + "angles", + pmd.Angle, + struct.angles, + struct.angle_types, + pmd.AngleType, + ), + ( + "impropers", + pmd.Improper, + struct.impropers, + struct.improper_types, + pmd.ImproperType, + ), + ("cmaps", pmd.Cmap, struct.cmaps, struct.cmap_types, pmd.CmapType), ): try: params = getattr(ag_or_ts, param) @@ -353,8 +401,10 @@ def convert(self, obj): pass else: for v in values: - atoms = [struct.atoms[i] for i in map(get_atom_indices, - v.indices)] + atoms = [ + struct.atoms[i] + for i in map(get_atom_indices, v.indices) + ] try: for parmed_obj in v.type: @@ -364,7 +414,7 @@ def convert(self, obj): typelist.append(p.type) p.type.list = typelist except (TypeError, AttributeError): - vtype = getattr(v.type, 'type', None) + vtype = getattr(v.type, "type", None) p = pmdtype(*atoms, type=vtype) trackedlist.append(p) diff --git a/package/MDAnalysis/converters/ParmEdParser.py b/package/MDAnalysis/converters/ParmEdParser.py index 31ed9bee41..331f14e279 100644 --- a/package/MDAnalysis/converters/ParmEdParser.py +++ b/package/MDAnalysis/converters/ParmEdParser.py @@ -118,7 +118,7 @@ Angles, Dihedrals, Impropers, - CMaps + CMaps, ) from ..core.topology import Topology @@ -136,7 +136,8 @@ class ParmEdParser(TopologyReaderBase): """ For ParmEd structures """ - format = 'PARMED' + + format = "PARMED" @staticmethod def _format_hint(thing): @@ -228,30 +229,27 @@ def parse(self, **kwargs): try: elements.append(Z2SYMB[z]) except KeyError: - elements.append('') + elements.append("") # Make Atom TopologyAttrs for vals, Attr, dtype in ( - (names, Atomnames, object), - (masses, Masses, np.float32), - (charges, Charges, np.float32), - (types, Atomtypes, object), - (elements, Elements, object), - (serials, Atomids, np.int32), - (chainids, ChainIDs, object), - - (altLocs, AltLocs, object), - (bfactors, Tempfactors, np.float32), - (occupancies, Occupancies, np.float32), - - (screens, GBScreens, np.float32), - (solvent_radii, SolventRadii, np.float32), - (nonbonded_indices, NonbondedIndices, np.int32), - - (rmins, RMins, np.float32), - (epsilons, Epsilons, np.float32), - (rmin14s, RMin14s, np.float32), - (epsilon14s, Epsilon14s, np.float32), + (names, Atomnames, object), + (masses, Masses, np.float32), + (charges, Charges, np.float32), + (types, Atomtypes, object), + (elements, Elements, object), + (serials, Atomids, np.int32), + (chainids, ChainIDs, object), + (altLocs, AltLocs, object), + (bfactors, Tempfactors, np.float32), + (occupancies, Occupancies, np.float32), + (screens, GBScreens, np.float32), + (solvent_radii, SolventRadii, np.float32), + (nonbonded_indices, NonbondedIndices, np.int32), + (rmins, RMins, np.float32), + (epsilons, Epsilons, np.float32), + (rmin14s, RMin14s, np.float32), + (epsilon14s, Epsilon14s, np.float32), ): attrs.append(Attr(np.array(vals, dtype=dtype))) @@ -262,7 +260,8 @@ def parse(self, **kwargs): residx, (resids, resnames, chainids, segids) = change_squash( (resids, resnames, chainids, segids), - (resids, resnames, chainids, segids)) + (resids, resnames, chainids, segids), + ) n_residues = len(resids) attrs.append(Resids(resids)) @@ -311,8 +310,11 @@ def parse(self, **kwargs): bond_types = list(map(squash_identical, bond_types)) bond_orders = list(map(squash_identical, bond_orders)) - attrs.append(Bonds(bond_values, types=bond_types, guessed=False, - order=bond_orders)) + attrs.append( + Bonds( + bond_values, types=bond_types, guessed=False, order=bond_orders + ) + ) for pmdlist, na, values, types in ( (structure.urey_bradleys, 2, ub_values, ub_types), @@ -323,7 +325,7 @@ def parse(self, **kwargs): ): for p in pmdlist: - atoms = ['atom{}'.format(i) for i in range(1, na+1)] + atoms = ["atom{}".format(i) for i in range(1, na + 1)] idx = tuple(getattr(p, a).idx for a in atoms) if idx not in values: values[idx] = [p] @@ -345,9 +347,13 @@ def parse(self, **kwargs): types = list(map(squash_identical, types)) attrs.append(Attr(vals, types=types, guessed=False, order=None)) - top = Topology(n_atoms, n_residues, n_segments, - attrs=attrs, - atom_resindex=residx, - residue_segindex=segidx) + top = Topology( + n_atoms, + n_residues, + n_segments, + attrs=attrs, + atom_resindex=residx, + residue_segindex=segidx, + ) return top diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py index aa78a7ea0c..c9348cb8fd 100644 --- a/package/MDAnalysis/converters/RDKit.py +++ b/package/MDAnalysis/converters/RDKit.py @@ -128,8 +128,18 @@ # anion charges are directly handled by the code using the typical valence # of the atom MONATOMIC_CATION_CHARGES = { - 3: 1, 11: 1, 19: 1, 37: 1, 47: 1, 55: 1, - 12: 2, 20: 2, 29: 2, 30: 2, 38: 2, 56: 2, + 3: 1, + 11: 1, + 19: 1, + 37: 1, + 47: 1, + 55: 1, + 12: 2, + 20: 2, + 29: 2, + 30: 2, + 38: 2, + 56: 2, 26: 2, # Fe could also be +3 13: 3, } @@ -153,10 +163,11 @@ class RDKitReader(memory.MemoryReader): .. versionadded:: 2.0.0 """ - format = 'RDKIT' + + format = "RDKIT" # Structure.coordinates always in Angstrom - units = {'time': None, 'length': 'Angstrom'} + units = {"time": None, "length": "Angstrom"} @staticmethod def _format_hint(thing): @@ -180,14 +191,15 @@ def __init__(self, filename, **kwargs): RDKit molecule """ n_atoms = filename.GetNumAtoms() - coordinates = np.array([ - conf.GetPositions() for conf in filename.GetConformers()], - dtype=np.float32) + coordinates = np.array( + [conf.GetPositions() for conf in filename.GetConformers()], + dtype=np.float32, + ) if coordinates.size == 0: warnings.warn("No coordinates found in the RDKit molecule") coordinates = np.empty((1, n_atoms, 3), dtype=np.float32) coordinates[:] = np.nan - super(RDKitReader, self).__init__(coordinates, order='fac', **kwargs) + super(RDKitReader, self).__init__(coordinates, order="fac", **kwargs) class RDKitConverter(base.ConverterBase): @@ -313,11 +325,12 @@ class RDKitConverter(base.ConverterBase): """ - lib = 'RDKIT' - units = {'time': None, 'length': 'Angstrom'} + lib = "RDKIT" + units = {"time": None, "length": "Angstrom"} - def convert(self, obj, cache=True, NoImplicit=True, max_iter=200, - force=False): + def convert( + self, obj, cache=True, NoImplicit=True, max_iter=200, force=False + ): """Write selection at current trajectory frame to :class:`~rdkit.Chem.rdchem.Mol`. @@ -342,16 +355,19 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200, try: from rdkit import Chem except ImportError: - raise ImportError("RDKit is required for the RDKitConverter but " - "it's not installed. Try installing it with \n" - "conda install -c conda-forge rdkit") + raise ImportError( + "RDKit is required for the RDKitConverter but " + "it's not installed. Try installing it with \n" + "conda install -c conda-forge rdkit" + ) try: # make sure to use atoms (Issue 46) ag = obj.atoms except AttributeError: - raise TypeError("No `atoms` attribute in object of type {}, " - "please use a valid AtomGroup or Universe".format( - type(obj))) from None + raise TypeError( + "No `atoms` attribute in object of type {}, " + "please use a valid AtomGroup or Universe".format(type(obj)) + ) from None # parameters passed to atomgroup_to_mol kwargs = dict(NoImplicit=NoImplicit, max_iter=max_iter, force=force) @@ -366,9 +382,11 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200, if np.isnan(ag.positions).any() or np.allclose( ag.positions, 0.0, rtol=0.0, atol=1e-12 ): - warnings.warn("NaN or empty coordinates detected in coordinates, " - "the output molecule will not have 3D coordinates " - "assigned") + warnings.warn( + "NaN or empty coordinates detected in coordinates, " + "the output molecule will not have 3D coordinates " + "assigned" + ) else: # assign coordinates conf = Chem.Conformer(mol.GetNumAtoms()) @@ -409,7 +427,8 @@ def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200, force=False): "The `elements` attribute is required for the RDKitConverter " "but is not present in this AtomGroup. Please refer to the " "documentation to guess elements from other attributes or " - "type `help(MDAnalysis.topology.guessers)`") from None + "type `help(MDAnalysis.topology.guessers)`" + ) from None if "H" not in ag.elements: if force: @@ -424,7 +443,8 @@ def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200, force=False): "the parameter ``NoImplicit=False`` when using the converter " "to allow implicit hydrogens and disable inferring bond " "orders and charges. You can also use ``force=True`` to " - "ignore this error.") + "ignore this error." + ) # attributes accepted in PDBResidueInfo object pdb_attrs = {} @@ -433,9 +453,12 @@ def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200, force=False): pdb_attrs[attr] = getattr(ag, attr) resnames = pdb_attrs.get("resnames", None) if resnames is None: + def get_resname(idx): return "" + else: + def get_resname(idx): return resnames[idx] @@ -473,7 +496,8 @@ def get_resname(idx): except NoDataError: warnings.warn( "No `bonds` attribute in this AtomGroup. Guessing bonds based " - "on atoms coordinates") + "on atoms coordinates" + ) ag.guess_bonds() for bond in ag.bonds: @@ -492,15 +516,17 @@ def get_resname(idx): mol = _standardize_patterns(mol, max_iter) # reorder atoms to match MDAnalysis, since the reactions from # _standardize_patterns will mess up the original order - order = np.argsort([atom.GetIntProp("_MDAnalysis_index") - for atom in mol.GetAtoms()]) + order = np.argsort( + [atom.GetIntProp("_MDAnalysis_index") for atom in mol.GetAtoms()] + ) mol = Chem.RenumberAtoms(mol, order.astype(int).tolist()) # sanitize if possible err = Chem.SanitizeMol(mol, catchErrors=True) if err: - warnings.warn("Could not sanitize molecule: " - f"failed during step {err!r}") + warnings.warn( + "Could not sanitize molecule: " f"failed during step {err!r}" + ) return mol @@ -515,7 +541,7 @@ def set_converter_cache_size(maxsize): conversions in memory. Using ``maxsize=None`` will remove all limits to the cache size, i.e. everything is cached. """ - global atomgroup_to_mol # pylint: disable=global-statement + global atomgroup_to_mol # pylint: disable=global-statement atomgroup_to_mol = lru_cache(maxsize=maxsize)(atomgroup_to_mol.__wrapped__) @@ -597,9 +623,12 @@ def _atom_sorter(atom): order and charge infering code to get the correct state on the first try. Currently sorts by number of unpaired electrons, then by number of heavy atom neighbors (i.e. atoms at the edge first).""" - num_heavy_neighbors = len([ - neighbor for neighbor in atom.GetNeighbors() - if neighbor.GetAtomicNum() > 1] + num_heavy_neighbors = len( + [ + neighbor + for neighbor in atom.GetNeighbors() + if neighbor.GetAtomicNum() > 1 + ] ) return (-_get_nb_unpaired_electrons(atom)[0], num_heavy_neighbors) @@ -630,16 +659,19 @@ def _infer_bo_and_charges(mol): """ # heavy atoms sorted by number of heavy atom neighbors (lower first) then # NUE (higher first) - atoms = sorted([atom for atom in mol.GetAtoms() - if atom.GetAtomicNum() > 1], - key=_atom_sorter) + atoms = sorted( + [atom for atom in mol.GetAtoms() if atom.GetAtomicNum() > 1], + key=_atom_sorter, + ) for atom in atoms: # monatomic ions if atom.GetDegree() == 0: - atom.SetFormalCharge(MONATOMIC_CATION_CHARGES.get( - atom.GetAtomicNum(), - -_get_nb_unpaired_electrons(atom)[0])) + atom.SetFormalCharge( + MONATOMIC_CATION_CHARGES.get( + atom.GetAtomicNum(), -_get_nb_unpaired_electrons(atom)[0] + ) + ) mol.UpdatePropertyCache(strict=False) continue # get NUE for each possible valence @@ -654,8 +686,11 @@ def _infer_bo_and_charges(mol): if (len(nue) == 1) and (nue[0] <= 0): continue else: - neighbors = sorted(atom.GetNeighbors(), reverse=True, - key=lambda a: _get_nb_unpaired_electrons(a)[0]) + neighbors = sorted( + atom.GetNeighbors(), + reverse=True, + key=lambda a: _get_nb_unpaired_electrons(a)[0], + ) # check if one of the neighbors has a common NUE for na in neighbors: # get NUE for the neighbor @@ -663,13 +698,12 @@ def _infer_bo_and_charges(mol): # smallest common NUE common_nue = min( min([i for i in nue if i >= 0], default=0), - min([i for i in na_nue if i >= 0], default=0) + min([i for i in na_nue if i >= 0], default=0), ) # a common NUE of 0 means we don't need to do anything if common_nue != 0: # increase bond order - bond = mol.GetBondBetweenAtoms( - atom.GetIdx(), na.GetIdx()) + bond = mol.GetBondBetweenAtoms(atom.GetIdx(), na.GetIdx()) order = common_nue + 1 bond.SetBondType(RDBONDORDER[order]) mol.UpdatePropertyCache(strict=False) @@ -843,14 +877,17 @@ def _rebuild_conjugated_bonds(mol, max_iter=200): pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0]") # pattern used to finish fixing a series of conjugated bonds base_end_pattern = Chem.MolFromSmarts( - "[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]") + "[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]" + ) # used when there's an odd number of matches for `pattern` odd_end_pattern = Chem.MolFromSmarts( "[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0,#7+1]=O)," - "$([S;D4;v4]-[O-])]") + "$([S;D4;v4]-[O-])]" + ) # number of unique matches with the pattern - n_matches = len(set([match[0] - for match in mol.GetSubstructMatches(pattern)])) + n_matches = len( + set([match[0] for match in mol.GetSubstructMatches(pattern)]) + ) # nothing to standardize if n_matches == 0: return @@ -886,21 +923,27 @@ def _rebuild_conjugated_bonds(mol, max_iter=200): ): for neighbor in term_atom.GetNeighbors(): bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx()) - if (neighbor.GetAtomicNum() == 8 and - bond.GetBondTypeAsDouble() == 2): + if ( + neighbor.GetAtomicNum() == 8 + and bond.GetBondTypeAsDouble() == 2 + ): bond.SetBondType(Chem.BondType.SINGLE) neighbor.SetFormalCharge(-1) break # edge-case 2: S=O # [*-]-*=*-[Sv4]-[O-] --> *=*-*=[Sv6]=O # transform -[O-] to =O - elif (term_atom.GetAtomicNum() == 16 and - term_atom.GetFormalCharge() == 0): + elif ( + term_atom.GetAtomicNum() == 16 + and term_atom.GetFormalCharge() == 0 + ): for neighbor in term_atom.GetNeighbors(): bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx()) - if (neighbor.GetAtomicNum() == 8 and - neighbor.GetFormalCharge() == -1 and - bond.GetBondTypeAsDouble() == 1): + if ( + neighbor.GetAtomicNum() == 8 + and neighbor.GetFormalCharge() == -1 + and bond.GetBondTypeAsDouble() == 1 + ): bond.SetBondType(Chem.BondType.DOUBLE) neighbor.SetFormalCharge(0) break @@ -976,5 +1019,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200): return # reached max_iter - warnings.warn("The standardization could not be completed within a " - "reasonable number of iterations") + warnings.warn( + "The standardization could not be completed within a " + "reasonable number of iterations" + ) diff --git a/package/MDAnalysis/converters/RDKitParser.py b/package/MDAnalysis/converters/RDKitParser.py index 24c730ac06..b837ea60a8 100644 --- a/package/MDAnalysis/converters/RDKitParser.py +++ b/package/MDAnalysis/converters/RDKitParser.py @@ -160,7 +160,8 @@ class RDKitParser(TopologyReaderBase): the type attribute get the same values as the element attribute. """ - format = 'RDKIT' + + format = "RDKIT" @staticmethod def _format_hint(thing): @@ -203,19 +204,24 @@ def parse(self, **kwargs): try: atom = mol.GetAtomWithIdx(0) except RuntimeError: - top = Topology(n_atoms=0, n_res=0, n_seg=0, - attrs=None, - atom_resindex=None, - residue_segindex=None) + top = Topology( + n_atoms=0, + n_res=0, + n_seg=0, + attrs=None, + atom_resindex=None, + residue_segindex=None, + ) return top # check if multiple charges present - if atom.HasProp('_GasteigerCharge') and ( - atom.HasProp('_TriposPartialCharge') + if atom.HasProp("_GasteigerCharge") and ( + atom.HasProp("_TriposPartialCharge") ): warnings.warn( - 'Both _GasteigerCharge and _TriposPartialCharge properties ' - 'are present. Using Gasteiger charges by default.') + "Both _GasteigerCharge and _TriposPartialCharge properties " + "are present. Using Gasteiger charges by default." + ) for atom in mol.GetAtoms(): ids.append(atom.GetIdx()) @@ -224,7 +230,7 @@ def parse(self, **kwargs): aromatics.append(atom.GetIsAromatic()) chiralities.append(_rdkit_atom_to_RS(atom)) mi = atom.GetMonomerInfo() - if mi: # atom name and residue info are present + if mi: # atom name and residue info are present names.append(mi.GetName().strip()) resnums.append(mi.GetResidueNumber()) resnames.append(mi.GetResidueName()) @@ -237,23 +243,25 @@ def parse(self, **kwargs): else: # atom name (MOL2 only) try: - names.append(atom.GetProp('_TriposAtomName')) + names.append(atom.GetProp("_TriposAtomName")) except KeyError: pass # atom type (MOL2 only) try: - atomtypes.append(atom.GetProp('_TriposAtomType')) + atomtypes.append(atom.GetProp("_TriposAtomType")) except KeyError: pass # gasteiger charge (computed): # if the user took the time to compute them, make it a priority # over charges read from a MOL2 file try: - charges.append(atom.GetDoubleProp('_GasteigerCharge')) + charges.append(atom.GetDoubleProp("_GasteigerCharge")) except KeyError: # partial charge (MOL2 only) try: - charges.append(atom.GetDoubleProp('_TriposPartialCharge')) + charges.append( + atom.GetDoubleProp("_TriposPartialCharge") + ) except KeyError: pass @@ -277,7 +285,7 @@ def parse(self, **kwargs): (elements, Elements, object), (masses, Masses, np.float32), (aromatics, Aromaticities, bool), - (chiralities, RSChirality, 'U1'), + (chiralities, RSChirality, "U1"), ): attrs.append(Attr(np.array(vals, dtype=dtype))) @@ -312,7 +320,7 @@ def parse(self, **kwargs): if charges: attrs.append(Charges(np.array(charges, dtype=np.float32))) else: - pass # no guesser yet + pass # no guesser yet # PDB only for vals, Attr, dtype in ( @@ -332,7 +340,8 @@ def parse(self, **kwargs): icodes = np.array(icodes, dtype=object) residx, (resnums, resnames, icodes, segids) = change_squash( (resnums, resnames, icodes, segids), - (resnums, resnames, icodes, segids)) + (resnums, resnames, icodes, segids), + ) n_residues = len(resnums) for vals, Attr, dtype in ( (resnums, Resids, np.int32), @@ -354,13 +363,17 @@ def parse(self, **kwargs): attrs.append(Segids(segids)) else: n_segments = 1 - attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) + attrs.append(Segids(np.array(["SYSTEM"], dtype=object))) segidx = None # create topology - top = Topology(n_atoms, n_residues, n_segments, - attrs=attrs, - atom_resindex=residx, - residue_segindex=segidx) + top = Topology( + n_atoms, + n_residues, + n_segments, + attrs=attrs, + atom_resindex=residx, + residue_segindex=segidx, + ) return top diff --git a/package/MDAnalysis/converters/base.py b/package/MDAnalysis/converters/base.py index 234d4f7da4..5f3d0e3250 100644 --- a/package/MDAnalysis/converters/base.py +++ b/package/MDAnalysis/converters/base.py @@ -41,7 +41,7 @@ class _Convertermeta(type): def __init__(cls, name, bases, classdict): type.__init__(type, name, bases, classdict) try: - fmt = asiterable(classdict['lib']) + fmt = asiterable(classdict["lib"]) except KeyError: pass else: @@ -51,8 +51,7 @@ def __init__(cls, name, bases, classdict): class ConverterBase(IOBase, metaclass=_Convertermeta): - """Base class for converting to other libraries. - """ + """Base class for converting to other libraries.""" def __repr__(self): return "<{cls}>".format(cls=self.__class__.__name__) diff --git a/package/pyproject.toml b/package/pyproject.toml index 5853b74a21..7699f7c3b4 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -135,6 +135,7 @@ tables\.py | MDAnalysis/visualization/.*\.py | MDAnalysis/lib/.*\.py^ | MDAnalysis/transformations/.*\.py +| MDAnalysis/converters/.*\.py ) ''' extend-exclude = ''' diff --git a/testsuite/MDAnalysisTests/converters/test_base.py b/testsuite/MDAnalysisTests/converters/test_base.py index 99a4deca03..a8bebca0c3 100644 --- a/testsuite/MDAnalysisTests/converters/test_base.py +++ b/testsuite/MDAnalysisTests/converters/test_base.py @@ -28,18 +28,22 @@ def test_coordinate_converterbase_warning(): from MDAnalysis.coordinates.base import ConverterBase import MDAnalysis.converters.base - wmsg = ("ConverterBase moved from coordinates.base." - "ConverterBase to converters.base.ConverterBase " - "and will be removed from coordinates.base " - "in MDAnalysis release 3.0.0") + wmsg = ( + "ConverterBase moved from coordinates.base." + "ConverterBase to converters.base.ConverterBase " + "and will be removed from coordinates.base " + "in MDAnalysis release 3.0.0" + ) with pytest.warns(DeprecationWarning, match=wmsg): + class DerivedConverter(ConverterBase): pass assert issubclass(DerivedConverter, ConverterBase) - assert not issubclass(DerivedConverter, - MDAnalysis.converters.base.ConverterBase) + assert not issubclass( + DerivedConverter, MDAnalysis.converters.base.ConverterBase + ) def test_converters_converterbase_no_warning(): diff --git a/testsuite/MDAnalysisTests/converters/test_openmm.py b/testsuite/MDAnalysisTests/converters/test_openmm.py index 4405547b8c..0983583d56 100644 --- a/testsuite/MDAnalysisTests/converters/test_openmm.py +++ b/testsuite/MDAnalysisTests/converters/test_openmm.py @@ -43,7 +43,7 @@ pytest.skip(allow_module_level=True) -class TestOpenMMBasicSimulationReader(): +class TestOpenMMBasicSimulationReader: @pytest.fixture def omm_sim_uni(self): system = mm.System() @@ -57,7 +57,8 @@ def omm_sim_uni(self): positions = np.ones((5, 3)) * unit.angstrom integrator = mm.LangevinIntegrator( 273 * unit.kelvin, - 1.0 / unit.picoseconds, 2.0 * unit.femtoseconds, + 1.0 / unit.picoseconds, + 2.0 * unit.femtoseconds, ) simulation = app.Simulation(topology, system, integrator) simulation.context.setPositions(positions) @@ -67,11 +68,13 @@ def omm_sim_uni(self): def test_dimensions(self, omm_sim_uni): assert_allclose( omm_sim_uni.trajectory.ts.dimensions, - np.array([20., 20., 20., 90., 90., 90.]), + np.array([20.0, 20.0, 20.0, 90.0, 90.0, 90.0]), rtol=0, atol=1e-3, - err_msg=("OpenMMBasicSimulationReader failed to get unitcell " - "dimensions from OpenMM Simulation Object"), + err_msg=( + "OpenMMBasicSimulationReader failed to get unitcell " + "dimensions from OpenMM Simulation Object" + ), ) def test_coordinates(self, omm_sim_uni): @@ -84,7 +87,7 @@ def test_basic_topology(self, omm_sim_uni): assert omm_sim_uni.residues.n_residues == 1 assert omm_sim_uni.residues.resnames[0] == "RES" assert omm_sim_uni.segments.n_segments == 1 - assert omm_sim_uni.segments.segids[0] == '0' + assert omm_sim_uni.segments.segids[0] == "0" assert len(omm_sim_uni.bonds.indices) == 0 def test_data(self, omm_sim_uni): @@ -107,8 +110,10 @@ def test_dimensions(self): self.ref.trajectory.ts.dimensions, rtol=0, atol=1e-3, - err_msg=("OpenMMPDBFileReader failed to get unitcell dimensions " - "from OpenMMPDBFile"), + err_msg=( + "OpenMMPDBFileReader failed to get unitcell dimensions " + "from OpenMMPDBFile" + ), ) def test_coordinates(self): @@ -133,8 +138,10 @@ def test_dimensions(self): self.ref.trajectory.ts.dimensions, rtol=0, atol=1e-3, - err_msg=("OpenMMModellerReader failed to get unitcell dimensions " - "from OpenMMModeller"), + err_msg=( + "OpenMMModellerReader failed to get unitcell dimensions " + "from OpenMMModeller" + ), ) def test_coordinates(self): @@ -167,8 +174,10 @@ def test_dimensions(self): self.ref.trajectory.ts.dimensions, rtol=0, atol=1e-3, - err_msg=("OpenMMSimulationReader failed to get unitcell " - "dimensions from OpenMMSimulation"), + err_msg=( + "OpenMMSimulationReader failed to get unitcell " + "dimensions from OpenMMSimulation" + ), ) def test_coordinates(self): @@ -176,7 +185,7 @@ def test_coordinates(self): rp = self.ref.atoms.positions assert_allclose(up, rp, rtol=0, atol=1e-3) - @pytest.mark.xfail(reason='OpenMM pickling not supported yet') + @pytest.mark.xfail(reason="OpenMM pickling not supported yet") def test_pickle_singleframe_reader(self): """ See `OpenMM SwigPyObject serialisation discussion `_ diff --git a/testsuite/MDAnalysisTests/converters/test_openmm_parser.py b/testsuite/MDAnalysisTests/converters/test_openmm_parser.py index 12fdbd4857..7188499fd8 100644 --- a/testsuite/MDAnalysisTests/converters/test_openmm_parser.py +++ b/testsuite/MDAnalysisTests/converters/test_openmm_parser.py @@ -53,7 +53,7 @@ class OpenMMTopologyBase(ParserBase): "bonds", "chainIDs", "elements", - "types" + "types", ] expected_n_bonds = 0 @@ -113,12 +113,12 @@ def test_segids(self, top): assert top.segids.values == [] def test_elements(self, top): - if 'elements' in self.expected_attrs: + if "elements" in self.expected_attrs: assert len(top.elements.values) == self.expected_n_atoms assert isinstance(top.elements.values, np.ndarray) assert all(isinstance(elem, str) for elem in top.elements.values) else: - assert not hasattr(top, 'elements') + assert not hasattr(top, "elements") def test_atomtypes(self, top): assert len(top.types.values) == self.expected_n_atoms @@ -131,15 +131,17 @@ def test_masses(self, top): assert len(top.masses.values) == self.expected_n_atoms if self.expected_n_atoms: assert isinstance(top.masses.values, np.ndarray) - assert all(isinstance(mass, np.float64) - for mass in top.masses.values) + assert all( + isinstance(mass, np.float64) for mass in top.masses.values + ) else: assert top.masses.values == [] def test_guessed_attributes(self, filename): u = mda.Universe(filename, topology_format="OPENMMTOPOLOGY") - u_guessed_attrs = [attr.attrname for attr - in u._topology.guessed_attributes] + u_guessed_attrs = [ + attr.attrname for attr in u._topology.guessed_attributes + ] for attr in self.guessed_attrs: assert hasattr(u.atoms, attr) assert attr in u_guessed_attrs @@ -156,7 +158,7 @@ class OpenMMAppTopologyBase(OpenMMTopologyBase): "bonds", "chainIDs", "elements", - "types" + "types", ] expected_n_bonds = 0 @@ -193,8 +195,10 @@ class TestOpenMMTopologyParserWithPartialElements(OpenMMTopologyBase): expected_n_bonds = 25533 def test_with_partial_elements(self): - wmsg1 = ("Element information missing for some atoms. " - "These have been given an empty element record ") + wmsg1 = ( + "Element information missing for some atoms. " + "These have been given an empty element record " + ) wmsg2 = ( "For absent elements, atomtype has been " @@ -202,14 +206,15 @@ def test_with_partial_elements(self): "If needed these can be guessed using " "universe.guess_TopologyAttrs(to_guess=['masses', 'types']). " "(for MDAnalysis version 2.x this is done automatically," - " but it will be removed in 3.0).") + " but it will be removed in 3.0)." + ) with pytest.warns(UserWarning) as warnings: mda_top = self.parser(self.ref_filename).parse() - assert mda_top.types.values[3344] == 'X' - assert mda_top.types.values[3388] == 'X' - assert mda_top.elements.values[3344] == '' - assert mda_top.elements.values[3388] == '' + assert mda_top.types.values[3344] == "X" + assert mda_top.types.values[3388] == "X" + assert mda_top.elements.values[3344] == "" + assert mda_top.elements.values[3388] == "" assert mda_top.masses.values[3344] == 0.0 assert mda_top.masses.values[3388] == 0.0 @@ -233,7 +238,8 @@ def test_no_elements_warn(): "but it will be removed in MDAnalysis v3.0. " "These can be guessed using " "universe.guess_TopologyAttrs(to_guess=['masses', 'types']) " - "See MDAnalysis.guessers.") + "See MDAnalysis.guessers." + ) with pytest.warns(UserWarning) as warnings: mda_top = parser(omm_top).parse() @@ -243,26 +249,26 @@ def test_no_elements_warn(): def test_invalid_element_symbols(): parser = mda.converters.OpenMMParser.OpenMMTopologyParser omm_top = Topology() - bad1 = Element(0, "*", "*", 0*daltons) - bad2 = Element(0, "?", "?", 6*daltons) + bad1 = Element(0, "*", "*", 0 * daltons) + bad2 = Element(0, "?", "?", 6 * daltons) bad3 = None - silver = Element.getBySymbol('Ag') + silver = Element.getBySymbol("Ag") chain = omm_top.addChain() - res = omm_top.addResidue('R', chain) - omm_top.addAtom(name='Ag', element=silver, residue=res) - omm_top.addAtom(name='Bad1', element=bad1, residue=res) - omm_top.addAtom(name='Bad2', element=bad2, residue=res) - omm_top.addAtom(name='Bad3', element=bad3, residue=res) + res = omm_top.addResidue("R", chain) + omm_top.addAtom(name="Ag", element=silver, residue=res) + omm_top.addAtom(name="Bad1", element=bad1, residue=res) + omm_top.addAtom(name="Bad2", element=bad2, residue=res) + omm_top.addAtom(name="Bad3", element=bad3, residue=res) mda_top = parser(omm_top).parse() - assert mda_top.types.values[0] == 'Ag' - assert mda_top.types.values[1] == '*' - assert mda_top.types.values[2] == '?' - assert mda_top.types.values[3] == 'X' - assert mda_top.elements.values[0] == 'Ag' - assert mda_top.elements.values[1] == '' - assert mda_top.elements.values[2] == '' - assert mda_top.elements.values[3] == '' + assert mda_top.types.values[0] == "Ag" + assert mda_top.types.values[1] == "*" + assert mda_top.types.values[2] == "?" + assert mda_top.types.values[3] == "X" + assert mda_top.elements.values[0] == "Ag" + assert mda_top.elements.values[1] == "" + assert mda_top.elements.values[2] == "" + assert mda_top.elements.values[3] == "" assert mda_top.masses.values[0] == 107.86822 assert mda_top.masses.values[1] == 0.0 assert mda_top.masses.values[2] == 6.0 diff --git a/testsuite/MDAnalysisTests/converters/test_parmed.py b/testsuite/MDAnalysisTests/converters/test_parmed.py index e0503bd9da..337495cf8f 100644 --- a/testsuite/MDAnalysisTests/converters/test_parmed.py +++ b/testsuite/MDAnalysisTests/converters/test_parmed.py @@ -24,7 +24,7 @@ import MDAnalysis as mda import numpy as np -from numpy.testing import (assert_allclose, assert_equal) +from numpy.testing import assert_allclose, assert_equal from numpy.lib import NumpyVersion from MDAnalysisTests.coordinates.base import _SingleFrameReader @@ -46,10 +46,9 @@ # TODO: remove this guard when parmed has a release # that support NumPy 2 if NumpyVersion(np.__version__) < "2.0.0": - pmd = pytest.importorskip('parmed') + pmd = pytest.importorskip("parmed") else: - pmd = pytest.importorskip('parmed_skip_with_numpy2') - + pmd = pytest.importorskip("parmed_skip_with_numpy2") class TestParmEdReaderGRO: @@ -60,18 +59,20 @@ class TestParmEdReaderGRO: def test_dimensions(self): assert_allclose( - self.universe.trajectory.ts.dimensions, + self.universe.trajectory.ts.dimensions, self.ref.trajectory.ts.dimensions, rtol=0, atol=1e-3, - err_msg=("ParmEdReader failed to get unitcell dimensions " - "from ParmEd")) - + err_msg=( + "ParmEdReader failed to get unitcell dimensions " "from ParmEd" + ), + ) + def test_coordinates(self): up = self.universe.atoms.positions rp = self.ref.atoms.positions assert_allclose(up, rp, rtol=0, atol=1e-3) - + class BaseTestParmEdReader(_SingleFrameReader): def setUp(self): @@ -80,27 +81,31 @@ def setUp(self): def test_dimensions(self): assert_allclose( - self.universe.trajectory.ts.dimensions, + self.universe.trajectory.ts.dimensions, self.ref.trajectory.ts.dimensions, rtol=0, atol=1e-3, - err_msg=("ParmEdReader failed to get unitcell dimensions " - "from ParmEd")) - + err_msg=( + "ParmEdReader failed to get unitcell dimensions " "from ParmEd" + ), + ) + def test_coordinates(self): up = self.universe.atoms.positions rp = self.ref.atoms.positions assert_allclose(up, rp, rtol=0, atol=1e-3) - + class TestParmEdReaderPDB(BaseTestParmEdReader): ref_filename = RefAdKSmall.filename - + def test_uses_ParmEdReader(self): from MDAnalysis.coordinates.ParmEd import ParmEdReader - assert isinstance(self.universe.trajectory, ParmEdReader), "failed to choose ParmEdReader" + assert isinstance( + self.universe.trajectory, ParmEdReader + ), "failed to choose ParmEdReader" def _parmed_param_eq(a, b): @@ -108,52 +113,66 @@ def _parmed_param_eq(a, b): b_idx = [b.atom1.idx, b.atom2.idx] for i in (3, 4, 5): - atom = 'atom{}'.format(i) + atom = "atom{}".format(i) if hasattr(a, atom): if not hasattr(b, atom): return False a_idx.append(getattr(a, atom).idx) b_idx.append(getattr(b, atom).idx) - + atoms = a_idx == b_idx or a_idx == b_idx[::-1] return atoms and a.type == b.type class BaseTestParmEdConverter: - equal_atom_attrs = ('name', 'altloc') - almost_equal_atom_attrs = ('mass', 'charge', 'occupancy') - expected_attrs = ('atoms', 'bonds', 'angles', 'dihedrals', 'impropers', - 'cmaps', 'urey_bradleys') - - - @pytest.fixture(scope='class') + equal_atom_attrs = ("name", "altloc") + almost_equal_atom_attrs = ("mass", "charge", "occupancy") + expected_attrs = ( + "atoms", + "bonds", + "angles", + "dihedrals", + "impropers", + "cmaps", + "urey_bradleys", + ) + + @pytest.fixture(scope="class") def ref(self): # skip_bonds controls whether to search for bonds if it's not in the file - return pmd.load_file(self.ref_filename, skip_bonds=True) + return pmd.load_file(self.ref_filename, skip_bonds=True) - @pytest.fixture(scope='class') + @pytest.fixture(scope="class") def universe(self, ref): return mda.Universe(self.ref_filename) - @pytest.fixture(scope='class') + @pytest.fixture(scope="class") def output(self, universe): - return universe.atoms.convert_to('PARMED') + return universe.atoms.convert_to("PARMED") - @pytest.fixture(scope='class') + @pytest.fixture(scope="class") def roundtrip(self, ref): u = mda.Universe(ref) - return u.atoms.convert_to('PARMED') + return u.atoms.convert_to("PARMED") def test_equivalent_connectivity_counts(self, universe, output): - for attr in ('atoms', 'bonds', 'angles', 'dihedrals', 'impropers', - 'cmaps', 'urey_bradleys'): + for attr in ( + "atoms", + "bonds", + "angles", + "dihedrals", + "impropers", + "cmaps", + "urey_bradleys", + ): u = getattr(universe, attr, []) o = getattr(output, attr) assert len(u) == len(o) - - @pytest.mark.parametrize('attr', ('bonds', 'angles', 'dihedrals', 'impropers', - 'cmaps')) + + @pytest.mark.parametrize( + "attr", ("bonds", "angles", "dihedrals", "impropers", "cmaps") + ) def test_equivalent_connectivity_values(self, universe, output, attr): u = getattr(universe._topology, attr, []) vals = u.values if u else [] @@ -190,17 +209,25 @@ def test_equivalent_atoms(self, ref, output): for attr in self.equal_atom_attrs: ra = getattr(r, attr) oa = getattr(o, attr) - assert ra == oa, 'atom {} not equal for atoms {} and {}'.format(attr, r, o) - + assert ( + ra == oa + ), "atom {} not equal for atoms {} and {}".format(attr, r, o) + for attr in self.almost_equal_atom_attrs: ra = getattr(r, attr) oa = getattr(o, attr) - assert_allclose(ra, oa, rtol=0, atol=1e-2, - err_msg=(f'atom {attr} not almost equal for atoms ' - f'{r} and {o}')) - - @pytest.mark.parametrize('attr', ('bonds', 'angles', 'impropers', - 'cmaps')) + assert_allclose( + ra, + oa, + rtol=0, + atol=1e-2, + err_msg=( + f"atom {attr} not almost equal for atoms " + f"{r} and {o}" + ), + ) + + @pytest.mark.parametrize("attr", ("bonds", "angles", "impropers", "cmaps")) def test_equivalent_connectivity_types(self, ref, roundtrip, attr): original = getattr(ref, attr) for p in getattr(roundtrip, attr): @@ -211,9 +238,14 @@ def test_equivalent_connectivity_types(self, ref, roundtrip, attr): def test_equivalent_dihedrals(self, ref, roundtrip): original = ref.dihedrals for p in roundtrip.dihedrals: - assert any((_parmed_param_eq(p, q) and - p.improper == q.improper and - p.ignore_end == q.ignore_end) for q in original) + assert any( + ( + _parmed_param_eq(p, q) + and p.improper == q.improper + and p.ignore_end == q.ignore_end + ) + for q in original + ) def test_missing_attr(self): n_atoms = 10 @@ -221,9 +253,10 @@ def test_missing_attr(self): u.add_TopologyAttr("resid", [1]) u.add_TopologyAttr("segid", ["DUM"]) u.add_TopologyAttr("mass", [1] * n_atoms) - with pytest.warns(UserWarning, - match="Supplied AtomGroup was missing the following " - "attributes"): + with pytest.warns( + UserWarning, + match="Supplied AtomGroup was missing the following " "attributes", + ): # should miss names and resnames u.atoms.convert_to("PARMED") @@ -234,29 +267,36 @@ class BaseTestParmEdConverterSubset(BaseTestParmEdConverter): end_i = 0 skip_i = 1 - @pytest.fixture(scope='class') + @pytest.fixture(scope="class") def ref(self): # skip_bonds controls whether to search for bonds if it's not in the file struct = pmd.load_file(self.ref_filename, skip_bonds=True) - return struct[self.start_i:self.end_i:self.skip_i] + return struct[self.start_i : self.end_i : self.skip_i] - @pytest.fixture(scope='class') + @pytest.fixture(scope="class") def universe(self): u = mda.Universe(self.ref_filename) - return mda.Merge(u.atoms[self.start_i:self.end_i:self.skip_i]) + return mda.Merge(u.atoms[self.start_i : self.end_i : self.skip_i]) class BaseTestParmEdConverterFromParmed(BaseTestParmEdConverter): - equal_atom_attrs = ('name', 'number', 'altloc') + equal_atom_attrs = ("name", "number", "altloc") - @pytest.fixture(scope='class') + @pytest.fixture(scope="class") def universe(self, ref): return mda.Universe(ref) def test_equivalent_connectivity_counts(self, ref, output): - for attr in ('atoms', 'bonds', 'angles', 'dihedrals', 'impropers', - 'cmaps', 'urey_bradleys'): + for attr in ( + "atoms", + "bonds", + "angles", + "dihedrals", + "impropers", + "cmaps", + "urey_bradleys", + ): r = getattr(ref, attr) o = getattr(output, attr) assert len(r) == len(o) @@ -292,14 +332,16 @@ class TestParmEdConverterGROSubset(BaseTestParmEdConverterSubset): start_i = 5 end_i = 100 + # TODO: Add Subset test for PRMs when mda.Merge accepts Universes without positions + class TestParmEdConverterPDB(BaseTestParmEdConverter): ref_filename = PDB_small - # Neither MDAnalysis nor ParmEd read the mass column + # Neither MDAnalysis nor ParmEd read the mass column # of PDBs and are liable to guess wrong - almost_equal_atom_attrs = ('charge', 'occupancy') + almost_equal_atom_attrs = ("charge", "occupancy") def test_equivalent_coordinates(self, ref, output): assert_allclose(ref.coordinates, output.coordinates, rtol=0, atol=1e-3) diff --git a/testsuite/MDAnalysisTests/converters/test_parmed_parser.py b/testsuite/MDAnalysisTests/converters/test_parmed_parser.py index ea73e8dc00..1a11876fd4 100644 --- a/testsuite/MDAnalysisTests/converters/test_parmed_parser.py +++ b/testsuite/MDAnalysisTests/converters/test_parmed_parser.py @@ -26,25 +26,42 @@ import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase -from MDAnalysisTests.datafiles import ( - PSF_NAMD_GBIS, - PRM -) +from MDAnalysisTests.datafiles import PSF_NAMD_GBIS, PRM -pmd = pytest.importorskip('parmed') +pmd = pytest.importorskip("parmed") class BaseTestParmedParser(ParserBase): parser = mda.converters.ParmEdParser.ParmEdParser - expected_attrs = ['ids', 'names', 'types', 'masses', - 'charges', 'altLocs', 'occupancies', - 'tempfactors', 'gbscreens', 'solventradii', - 'nbindices', 'rmins', 'epsilons', 'rmin14s', - 'epsilon14s', 'elements', 'chainIDs', - 'resids', 'resnames', 'resnums', - 'segids', - 'bonds', 'ureybradleys', 'angles', - 'dihedrals', 'impropers', 'cmaps'] + expected_attrs = [ + "ids", + "names", + "types", + "masses", + "charges", + "altLocs", + "occupancies", + "tempfactors", + "gbscreens", + "solventradii", + "nbindices", + "rmins", + "epsilons", + "rmin14s", + "epsilon14s", + "elements", + "chainIDs", + "resids", + "resnames", + "resnums", + "segids", + "bonds", + "ureybradleys", + "angles", + "dihedrals", + "impropers", + "cmaps", + ] expected_n_atoms = 0 expected_n_residues = 1 @@ -69,40 +86,61 @@ def test_creates_universe(self, filename): assert isinstance(u, mda.Universe) def test_bonds_total_counts(self, top, filename): - unique = set([(a.atom1.idx, a.atom2.idx) - for a in filename.bonds]) + unique = set([(a.atom1.idx, a.atom2.idx) for a in filename.bonds]) assert len(top.bonds.values) == len(unique) def test_angles_total_counts(self, top, filename): - unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx) - for a in filename.angles]) + unique = set( + [(a.atom1.idx, a.atom2.idx, a.atom3.idx) for a in filename.angles] + ) assert len(top.angles.values) == len(unique) def test_dihedrals_total_counts(self, top, filename): - unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx, a.atom4.idx) - for a in filename.dihedrals]) + unique = set( + [ + (a.atom1.idx, a.atom2.idx, a.atom3.idx, a.atom4.idx) + for a in filename.dihedrals + ] + ) assert len(top.dihedrals.values) == len(unique) def test_impropers_total_counts(self, top, filename): - unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx, a.atom4.idx) - for a in filename.impropers]) + unique = set( + [ + (a.atom1.idx, a.atom2.idx, a.atom3.idx, a.atom4.idx) + for a in filename.impropers + ] + ) assert len(top.impropers.values) == len(unique) def test_cmaps_total_counts(self, top, filename): - unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx, - a.atom4.idx, a.atom5.idx) - for a in filename.cmaps]) + unique = set( + [ + ( + a.atom1.idx, + a.atom2.idx, + a.atom3.idx, + a.atom4.idx, + a.atom5.idx, + ) + for a in filename.cmaps + ] + ) assert len(top.cmaps.values) == len(unique) def test_ureybradleys_total_counts(self, top, filename): - unique = set([(a.atom1.idx, a.atom2.idx) - for a in filename.urey_bradleys]) + unique = set( + [(a.atom1.idx, a.atom2.idx) for a in filename.urey_bradleys] + ) assert len(top.ureybradleys.values) == len(unique) def test_elements(self, top): for erange, evals in zip(self.elems_ranges, self.expected_elems): - assert_equal(top.elements.values[erange[0]:erange[1]], evals, - "unexpected element match") + assert_equal( + top.elements.values[erange[0] : erange[1]], + evals, + "unexpected element match", + ) class TestParmedParserPSF(BaseTestParmedParser): @@ -121,20 +159,21 @@ class TestParmedParserPSF(BaseTestParmedParser): expected_n_cmaps = 212 elems_ranges = ((100, 120),) # No atomic numbers set by parmed == no elements - expected_elems = (np.array( - ['N', 'H', 'C', 'H', 'C', 'H', 'H', 'C', 'H', 'C', 'H', 'H', 'H', 'C', - 'H', 'H', 'H', 'C', 'O', 'N',], dtype=object),) + expected_elems = (np.array(list("NHCHCHHCHCHHHCHHHCON"), dtype=object),) def test_bonds_atom_counts(self, universe): assert len(universe.atoms[[0]].bonds) == 4 assert len(universe.atoms[[42]].bonds) == 1 - @pytest.mark.parametrize('value', ( - (0, 1), - (0, 2), - (0, 3), - (0, 4), - )) + @pytest.mark.parametrize( + "value", + ( + (0, 1), + (0, 2), + (0, 3), + (0, 4), + ), + ) def test_bonds_identity(self, top, value): vals = top.bonds.values assert value in vals or value[::-1] in vals @@ -150,11 +189,14 @@ def test_angles_atom_counts(self, universe): assert len(universe.atoms[[0]].angles), 9 assert len(universe.atoms[[42]].angles), 2 - @pytest.mark.parametrize('value', ( - (1, 0, 2), - (1, 0, 3), - (1, 0, 4), - )) + @pytest.mark.parametrize( + "value", + ( + (1, 0, 2), + (1, 0, 3), + (1, 0, 4), + ), + ) def test_angles_identity(self, top, value): vals = top.angles.values assert value in vals or value[::-1] in vals @@ -162,20 +204,22 @@ def test_angles_identity(self, top, value): def test_dihedrals_atom_counts(self, universe): assert len(universe.atoms[[0]].dihedrals) == 14 - @pytest.mark.parametrize('value', ( - (0, 4, 6, 7), - (0, 4, 6, 8), - (0, 4, 6, 9), - (0, 4, 17, 18), - )) + @pytest.mark.parametrize( + "value", + ( + (0, 4, 6, 7), + (0, 4, 6, 8), + (0, 4, 6, 9), + (0, 4, 17, 18), + ), + ) def test_dihedrals_identity(self, top, value): vals = top.dihedrals.values assert value in vals or value[::-1] in vals - @pytest.mark.parametrize('value', ( - (17, 19, 21, 41, 43), - (60, 62, 64, 79, 81) - )) + @pytest.mark.parametrize( + "value", ((17, 19, 21, 41, 43), (60, 62, 64, 79, 81)) + ) def test_cmaps_identity(self, top, value): vals = top.cmaps.values assert value in vals or value[::-1] in vals @@ -191,21 +235,24 @@ class TestParmedParserPRM(BaseTestParmedParser): expected_n_atoms = 252 expected_n_residues = 14 elems_ranges = ((0, 8), (30, 38)) - expected_elems = (np.array(['N', 'H', 'H', 'H', 'C', 'H', 'C', 'H'], - dtype=object), - np.array(['H', 'C', 'H', 'H', 'C', 'C', 'H', 'C'], - dtype=object)) + expected_elems = ( + np.array(["N", "H", "H", "H", "C", "H", "C", "H"], dtype=object), + np.array(["H", "C", "H", "H", "C", "C", "H", "C"], dtype=object), + ) def test_bonds_atom_counts(self, universe): assert len(universe.atoms[[0]].bonds) == 4 assert len(universe.atoms[[42]].bonds) == 1 - @pytest.mark.parametrize('value', ( - (10, 11), - (10, 12), - (4, 6), - (4, 10), - )) + @pytest.mark.parametrize( + "value", + ( + (10, 11), + (10, 12), + (4, 6), + (4, 10), + ), + ) def test_bonds_identity(self, top, value): vals = top.bonds.values assert value in vals or value[::-1] in vals @@ -222,12 +269,15 @@ def test_angles_atom_counts(self, universe): assert len(universe.atoms[[0]].angles), 9 assert len(universe.atoms[[42]].angles), 2 - @pytest.mark.parametrize('value', ( - (11, 10, 12), - (10, 12, 14), - (6, 4, 10), - (4, 10, 11), - )) + @pytest.mark.parametrize( + "value", + ( + (11, 10, 12), + (10, 12, 14), + (6, 4, 10), + (4, 10, 11), + ), + ) def test_angles_identity(self, top, value): vals = top.angles.values assert value in vals or value[::-1] in vals @@ -235,25 +285,29 @@ def test_angles_identity(self, top, value): def test_dihedrals_atom_counts(self, universe): assert len(universe.atoms[[0]].dihedrals) == 14 - @pytest.mark.parametrize('value', ( - (11, 10, 12, 14), - (10, 12, 14, 16), - )) + @pytest.mark.parametrize( + "value", + ( + (11, 10, 12, 14), + (10, 12, 14, 16), + ), + ) def test_dihedrals_identity(self, top, value): vals = top.dihedrals.values assert value in vals or value[::-1] in vals def test_dihedral_types(self, universe): ag = universe.atoms[[10, 12, 14, 16]] - dih = universe.dihedrals.atomgroup_intersection(ag, - strict=True)[0] + dih = universe.dihedrals.atomgroup_intersection(ag, strict=True)[0] assert len(dih.type) == 4 - for i, (phi_k, per) in enumerate(( - (2.0, 1), - (2.0, 2), - (0.4, 3), - (0.0, 4), - )): + for i, (phi_k, per) in enumerate( + ( + (2.0, 1), + (2.0, 2), + (0.4, 3), + (0.0, 4), + ) + ): assert dih.type[i].type.phi_k == phi_k assert dih.type[i].type.per == per diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py index 1d56c4c5f6..a455e8ddea 100644 --- a/testsuite/MDAnalysisTests/converters/test_rdkit.py +++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py @@ -35,30 +35,35 @@ from numpy.testing import assert_allclose, assert_equal try: - from MDAnalysis.converters.RDKit import (RDATTRIBUTES, - _add_mda_attr_to_rdkit, - _atom_sorter, - _infer_bo_and_charges, - _rebuild_conjugated_bonds, - _set_atom_property, - _standardize_patterns) + from MDAnalysis.converters.RDKit import ( + RDATTRIBUTES, + _add_mda_attr_to_rdkit, + _atom_sorter, + _infer_bo_and_charges, + _rebuild_conjugated_bonds, + _set_atom_property, + _standardize_patterns, + ) from rdkit import Chem from rdkit.Chem import AllChem except ImportError: pass -requires_rdkit = pytest.mark.skipif(import_not_available("rdkit"), - reason="requires RDKit") +requires_rdkit = pytest.mark.skipif( + import_not_available("rdkit"), reason="requires RDKit" +) -@pytest.mark.skipif(not import_not_available("rdkit"), - reason="only for min dependencies build") +@pytest.mark.skipif( + not import_not_available("rdkit"), reason="only for min dependencies build" +) class TestRequiresRDKit(object): def test_converter_requires_rdkit(self): u = mda.Universe(PDB_full) - with pytest.raises(ImportError, - match="RDKit is required for the RDKitConverter"): + with pytest.raises( + ImportError, match="RDKit is required for the RDKitConverter" + ): u.atoms.convert_to("RDKIT") @@ -104,22 +109,28 @@ def product(request): def is_isomorphic(mol, ref, useChirality=False): - return (mol.HasSubstructMatch(ref, useChirality=useChirality) - and ref.HasSubstructMatch(mol, useChirality=useChirality)) + return mol.HasSubstructMatch( + ref, useChirality=useChirality + ) and ref.HasSubstructMatch(mol, useChirality=useChirality) @requires_rdkit class TestRDKitReader(object): - @pytest.mark.parametrize("rdmol, n_frames", [ - ("mol2_mol", 1), - ("smiles_mol", 3), - ], indirect=["rdmol"]) + @pytest.mark.parametrize( + "rdmol, n_frames", + [ + ("mol2_mol", 1), + ("smiles_mol", 3), + ], + indirect=["rdmol"], + ) def test_coordinates(self, rdmol, n_frames): universe = mda.Universe(rdmol) assert universe.trajectory.n_frames == n_frames - expected = np.array([ - conf.GetPositions() for conf in rdmol.GetConformers()], - dtype=np.float32) + expected = np.array( + [conf.GetPositions() for conf in rdmol.GetConformers()], + dtype=np.float32, + ) assert_equal(expected, universe.trajectory.coordinate_array) def test_no_coordinates(self): @@ -133,8 +144,9 @@ def test_compare_mol2reader(self): universe = mda.Universe(MolFactory.mol2_mol()) mol2 = mda.Universe(mol2_molecule) assert universe.trajectory.n_frames == mol2.trajectory.n_frames - assert_equal(universe.trajectory.ts.positions, - mol2.trajectory.ts.positions) + assert_equal( + universe.trajectory.ts.positions, mol2.trajectory.ts.positions + ) @requires_rdkit @@ -148,16 +160,18 @@ def mol2(self): u = mda.Universe(mol2_molecule) # add elements guesser = DefaultGuesser(None) - elements = np.array([guesser.guess_atom_element(x) for x in u.atoms.types], - dtype=object) - u.add_TopologyAttr('elements', elements) + elements = np.array( + [guesser.guess_atom_element(x) for x in u.atoms.types], + dtype=object, + ) + u.add_TopologyAttr("elements", elements) return u @pytest.fixture def peptide(self): u = mda.Universe(GRO) elements = mda.guesser.DefaultGuesser(None).guess_types(u.atoms.names) - u.add_TopologyAttr('elements', elements) + u.add_TopologyAttr("elements", elements) return u.select_atoms("resid 2-12") @pytest.fixture @@ -171,28 +185,35 @@ def test_no_atoms_attr(self): @pytest.mark.parametrize("smi", ["[H]", "C", "O", "[He]"]) def test_single_atom_mol(self, smi): - u = mda.Universe.from_smiles(smi, addHs=False, - generate_coordinates=False) + u = mda.Universe.from_smiles( + smi, addHs=False, generate_coordinates=False + ) mol = u.atoms.convert_to.rdkit(NoImplicit=False) assert mol.GetNumAtoms() == 1 assert mol.GetAtomWithIdx(0).GetSymbol() == smi.strip("[]") - @pytest.mark.parametrize("resname, n_atoms, n_fragments", [ - ("PRO", 14, 1), - ("ILE", 38, 1), - ("ALA", 20, 2), - ("GLY", 21, 3), - ]) + @pytest.mark.parametrize( + "resname, n_atoms, n_fragments", + [ + ("PRO", 14, 1), + ("ILE", 38, 1), + ("ALA", 20, 2), + ("GLY", 21, 3), + ], + ) def test_mol_from_selection(self, peptide, resname, n_atoms, n_fragments): mol = peptide.select_atoms("resname %s" % resname).convert_to("RDKIT") assert n_atoms == mol.GetNumAtoms() assert n_fragments == len(Chem.GetMolFrags(mol)) - @pytest.mark.parametrize("sel_str, atom_index", [ - ("resid 1", 0), - ("resname LYS and name NZ", 1), - ("resid 34 and altloc B", 2), - ]) + @pytest.mark.parametrize( + "sel_str, atom_index", + [ + ("resid 1", 0), + ("resname LYS and name NZ", 1), + ("resid 34 and altloc B", 2), + ], + ) def test_monomer_info(self, pdb, sel_str, atom_index): sel = pdb.select_atoms(sel_str) mda_atom = sel.atoms[atom_index] @@ -209,8 +230,9 @@ def test_monomer_info(self, pdb, sel_str, atom_index): assert mda_atom.segindex == mi.GetSegmentNumber() assert mda_atom.tempfactor == mi.GetTempFactor() - @pytest.mark.parametrize("rdmol", ["mol2_mol", "smiles_mol"], - indirect=True) + @pytest.mark.parametrize( + "rdmol", ["mol2_mol", "smiles_mol"], indirect=True + ) def test_identical_topology(self, rdmol): u = mda.Universe(rdmol) umol = u.atoms.convert_to("RDKIT") @@ -220,27 +242,26 @@ def test_identical_topology(self, rdmol): assert_equal(u.atoms.elements, u2.atoms.elements) assert_equal(u.atoms.names, u2.atoms.names) assert_allclose( - u.atoms.positions, - u2.atoms.positions, - rtol=0, - atol=1e-7) + u.atoms.positions, u2.atoms.positions, rtol=0, atol=1e-7 + ) def test_raise_requires_elements(self): u = mda.Universe(mol2_molecule) # Delete topology attribute (PR #3069) - u.del_TopologyAttr('elements') + u.del_TopologyAttr("elements") with pytest.raises( AttributeError, - match="`elements` attribute is required for the RDKitConverter" + match="`elements` attribute is required for the RDKitConverter", ): u.atoms.convert_to("RDKIT") def test_warn_guess_bonds(self): u = mda.Universe(PDB_helix) - with pytest.warns(UserWarning, - match="No `bonds` attribute in this AtomGroup"): + with pytest.warns( + UserWarning, match="No `bonds` attribute in this AtomGroup" + ): u.atoms.convert_to("RDKIT") def test_bonds_outside_sel(self): @@ -249,9 +270,10 @@ def test_bonds_outside_sel(self): ag.convert_to.rdkit(NoImplicit=False) def test_error_no_hydrogen(self, uo2): - with pytest.raises(AttributeError, - match="the converter requires all hydrogens to be " - "explicit"): + with pytest.raises( + AttributeError, + match="the converter requires all hydrogens to be " "explicit", + ): uo2.atoms.convert_to("RDKIT") def test_error_no_hydrogen_implicit(self, uo2): @@ -260,28 +282,32 @@ def test_error_no_hydrogen_implicit(self, uo2): uo2.atoms.convert_to.rdkit(NoImplicit=False) def test_warning_no_hydrogen_force(self, uo2): - with pytest.warns(UserWarning, - match="Forcing to continue the conversion"): + with pytest.warns( + UserWarning, match="Forcing to continue the conversion" + ): uo2.atoms.convert_to.rdkit(NoImplicit=False, force=True) - @pytest.mark.parametrize("attr, value, expected", [ - ("names", "N", " N "), - ("names", "CA", " CA "), - ("names", "CAT", " CAT"), - ("names", "N1", " N1 "), - ("names", "CE2", " CE2"), - ("names", "C12", " C12"), - ("names", "HD12", "HD12"), - ("names", "C123", "C123"), - ("altLocs", "A", "A"), - ("chainIDs", "B", "B"), - ("icodes", "C", "C"), - ("occupancies", 0.5, 0.5), - ("resnames", "LIG", "LIG"), - ("resids", 123, 123), - ("segindices", 1, 1), - ("tempfactors", 0.8, 0.8), - ]) + @pytest.mark.parametrize( + "attr, value, expected", + [ + ("names", "N", " N "), + ("names", "CA", " CA "), + ("names", "CAT", " CAT"), + ("names", "N1", " N1 "), + ("names", "CE2", " CE2"), + ("names", "C12", " C12"), + ("names", "HD12", "HD12"), + ("names", "C123", "C123"), + ("altLocs", "A", "A"), + ("chainIDs", "B", "B"), + ("icodes", "C", "C"), + ("occupancies", 0.5, 0.5), + ("resnames", "LIG", "LIG"), + ("resids", 123, 123), + ("segindices", 1, 1), + ("tempfactors", 0.8, 0.8), + ], + ) def test_add_mda_attr_to_rdkit(self, attr, value, expected): mi = Chem.AtomPDBResidueInfo() _add_mda_attr_to_rdkit(attr, value, mi) @@ -297,10 +323,13 @@ def test_other_attributes(self, mol2, idx): assert mda_atom.segid == rdatom.GetProp("_MDAnalysis_segid") assert mda_atom.type == rdatom.GetProp("_MDAnalysis_type") - @pytest.mark.parametrize("sel_str", [ - "resname ALA", - "resname PRO and segid A", - ]) + @pytest.mark.parametrize( + "sel_str", + [ + "resname ALA", + "resname PRO and segid A", + ], + ) def test_index_property(self, pdb, sel_str): ag = pdb.select_atoms(sel_str) mol = ag.convert_to.rdkit(NoImplicit=False) @@ -320,7 +349,8 @@ def test_assign_stereochemistry(self, mol2): def test_trajectory_coords(self): u = mda.Universe.from_smiles( - "CCO", numConfs=3, rdkit_kwargs=dict(randomSeed=42)) + "CCO", numConfs=3, rdkit_kwargs=dict(randomSeed=42) + ) for ts in u.trajectory: mol = u.atoms.convert_to("RDKIT") positions = mol.GetConformer().GetPositions() @@ -441,26 +471,34 @@ def assert_isomorphic_resonance_structure(self, mol, ref): """ isomorphic = mol.HasSubstructMatch(ref) if not isomorphic: - isomorphic = bool(Chem.ResonanceMolSupplier(mol) - .GetSubstructMatch(ref)) - assert isomorphic, f"{Chem.MolToSmiles(ref)} != {Chem.MolToSmiles(mol)}" - - @pytest.mark.parametrize("smi, out", [ - ("C(-[H])(-[H])(-[H])-[H]", "C"), - ("[C](-[H])(-[H])-[C](-[H])-[H]", "C=C"), - ("[C]1(-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C]1(-[H])", - "c1ccccc1"), - ("C-[C](-[H])-[O]", "C(=O)C"), - ("[H]-[C](-[O])-[N](-[H])-[H]", "C(=O)N"), - ("[N]-[C]-[H]", "N#C"), - ("C-[C](-[O]-[H])-[O]", "CC(=O)O"), - ("[P](-[O]-[H])(-[O]-[H])(-[O]-[H])-[O]", "P(O)(O)(O)=O"), - ("[P](-[O]-[H])(-[O]-[H])(-[O])-[O]", "P([O-])(O)(O)=O"), - ("[P](-[O]-[H])(-[O])(-[O])-[O]", "P([O-])([O-])(O)=O"), - ("[P](-[O])(-[O])(-[O])-[O]", "P([O-])([O-])([O-])=O"), - ("[H]-[O]-[N]-[O]", "ON=O"), - ("[N]-[C]-[O]", "N#C[O-]"), - ]) + isomorphic = bool( + Chem.ResonanceMolSupplier(mol).GetSubstructMatch(ref) + ) + assert ( + isomorphic + ), f"{Chem.MolToSmiles(ref)} != {Chem.MolToSmiles(mol)}" + + @pytest.mark.parametrize( + "smi, out", + [ + ("C(-[H])(-[H])(-[H])-[H]", "C"), + ("[C](-[H])(-[H])-[C](-[H])-[H]", "C=C"), + ( + "[C]1(-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C]1(-[H])", + "c1ccccc1", + ), + ("C-[C](-[H])-[O]", "C(=O)C"), + ("[H]-[C](-[O])-[N](-[H])-[H]", "C(=O)N"), + ("[N]-[C]-[H]", "N#C"), + ("C-[C](-[O]-[H])-[O]", "CC(=O)O"), + ("[P](-[O]-[H])(-[O]-[H])(-[O]-[H])-[O]", "P(O)(O)(O)=O"), + ("[P](-[O]-[H])(-[O]-[H])(-[O])-[O]", "P([O-])(O)(O)=O"), + ("[P](-[O]-[H])(-[O])(-[O])-[O]", "P([O-])([O-])(O)=O"), + ("[P](-[O])(-[O])(-[O])-[O]", "P([O-])([O-])([O-])=O"), + ("[H]-[O]-[N]-[O]", "ON=O"), + ("[N]-[C]-[O]", "N#C[O-]"), + ], + ) def test_infer_bond_orders(self, smi, out): mol = Chem.MolFromSmiles(smi, sanitize=False) mol.UpdatePropertyCache(strict=False) @@ -468,14 +506,19 @@ def test_infer_bond_orders(self, smi, out): Chem.SanitizeMol(mol) mol = Chem.RemoveHs(mol) molref = Chem.MolFromSmiles(out) - assert is_isomorphic(mol, molref), "{} != {}".format(Chem.MolToSmiles(mol), out) - - @pytest.mark.parametrize("smi, atom_idx, charge", [ - ("[C](-[H])(-[H])(-[H])-[O]", 4, -1), - ("[N]-[C]-[O]", 2, -1), - ("[N](-[H])(-[H])(-[H])-[H]", 0, 1), - ("C-[C](-[O])-[O]", 3, -1), - ]) + assert is_isomorphic(mol, molref), "{} != {}".format( + Chem.MolToSmiles(mol), out + ) + + @pytest.mark.parametrize( + "smi, atom_idx, charge", + [ + ("[C](-[H])(-[H])(-[H])-[O]", 4, -1), + ("[N]-[C]-[O]", 2, -1), + ("[N](-[H])(-[H])(-[H])-[H]", 0, 1), + ("C-[C](-[O])-[O]", 3, -1), + ], + ) def test_infer_charges(self, smi, atom_idx, charge): mol = Chem.MolFromSmiles(smi, sanitize=False) mol.UpdatePropertyCache(strict=False) @@ -483,48 +526,62 @@ def test_infer_charges(self, smi, atom_idx, charge): Chem.SanitizeMol(mol) assert mol.GetAtomWithIdx(atom_idx).GetFormalCharge() == charge - @pytest.mark.parametrize("smi, out", [ - ("[S](-[O]-[H])(-[O]-[H])(-[O])-[O]", "S(=O)(=O)(O)O"), - ("[S](-[O]-[H])(-[O])(-[O])-[O]", "S(=O)(=O)([O-])O"), - ("[S](-[O])(-[O])(-[O])-[O]", "S(=O)(=O)([O-])[O-]"), - ("C-[N](-[H])-[C](-[N](-[H])-[H])-[N](-[H])-[H]", - "CNC(N)=[N+](-[H])-[H]"), - ("[O]-[C](-[H])-[C](-[H])-[H]", "C([O-])=C"), - ("C-[N](-[O])-[O]", "C[N+](=O)[O-]"), - ("C(-[N](-[O])-[O])-[N](-[O])-[O]", "C([N+](=O)[O-])[N+](=O)[O-]"), - ("C-[N](-[O])-[O].C-[N](-[O])-[O]", "C[N+](=O)[O-].C[N+](=O)[O-]"), - ("[C-](=O)-C", "[C](=O)-C"), - ("[H]-[N-]-C", "[H]-[N]-C"), - ("[O]-[C]1-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])1", - "[O-]c1ccccc1"), - ("[O]-[C]1-[C](-[H])-[C](-[H])-[C](-[H])-[C]1-[O]", - "[O-]C1=CC=CC1=O"), - ("[H]-[C]-[C]-[C](-[H])-[C](-[H])-[H]", "C#CC=C"), - ("[H]-[C]-[C]-[C]-[C]-[H]", "C#CC#C"), - ]) + @pytest.mark.parametrize( + "smi, out", + [ + ("[S](-[O]-[H])(-[O]-[H])(-[O])-[O]", "S(=O)(=O)(O)O"), + ("[S](-[O]-[H])(-[O])(-[O])-[O]", "S(=O)(=O)([O-])O"), + ("[S](-[O])(-[O])(-[O])-[O]", "S(=O)(=O)([O-])[O-]"), + ( + "C-[N](-[H])-[C](-[N](-[H])-[H])-[N](-[H])-[H]", + "CNC(N)=[N+](-[H])-[H]", + ), + ("[O]-[C](-[H])-[C](-[H])-[H]", "C([O-])=C"), + ("C-[N](-[O])-[O]", "C[N+](=O)[O-]"), + ("C(-[N](-[O])-[O])-[N](-[O])-[O]", "C([N+](=O)[O-])[N+](=O)[O-]"), + ("C-[N](-[O])-[O].C-[N](-[O])-[O]", "C[N+](=O)[O-].C[N+](=O)[O-]"), + ("[C-](=O)-C", "[C](=O)-C"), + ("[H]-[N-]-C", "[H]-[N]-C"), + ( + "[O]-[C]1-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])1", + "[O-]c1ccccc1", + ), + ( + "[O]-[C]1-[C](-[H])-[C](-[H])-[C](-[H])-[C]1-[O]", + "[O-]C1=CC=CC1=O", + ), + ("[H]-[C]-[C]-[C](-[H])-[C](-[H])-[H]", "C#CC=C"), + ("[H]-[C]-[C]-[C]-[C]-[H]", "C#CC#C"), + ], + ) def test_standardize_patterns(self, smi, out): mol = Chem.MolFromSmiles(smi, sanitize=False) mol.UpdatePropertyCache(strict=False) mol = self.assign_bond_orders_and_charges(mol) mol = Chem.RemoveHs(mol) molref = Chem.MolFromSmiles(out) - assert is_isomorphic(mol, molref), "{} != {}".format(Chem.MolToSmiles(mol), out) - - @pytest.mark.parametrize("attr, value, getter", [ - ("index", 42, "GetIntProp"), - ("index", np.int8(42), "GetIntProp"), - ("index", np.int16(42), "GetIntProp"), - ("index", np.int32(42), "GetIntProp"), - ("index", np.int64(42), "GetIntProp"), - ("index", np.uint8(42), "GetIntProp"), - ("index", np.uint16(42), "GetIntProp"), - ("index", np.uint32(42), "GetIntProp"), - ("index", np.uint64(42), "GetIntProp"), - ("charge", 4.2, "GetDoubleProp"), - ("charge", np.float32(4.2), "GetDoubleProp"), - ("charge", np.float64(4.2), "GetDoubleProp"), - ("type", "C.3", "GetProp"), - ]) + assert is_isomorphic(mol, molref), "{} != {}".format( + Chem.MolToSmiles(mol), out + ) + + @pytest.mark.parametrize( + "attr, value, getter", + [ + ("index", 42, "GetIntProp"), + ("index", np.int8(42), "GetIntProp"), + ("index", np.int16(42), "GetIntProp"), + ("index", np.int32(42), "GetIntProp"), + ("index", np.int64(42), "GetIntProp"), + ("index", np.uint8(42), "GetIntProp"), + ("index", np.uint16(42), "GetIntProp"), + ("index", np.uint32(42), "GetIntProp"), + ("index", np.uint64(42), "GetIntProp"), + ("charge", 4.2, "GetDoubleProp"), + ("charge", np.float32(4.2), "GetDoubleProp"), + ("charge", np.float64(4.2), "GetDoubleProp"), + ("type", "C.3", "GetProp"), + ], + ) def test_set_atom_property(self, attr, value, getter): atom = Chem.Atom(1) prop = "_MDAnalysis_%s" % attr @@ -536,11 +593,14 @@ def test_ignore_prop(self): _set_atom_property(atom, "foo", {"bar": "baz"}) assert "foo" not in atom.GetPropsAsDict().items() - @pytest.mark.parametrize("smi", [ - "c1ccc(cc1)-c1ccccc1-c1ccccc1", - "c1cc[nH]c1", - "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]", - ]) + @pytest.mark.parametrize( + "smi", + [ + "c1ccc(cc1)-c1ccccc1-c1ccccc1", + "c1cc[nH]c1", + "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]", + ], + ) def test_transfer_properties(self, smi): mol = Chem.MolFromSmiles(smi) mol = self.add_Hs_remove_bo_and_charges(mol) @@ -554,86 +614,88 @@ def test_transfer_properties(self, smi): new = {} for a in newmol.GetAtoms(): ix = a.GetIntProp("_MDAnalysis_index") - new[ix] = {"_MDAnalysis_index": ix, - "dummy": a.GetProp("dummy")} + new[ix] = {"_MDAnalysis_index": ix, "dummy": a.GetProp("dummy")} props = a.GetPropsAsDict().keys() assert "old_mapno" not in props assert "react_atom_idx" not in props assert new == old - @pytest.mark.parametrize("smi", [ - "c1ccc(cc1)-c1ccccc1-c1ccccc1", - "c1cc[nH]c1", - "c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1", - "c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21", - "c1csc(c1)-c1ccoc1-c1cc[nH]c1", - "C1=C2C(=NC=N1)N=CN2", - "CN1C=NC(=C1SC2=NC=NC3=C2NC=N3)[N+](=O)[O-]", - "c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1", - "C=CC=CC=CC=CC=CC=C", - "NCCCCC([NH3+])C(=O)[O-]", - "CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C", - "C#CC=C", - # HID HIE HIP residues, see PR #2941 - "O=C([C@H](CC1=CNC=N1)N)O", - "O=C([C@H](CC1=CN=CN1)N)O", - "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]", - # fixes from PR #3044 - "CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1", - "[O-][n+]1cccnc1", - "C[n+]1ccccc1", - "[PH4+]", - "c1nc[nH]n1", - "CC(=O)C=C(C)N", - "CC(C)=CC=C[O-]", - "O=S(C)(C)=NC", - "Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1", - "CCCC/C=C/C#CC#CCCCCCCCC(=O)O", - "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1", - "N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1", - "C[C@@H](Oc1cc(F)ccc1Nc1ncnc2cc(N=S3(=O)CCC3)cc(F)c12)C(=O)NCC#N", - "[O-][n+]1onc2ccccc21", - "Cc1cc[n+](CC[n+]2ccc(C)cc2)cc1", - "[O-]c1ccccc1", - "[O-]C=CC=CCC=CC=[N+](C)C", - "C=[N+](-[O-])-C", - "C-[N-]-C(=O)C", - # amino acids - "C[C@H](N)C(=O)O", # A - "NCC(=O)O", # G - "CC[C@H](C)[C@H](N)C(=O)O", # I - "CC(C)C[C@H](N)C(=O)O", # L - "O=C(O)[C@@H]1CCCN1", # P - "CC(C)[C@H](N)C(=O)O", # V - "N[C@@H](Cc1ccccc1)C(=O)O", # F - "N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O", # W - "N[C@@H](Cc1ccc(O)cc1)C(=O)O", # Y - "N[C@@H](CC(=O)O)C(=O)O", # D - "N[C@@H](CCC(=O)O)C(=O)O", # E - "N=C(N)NCCC[C@H](N)C(=O)O", # R - "N[C@@H](Cc1c[nH]cn1)C(=O)O", # H - "NCCCC[C@H](N)C(=O)O", # K - "N[C@@H](CO)C(=O)O", # S - "C[C@@H](O)[C@H](N)C(=O)O", # T - "N[C@@H](CS)C(=O)O", # C - "CSCC[C@H](N)C(=O)O", # M - "NC(=O)C[C@H](N)C(=O)O", # N - "NC(=O)CC[C@H](N)C(=O)O", # Q - # nucleic acids - "Nc1ncnc2c1ncn2[C@H]1C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O1", # A - "Cc1cn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]c1=O", # T - "O=c1ccn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1", # U - "Nc1ccn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)n1", # C - "Nc1nc2c(ncn2[C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1", # G - # RDKitConverter benchmark 2022-05-23, fixed by better sorting - "CC(C)NS(C)(=O)=Nc1ccc(-c2ccc(-c3nc4[nH]c(O[C@@H]5CO[C@@H]6[C@H](O)CO[C@H]56)nc4cc3Cl)cc2)cc1 CHEMBL4111464", - "C[n+]1c2ccccc2c(C(=O)O)c2[nH]c3ccc(Br)cc3c21 CHEMBL511591", - "C[n+]1ccccc1-c1ccc(NC(=O)c2ccc(C(=O)Nc3ccc(-c4cccc[n+]4C)cc3)cc2)cc1 CHEMBL3230729", - "Cc1ccc(/C(O)=C(/C([S-])=NCc2ccccc2)[n+]2cccc(CO)c2)cc1[N+](=O)[O-] CHEMBL1596426", - "CC(C)(O)c1cc2c(cc1C(C)(C)O)-c1ccccc1-2 CHEMBL1990966", - "[O-]/N=C1/c2ccccc2-c2nc3ccc(C(F)(F)F)cc3nc21 CHEMBL4557878", - "N#Cc1c[nH]c(C(=O)Nc2ccc(-c3cccc[n+]3[O-])cc2C2=CCCCC2)n1 CHEMBL1172116", - ]) + @pytest.mark.parametrize( + "smi", + [ + "c1ccc(cc1)-c1ccccc1-c1ccccc1", + "c1cc[nH]c1", + "c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1", + "c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21", + "c1csc(c1)-c1ccoc1-c1cc[nH]c1", + "C1=C2C(=NC=N1)N=CN2", + "CN1C=NC(=C1SC2=NC=NC3=C2NC=N3)[N+](=O)[O-]", + "c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1", + "C=CC=CC=CC=CC=CC=C", + "NCCCCC([NH3+])C(=O)[O-]", + "CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C", + "C#CC=C", + # HID HIE HIP residues, see PR #2941 + "O=C([C@H](CC1=CNC=N1)N)O", + "O=C([C@H](CC1=CN=CN1)N)O", + "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]", + # fixes from PR #3044 + "CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1", + "[O-][n+]1cccnc1", + "C[n+]1ccccc1", + "[PH4+]", + "c1nc[nH]n1", + "CC(=O)C=C(C)N", + "CC(C)=CC=C[O-]", + "O=S(C)(C)=NC", + "Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1", + "CCCC/C=C/C#CC#CCCCCCCCC(=O)O", + "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1", + "N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1", + "C[C@@H](Oc1cc(F)ccc1Nc1ncnc2cc(N=S3(=O)CCC3)cc(F)c12)C(=O)NCC#N", + "[O-][n+]1onc2ccccc21", + "Cc1cc[n+](CC[n+]2ccc(C)cc2)cc1", + "[O-]c1ccccc1", + "[O-]C=CC=CCC=CC=[N+](C)C", + "C=[N+](-[O-])-C", + "C-[N-]-C(=O)C", + # amino acids + "C[C@H](N)C(=O)O", # A + "NCC(=O)O", # G + "CC[C@H](C)[C@H](N)C(=O)O", # I + "CC(C)C[C@H](N)C(=O)O", # L + "O=C(O)[C@@H]1CCCN1", # P + "CC(C)[C@H](N)C(=O)O", # V + "N[C@@H](Cc1ccccc1)C(=O)O", # F + "N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O", # W + "N[C@@H](Cc1ccc(O)cc1)C(=O)O", # Y + "N[C@@H](CC(=O)O)C(=O)O", # D + "N[C@@H](CCC(=O)O)C(=O)O", # E + "N=C(N)NCCC[C@H](N)C(=O)O", # R + "N[C@@H](Cc1c[nH]cn1)C(=O)O", # H + "NCCCC[C@H](N)C(=O)O", # K + "N[C@@H](CO)C(=O)O", # S + "C[C@@H](O)[C@H](N)C(=O)O", # T + "N[C@@H](CS)C(=O)O", # C + "CSCC[C@H](N)C(=O)O", # M + "NC(=O)C[C@H](N)C(=O)O", # N + "NC(=O)CC[C@H](N)C(=O)O", # Q + # nucleic acids + "Nc1ncnc2c1ncn2[C@H]1C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O1", # A + "Cc1cn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]c1=O", # T + "O=c1ccn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1", # U + "Nc1ccn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)n1", # C + "Nc1nc2c(ncn2[C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1", # G + # RDKitConverter benchmark 2022-05-23, fixed by better sorting + "CC(C)NS(C)(=O)=Nc1ccc(-c2ccc(-c3nc4[nH]c(O[C@@H]5CO[C@@H]6[C@H](O)CO[C@H]56)nc4cc3Cl)cc2)cc1 CHEMBL4111464", + "C[n+]1c2ccccc2c(C(=O)O)c2[nH]c3ccc(Br)cc3c21 CHEMBL511591", + "C[n+]1ccccc1-c1ccc(NC(=O)c2ccc(C(=O)Nc3ccc(-c4cccc[n+]4C)cc3)cc2)cc1 CHEMBL3230729", + "Cc1ccc(/C(O)=C(/C([S-])=NCc2ccccc2)[n+]2cccc(CO)c2)cc1[N+](=O)[O-] CHEMBL1596426", + "CC(C)(O)c1cc2c(cc1C(C)(C)O)-c1ccccc1-2 CHEMBL1990966", + "[O-]/N=C1/c2ccccc2-c2nc3ccc(C(F)(F)F)cc3nc21 CHEMBL4557878", + "N#Cc1c[nH]c(C(=O)Nc2ccc(-c3cccc[n+]3[O-])cc2C2=CCCCC2)n1 CHEMBL1172116", + ], + ) def test_order_independant(self, smi): # generate mol with hydrogens but without bond orders ref = Chem.MolFromSmiles(smi) @@ -645,40 +707,61 @@ def test_order_independant(self, smi): self.assert_isomorphic_resonance_structure(m, ref) @pytest.mark.xfail(reason="Not currently tackled by the RDKitConverter") - @pytest.mark.parametrize("smi", [ - "C-[N+]#N", - "C-N=[N+]=[N-]", - "C-[O+]=C", - "C-[N+]#[C-]", - ]) + @pytest.mark.parametrize( + "smi", + [ + "C-[N+]#N", + "C-N=[N+]=[N-]", + "C-[O+]=C", + "C-[N+]#[C-]", + ], + ) def test_order_independant_issue_3339(self, smi): self.test_order_independant(smi) def test_warn_conjugated_max_iter(self): smi = "[C-]C=CC=CC=CC=CC=CC=C[C-]" mol = Chem.MolFromSmiles(smi) - with pytest.warns(UserWarning, - match="reasonable number of iterations"): + with pytest.warns( + UserWarning, match="reasonable number of iterations" + ): _rebuild_conjugated_bonds(mol, 2) - @pytest.mark.parametrize("smi", [ - "[Li+]", "[Na+]", "[K+]", "[Rb+]", "[Ag+]", "[Cs+]", - "[Mg+2]", "[Ca+2]", "[Cu+2]", "[Zn+2]", "[Sr+2]", "[Ba+2]", - "[Al+3]", "[Fe+2]", - "[Cl-]", - "[O-2]", - "[Na+].[Cl-]", - ]) + @pytest.mark.parametrize( + "smi", + [ + "[Li+]", + "[Na+]", + "[K+]", + "[Rb+]", + "[Ag+]", + "[Cs+]", + "[Mg+2]", + "[Ca+2]", + "[Cu+2]", + "[Zn+2]", + "[Sr+2]", + "[Ba+2]", + "[Al+3]", + "[Fe+2]", + "[Cl-]", + "[O-2]", + "[Na+].[Cl-]", + ], + ) def test_ions(self, smi): ref = Chem.MolFromSmiles(smi) stripped_mol = self.add_Hs_remove_bo_and_charges(ref) mol = self.assign_bond_orders_and_charges(stripped_mol) assert is_isomorphic(mol, ref) - @pytest.mark.parametrize("smi", [ - "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]", - "O=S(C)(C)=NC", - ]) + @pytest.mark.parametrize( + "smi", + [ + "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]", + "O=S(C)(C)=NC", + ], + ) def test_reorder_atoms(self, smi): mol = Chem.MolFromSmiles(smi) mol = Chem.AddHs(mol) @@ -692,9 +775,12 @@ def test_reorder_atoms(self, smi): expected = [a.GetSymbol() for a in mol.GetAtoms()] assert values == expected - @pytest.mark.parametrize("smi", [ - "O=S(C)(C)=NC", - ]) + @pytest.mark.parametrize( + "smi", + [ + "O=S(C)(C)=NC", + ], + ) def test_warn_empty_coords(self, smi): mol = Chem.MolFromSmiles(smi) mol = Chem.AddHs(mol) @@ -708,15 +794,19 @@ def test_pdb_names(self): u = mda.Universe(PDB_helix) mol = u.atoms.convert_to.rdkit() names = u.atoms.names - rd_names = np.array([a.GetProp("_MDAnalysis_name") - for a in mol.GetAtoms()]) + rd_names = np.array( + [a.GetProp("_MDAnalysis_name") for a in mol.GetAtoms()] + ) assert (names == rd_names).all() - @pytest.mark.parametrize("smi", [ - r"F/C(Br)=C(Cl)/I", - r"F\C(Br)=C(Cl)\I", - "F-C(Br)=C(Cl)-I", - ]) + @pytest.mark.parametrize( + "smi", + [ + r"F/C(Br)=C(Cl)/I", + r"F\C(Br)=C(Cl)\I", + "F-C(Br)=C(Cl)-I", + ], + ) def test_bond_stereo_not_stereoany(self, smi): u = mda.Universe.from_smiles(smi) mol = u.atoms.convert_to.rdkit(force=True) @@ -726,11 +816,14 @@ def test_bond_stereo_not_stereoany(self, smi): def test_atom_sorter(self): mol = Chem.MolFromSmiles( - "[H]-[C](-[H])-[C](-[H])-[C]-[C]-[H]", sanitize=False) + "[H]-[C](-[H])-[C](-[H])-[C]-[C]-[H]", sanitize=False + ) # corresponding mol: C=C-C#C # atom indices: 1 3 5 6 mol.UpdatePropertyCache() - sorted_atoms = sorted([atom for atom in mol.GetAtoms() - if atom.GetAtomicNum() > 1], key=_atom_sorter) + sorted_atoms = sorted( + [atom for atom in mol.GetAtoms() if atom.GetAtomicNum() > 1], + key=_atom_sorter, + ) sorted_indices = [atom.GetIdx() for atom in sorted_atoms] assert sorted_indices == [6, 5, 1, 3] diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py b/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py index bf432990fa..f82d74a561 100644 --- a/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py +++ b/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py @@ -32,15 +32,24 @@ # TODO: remove these shims when RDKit # has a release supporting NumPy 2 -Chem = pytest.importorskip('rdkit.Chem') -AllChem = pytest.importorskip('rdkit.Chem.AllChem') +Chem = pytest.importorskip("rdkit.Chem") +AllChem = pytest.importorskip("rdkit.Chem.AllChem") class RDKitParserBase(ParserBase): parser = mda.converters.RDKitParser.RDKitParser - expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', - 'resids', 'resnums', 'chiralities', 'segids', 'bonds', - ] + expected_attrs = [ + "ids", + "names", + "elements", + "masses", + "aromaticities", + "resids", + "resnums", + "chiralities", + "segids", + "bonds", + ] expected_n_atoms = 0 expected_n_residues = 1 @@ -53,14 +62,14 @@ def top(self, filename): yield p.parse() def test_creates_universe(self, filename): - u = mda.Universe(filename, format='RDKIT') + u = mda.Universe(filename, format="RDKIT") assert isinstance(u, mda.Universe) def test_bonds_total_counts(self, top): assert len(top.bonds.values) == self.expected_n_bonds def test_guessed_attributes(self, filename): - u = mda.Universe(filename, format='RDKIT') + u = mda.Universe(filename, format="RDKIT") u_guessed_attrs = [a.attrname for a in u._topology.guessed_attributes] for attr in self.guessed_attrs: assert hasattr(u.atoms, attr) @@ -70,7 +79,7 @@ def test_guessed_attributes(self, filename): class TestRDKitParserMOL2(RDKitParserBase): ref_filename = mol2_molecule - expected_attrs = RDKitParserBase.expected_attrs + ['charges', 'types'] + expected_attrs = RDKitParserBase.expected_attrs + ["charges", "types"] expected_n_atoms = 49 expected_n_residues = 1 @@ -89,7 +98,7 @@ def _create_mol_gasteiger_charges(self): def _remove_tripos_charges(self, mol): for atom in mol.GetAtoms(): atom.ClearProp("_TriposPartialCharge") - + @pytest.fixture def top_gas_tripos(self): mol = self._create_mol_gasteiger_charges() @@ -106,17 +115,21 @@ def top_gasteiger(self): mol = self._create_mol_gasteiger_charges() self._remove_tripos_charges(mol) return self.parser(mol).parse() - def test_bond_orders(self, top, filename): expected = [bond.GetBondTypeAsDouble() for bond in filename.GetBonds()] assert top.bonds.order == expected - - def test_multiple_charge_priority(self, - top_gas_tripos, filename_gasteiger): - expected = np.array([ - a.GetDoubleProp('_GasteigerCharge') for a in - filename_gasteiger.GetAtoms()], dtype=np.float32) + + def test_multiple_charge_priority( + self, top_gas_tripos, filename_gasteiger + ): + expected = np.array( + [ + a.GetDoubleProp("_GasteigerCharge") + for a in filename_gasteiger.GetAtoms() + ], + dtype=np.float32, + ) assert_equal(expected, top_gas_tripos.charges.values) def test_multiple_charge_props_warning(self): @@ -129,37 +142,55 @@ def test_multiple_charge_props_warning(self): # Verify the warning assert len(w) == 1 assert "_GasteigerCharge and _TriposPartialCharge" in str( - w[-1].message) + w[-1].message + ) def test_gasteiger_charges(self, top_gasteiger, filename_gasteiger): - expected = np.array([ - a.GetDoubleProp('_GasteigerCharge') for a in - filename_gasteiger.GetAtoms()], dtype=np.float32) + expected = np.array( + [ + a.GetDoubleProp("_GasteigerCharge") + for a in filename_gasteiger.GetAtoms() + ], + dtype=np.float32, + ) assert_equal(expected, top_gasteiger.charges.values) def test_tripos_charges(self, top, filename): - expected = np.array([ - a.GetDoubleProp('_TriposPartialCharge') for a in filename.GetAtoms() - ], dtype=np.float32) + expected = np.array( + [ + a.GetDoubleProp("_TriposPartialCharge") + for a in filename.GetAtoms() + ], + dtype=np.float32, + ) assert_equal(expected, top.charges.values) def test_aromaticity(self, top, filename): - expected = np.array([ - atom.GetIsAromatic() for atom in filename.GetAtoms()]) + expected = np.array( + [atom.GetIsAromatic() for atom in filename.GetAtoms()] + ) assert_equal(expected, top.aromaticities.values) def test_guessed_types(self, filename): - u = mda.Universe(filename, format='RDKIT') - assert_equal(u.atoms.types[:7], ['N.am', 'S.o2', - 'N.am', 'N.am', 'O.2', 'O.2', 'C.3']) + u = mda.Universe(filename, format="RDKIT") + assert_equal( + u.atoms.types[:7], + ["N.am", "S.o2", "N.am", "N.am", "O.2", "O.2", "C.3"], + ) + class TestRDKitParserPDB(RDKitParserBase): ref_filename = PDB_helix expected_attrs = RDKitParserBase.expected_attrs + [ - 'resnames', 'altLocs', 'chainIDs', 'occupancies', 'icodes', - 'tempfactors'] - + "resnames", + "altLocs", + "chainIDs", + "occupancies", + "icodes", + "tempfactors", + ] + expected_n_atoms = 137 expected_n_residues = 13 expected_n_segments = 1 @@ -172,15 +203,16 @@ def filename(self): def test_partial_residueinfo_raise_error(self, filename): mol = Chem.RemoveHs(filename) mh = Chem.AddHs(mol) - with pytest.raises(ValueError, - match="ResidueInfo is only partially available"): + with pytest.raises( + ValueError, match="ResidueInfo is only partially available" + ): mda.Universe(mh) mh = Chem.AddHs(mol, addResidueInfo=True) mda.Universe(mh) - + def test_guessed_types(self, filename): - u = mda.Universe(filename, format='RDKIT') - assert_equal(u.atoms.types[:7], ['N', 'H', 'C', 'H', 'C', 'H', 'H']) + u = mda.Universe(filename, format="RDKIT") + assert_equal(u.atoms.types[:7], ["N", "H", "C", "H", "C", "H", "H"]) class TestRDKitParserSMILES(RDKitParserBase): @@ -215,5 +247,5 @@ def test_bond_orders(self, top, filename): assert top.bonds.order == expected def test_guessed_types(self, filename): - u = mda.Universe(filename, format='RDKIT') - assert_equal(u.atoms.types[:7], ['CA', 'C', 'C', 'C', 'C', 'C', 'O']) + u = mda.Universe(filename, format="RDKIT") + assert_equal(u.atoms.types[:7], ["CA", "C", "C", "C", "C", "C", "O"]) diff --git a/testsuite/pyproject.toml b/testsuite/pyproject.toml index 9c660c9548..6be06957d7 100644 --- a/testsuite/pyproject.toml +++ b/testsuite/pyproject.toml @@ -162,6 +162,7 @@ setup\.py | MDAnalysisTests/auxiliary/.*\.py | MDAnalysisTests/lib/.*\.py | MDAnalysisTests/transformations/.*\.py +| MDAnalysisTests/converters/.*\.py ) ''' extend-exclude = '''