From cdfc4875fc723c16e5bf1848ee0f85b5c4de8079 Mon Sep 17 00:00:00 2001 From: Jackson Burns Date: Mon, 1 Jul 2024 10:39:41 -0400 Subject: [PATCH] fix cython compilation errors (0.23 -> 3.n), SEE EXTENDED!! here are all the solutions that were implemented: - in a couple places we were implicitly casting a double to a numpy float, now need to explicitly grad the _real_ part of the double - Cython does not allow nested function declaration inside cpdef functions, move all of these outside of their parent functions or redefine them to be purely functional where practical - similar to the above, lambda function are no longer allowed - get the same treatment. what's different here is that usually we are using lambda in sorting calls, so we can replace these with operator.itemgetter or attrgetter, where relevant. this also involves re-writing a couple reduce calls, which also used lambda - modern cython does not allow re-declaring existing Cython variables (in previous versions this was a warning I think), so I just remove these where needed. (btw Cython is super cool, actually points out both of the declaration so that you can delete one) i made these fixes while listening to The Metal by Tenacious D, Talk Too Much by Renee Rapp, BEST INTEREST by Tyler, The Creator (love the bass line), mos thoser by food house, and Doritos & Fritos by 100 Gecs --- rmgpy/kinetics/kineticsdata.pyx | 5 +- rmgpy/molecule/group.py | 168 ++++++++++++++++---------------- rmgpy/molecule/inchi.py | 25 +++-- rmgpy/molecule/molecule.py | 38 ++++---- rmgpy/molecule/resonance.py | 14 ++- rmgpy/pdep/reaction.pyx | 2 +- rmgpy/reaction.py | 79 +++++++-------- 7 files changed, 168 insertions(+), 163 deletions(-) diff --git a/rmgpy/kinetics/kineticsdata.pyx b/rmgpy/kinetics/kineticsdata.pyx index 7b4617cdd0..292a56a7ab 100644 --- a/rmgpy/kinetics/kineticsdata.pyx +++ b/rmgpy/kinetics/kineticsdata.pyx @@ -249,9 +249,8 @@ cdef class PDepKineticsData(PDepKineticsModel): Plow = Pdata[j] Phigh = Pdata[j + 1] if Plow <= P and P <= Phigh: - klow = kdata[i, j] * (kdata[i + 1, j] / kdata[i, j]) ** ((T - Tlow) / (Thigh - Tlow)) - khigh = kdata[i, j + 1] * (kdata[i + 1, j + 1] / kdata[i, j + 1]) ** ( - (T - Tlow) / (Thigh - Tlow)) + klow = kdata[i, j] * (kdata[i + 1, j] / kdata[i, j]) ** ((T - Tlow) / (Thigh - Tlow)).real + khigh = kdata[i, j + 1] * (kdata[i + 1, j + 1] / kdata[i, j + 1]) ** ((T - Tlow) / (Thigh - Tlow)).real k = klow * (khigh / klow) ** (log(P / Plow) / log(Phigh / Plow)) break diff --git a/rmgpy/molecule/group.py b/rmgpy/molecule/group.py index 1dc56cc7f7..e9a6e6433f 100644 --- a/rmgpy/molecule/group.py +++ b/rmgpy/molecule/group.py @@ -46,8 +46,91 @@ from rmgpy.molecule.graph import Vertex, Edge, Graph from rmgpy.molecule.fragment import CuttingLabel +# helper functions +# these were originall nested inside the indicated parent function, but when we upgraded to +# Cython 3 this was no longer allowed - thus, they now live here. -################################################################################ +# add_implicit_benzene +def check_set(super_list, sub_list): + """ + Args: + super_list: list to check if superset of partList + sub_list: list to check if subset of superList + + Returns: Boolean to see if super_list is a superset of sub_list + + """ + super_set = set(super_list) + sub_set = set(sub_list) + return super_set.issuperset(sub_set) + +def add_cb_atom_to_ring(ring, cb_atom): + """ + Every 'Cb' atom belongs in exactly one benzene ring. This function checks + adds the cb_atom to the ring (in connectivity order) if the cb_atom is connected + to any the last or first atom in the partial ring. + + Args: + ring: list of :class:GroupAtoms representing a partial ring to merge + cb_atom: :class:GroupAtom with atomtype 'Cb' + + Returns: If cb_atom connects to the beginning or end of ring, returns a + new list of the merged ring, otherwise an empty list + + """ + + merged_ring = [] + # ring already complete + if len(ring) == 6: return merged_ring + for atom2, bond12 in cb_atom.bonds.items(): + if bond12.is_benzene(): + if atom2 is ring[-1]: + merged_ring = ring + [cb_atom] + elif atom2 is ring[0]: + merged_ring = [cb_atom] + ring + + return merged_ring + +def merge_overlapping_benzene_rings(ring1, ring2, od): + """ + The input arguments of rings are always in the order that the atoms appear + inside the ring. That is, each atom is connected to the ones adjacent on the + list. + + Args: + ring1: list of :class:GroupAtoms representing first partial ring to merge + ring2: list :class:GroupAtoms representing second partial ring to merge + od: in for overlap distance + + This function tries to see if the beginning or ends of each list have the + same atom objects, i.e the two part rings should be merged together. + + Returns: If rings are mergable, returns a new list of the merged ring, otherwise + an empty list + + """ + new_ring = [] + # ring already complete + if len(ring1) == 6 or len(ring2) == 6: return new_ring + + # start of ring1 matches end of ring2 + match_list1 = [x1 is x2 for x1, x2 in zip(ring1[-od:], ring2[:od])] + # end of ring1 matches end of ring2 + match_list2 = [x1 is x2 for x1, x2 in zip(ring1[-od:], ring2[:od - 1:-1])] + # start of ring1 matches end of ring2 + match_list3 = [x1 is x2 for x1, x2 in zip(ring1[:od], ring2[-od:])] + # start of ring1 matches start of ring2 + match_list4 = [x1 is x2 for x1, x2 in zip(ring1[:od], ring2[od::-1])] + if False not in match_list1: + new_ring = ring1 + ring2[od:] + elif False not in match_list2: + new_ring = ring1 + ring2[-od - 1::-1] + elif False not in match_list3: + new_ring = ring2[:-od] + ring1 + elif False not in match_list4: + new_ring = ring2[:od - 1:-1] + ring1 + + return new_ring class GroupAtom(Vertex): """ @@ -2531,89 +2614,6 @@ def add_implicit_benzene(self): # Note that atomtypes like N5bd are mostly referred to as Cb in this code, # which was first written for just carbon. - # First define some helper functions - def check_set(super_list, sub_list): - """ - Args: - super_list: list to check if superset of partList - sub_list: list to check if subset of superList - - Returns: Boolean to see if super_list is a superset of sub_list - - """ - super_set = set(super_list) - sub_set = set(sub_list) - return super_set.issuperset(sub_set) - - def add_cb_atom_to_ring(ring, cb_atom): - """ - Every 'Cb' atom belongs in exactly one benzene ring. This function checks - adds the cb_atom to the ring (in connectivity order) if the cb_atom is connected - to any the last or first atom in the partial ring. - - Args: - ring: list of :class:GroupAtoms representing a partial ring to merge - cb_atom: :class:GroupAtom with atomtype 'Cb' - - Returns: If cb_atom connects to the beginning or end of ring, returns a - new list of the merged ring, otherwise an empty list - - """ - - merged_ring = [] - # ring already complete - if len(ring) == 6: return merged_ring - for atom2, bond12 in cb_atom.bonds.items(): - if bond12.is_benzene(): - if atom2 is ring[-1]: - merged_ring = ring + [cb_atom] - elif atom2 is ring[0]: - merged_ring = [cb_atom] + ring - - return merged_ring - - def merge_overlapping_benzene_rings(ring1, ring2, od): - """ - The input arguments of rings are always in the order that the atoms appear - inside the ring. That is, each atom is connected to the ones adjacent on the - list. - - Args: - ring1: list of :class:GroupAtoms representing first partial ring to merge - ring2: list :class:GroupAtoms representing second partial ring to merge - od: in for overlap distance - - This function tries to see if the beginning or ends of each list have the - same atom objects, i.e the two part rings should be merged together. - - Returns: If rings are mergable, returns a new list of the merged ring, otherwise - an empty list - - """ - new_ring = [] - # ring already complete - if len(ring1) == 6 or len(ring2) == 6: return new_ring - - # start of ring1 matches end of ring2 - match_list1 = [x1 is x2 for x1, x2 in zip(ring1[-od:], ring2[:od])] - # end of ring1 matches end of ring2 - match_list2 = [x1 is x2 for x1, x2 in zip(ring1[-od:], ring2[:od - 1:-1])] - # start of ring1 matches end of ring2 - match_list3 = [x1 is x2 for x1, x2 in zip(ring1[:od], ring2[-od:])] - # start of ring1 matches start of ring2 - match_list4 = [x1 is x2 for x1, x2 in zip(ring1[:od], ring2[od::-1])] - if False not in match_list1: - new_ring = ring1 + ring2[od:] - elif False not in match_list2: - new_ring = ring1 + ring2[-od - 1::-1] - elif False not in match_list3: - new_ring = ring2[:-od] + ring1 - elif False not in match_list4: - new_ring = ring2[:od - 1:-1] + ring1 - - return new_ring - - ####################################################################################### # start of main algorithm copy_group = deepcopy(self) """ diff --git a/rmgpy/molecule/inchi.py b/rmgpy/molecule/inchi.py index 59e49d8522..438b2b1532 100644 --- a/rmgpy/molecule/inchi.py +++ b/rmgpy/molecule/inchi.py @@ -30,6 +30,7 @@ import itertools import re import warnings +from operator import itemgetter import cython from rdkit import Chem @@ -614,7 +615,7 @@ def create_augmented_layers(mol): atom_indices = [atom_indices.index(i + 1) for i, atom in enumerate(molcopy.atoms)] # sort the atoms based on the order of the atom indices - molcopy.atoms = [x for (y, x) in sorted(zip(atom_indices, molcopy.atoms), key=lambda pair: pair[0])] + molcopy.atoms = [x for (y, x) in sorted(zip(atom_indices, molcopy.atoms), key=itemgetter(0))] ulayer = _create_u_layer(molcopy, auxinfo) @@ -704,17 +705,15 @@ def _convert_3_atom_2_bond_path(start, mol): """ from rmgpy.data.kinetics.family import ReactionRecipe - def is_valid(mol): - """Check if total bond order of oxygen atoms is smaller than 4.""" - - for at in mol.atoms: - if at.number == 8: - order = at.get_total_bond_order() - not_correct = order >= 4 - if not_correct: - return False - - return True + # Check if total bond order of oxygen atoms is smaller than 4 + is_mol_valid = True + for at in mol.atoms: + if at.number == 8: + order = at.get_total_bond_order() + not_correct = order >= 4 + if not_correct: + is_mol_valid = False + break paths = pathfinder.find_allyl_end_with_charge(start) @@ -739,7 +738,7 @@ def is_valid(mol): end.charge += 1 if end.charge < 0 else -1 recipe.apply_forward(mol) - if is_valid(mol): + if is_mol_valid: # unlabel atoms so that they never cause trouble downstream for i, at in enumerate(path[::2]): at.label = '' diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 3f647c336a..5ac1a48702 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -41,6 +41,7 @@ from collections import OrderedDict, defaultdict from copy import deepcopy from urllib.parse import quote +from operator import attrgetter import cython import numpy as np @@ -62,6 +63,10 @@ ################################################################################ +# helper function for sorting +def _skip_first(in_tuple): + return in_tuple[1:] + bond_orders = {'S': 1, 'D': 2, 'T': 3, 'B': 1.5} globals().update({ @@ -2543,30 +2548,21 @@ def get_aromatic_rings(self, rings=None, save_order=False): if rings is None: rings = self.get_relevant_cycles() - def filter_fused_rings(_rings): - """ - Given a list of rings, remove ones which share more than 2 atoms. - """ - cython.declare(toRemove=set, i=cython.int, j=cython.int, toRemoveSorted=list) - - if len(_rings) < 2: - return _rings - + # Remove rings that share more than 3 atoms, since they cannot be planar + cython.declare(toRemove=set, j=cython.int, toRemoveSorted=list) + if len(rings) < 2: + pass + else: to_remove = set() - for i, j in itertools.combinations(range(len(_rings)), 2): - if len(set(_rings[i]) & set(_rings[j])) > 2: + for i, j in itertools.combinations(range(len(rings)), 2): + if len(set(rings[i]) & set(rings[j])) > 2: to_remove.add(i) to_remove.add(j) to_remove_sorted = sorted(to_remove, reverse=True) for i in to_remove_sorted: - del _rings[i] - - return _rings - - # Remove rings that share more than 3 atoms, since they cannot be planar - rings = filter_fused_rings(rings) + del rings[i] # Only keep rings with exactly 6 atoms, since RMG can only handle aromatic benzene rings = [ring for ring in rings if len(ring) == 6] @@ -2702,7 +2698,7 @@ def get_deterministic_sssr(self): tup = (vertex, get_vertex_connectivity_value(vertex), -origin_conn_dict[vertex]) root_candidates_tups.append(tup) - root_vertex = sorted(root_candidates_tups, key=lambda tup0: tup0[1:], reverse=True)[0][0] + root_vertex = sorted(root_candidates_tups, key=_skip_first, reverse=True)[0][0] # Get all cycles involving the root vertex cycles = graph0.get_all_cycles(root_vertex) @@ -2719,7 +2715,7 @@ def get_deterministic_sssr(self): -sum([v.get_total_bond_order() for v in cycle0])) cycle_candidate_tups.append(tup) - cycle = sorted(cycle_candidate_tups, key=lambda tup0: tup0[1:])[0][0] + cycle = sorted(cycle_candidate_tups, key=_skip_first)[0][0] cycle_list.append(cycle) @@ -2803,8 +2799,8 @@ def is_identical(self, other, strict=True): if atom_ids == other_ids: # If the two molecules have the same indices, then they might be identical # Sort the atoms by ID - atom_list = sorted(self.atoms, key=lambda x: x.id) - other_list = sorted(other.atoms, key=lambda x: x.id) + atom_list = sorted(self.atoms, key=attrgetter('id')) + other_list = sorted(other.atoms, key=attrgetter('id')) # If matching atom indices gives a valid mapping, then the molecules are fully identical mapping = {} diff --git a/rmgpy/molecule/resonance.py b/rmgpy/molecule/resonance.py index b207568e71..24eccaa636 100644 --- a/rmgpy/molecule/resonance.py +++ b/rmgpy/molecule/resonance.py @@ -52,6 +52,7 @@ """ import logging +from operator import attrgetter import cython @@ -954,6 +955,13 @@ def generate_clar_structures(mol, save_order=False): return mol_list +# helper functions for sorting +def _sum_atom_ids(atom_list): + return sum(atom.id for atom in atom_list) + +def _tuplize_bond(bond): + return (bond.atom1.id, bond.atom2.id) + def _clar_optimization(mol, constraints=None, max_num=None, save_order=False): """ @@ -982,7 +990,7 @@ def _clar_optimization(mol, constraints=None, max_num=None, save_order=False): molecule = mol.copy(deep=True) aromatic_rings = molecule.get_aromatic_rings(save_order=save_order)[0] - aromatic_rings.sort(key=lambda x: sum([atom.id for atom in x])) + aromatic_rings.sort(key=_sum_atom_ids) if not aromatic_rings: return [] @@ -991,13 +999,13 @@ def _clar_optimization(mol, constraints=None, max_num=None, save_order=False): atoms = set() for ring in aromatic_rings: atoms.update(ring) - atoms = sorted(atoms, key=lambda x: x.id) + atoms = sorted(atoms, key=attrgetter('id')) # Get list of bonds involving the ring atoms, ignoring bonds to hydrogen bonds = set() for atom in atoms: bonds.update([atom.bonds[key] for key in atom.bonds.keys() if key.is_non_hydrogen()]) - bonds = sorted(bonds, key=lambda x: (x.atom1.id, x.atom2.id)) + bonds = sorted(bonds, key=_tuplize_bond) # Identify exocyclic bonds, and save their bond orders exo = [] diff --git a/rmgpy/pdep/reaction.pyx b/rmgpy/pdep/reaction.pyx index 6a6327c5d7..d8e6942de4 100644 --- a/rmgpy/pdep/reaction.pyx +++ b/rmgpy/pdep/reaction.pyx @@ -308,7 +308,7 @@ def apply_inverse_laplace_transform_method(transition_state, and abs(dens_states[r - m, s]) > 1e-12 \ and abs(dens_states[r - m - 1, s]) > 1e-12: num = dens_states[r - m, s] * (dens_states[r - m - 1, s] / dens_states[r - m, s]) \ - ** (-rem / (e_list[r - m - 1] - e_list[r - m])) + ** (-rem / (e_list[r - m - 1] - e_list[r - m])).real k[r, s] = freq_factor * num / dens_states[r, s] elif n >= n_crit: diff --git a/rmgpy/reaction.py b/rmgpy/reaction.py index b8ab3951c8..66119c3805 100644 --- a/rmgpy/reaction.py +++ b/rmgpy/reaction.py @@ -65,6 +65,16 @@ ################################################################################ +# helper function for sorting +def get_sorting_key(spc): + # List of elements to sort by, order is intentional + numbers = [6, 8, 7, 14, 16, 15, 17, 53, 9, 35] # C, O, N, Si, S, P, Cl, I, F, Br + ele_count = dict([(n,0) for n in numbers]) + for atom in spc.molecule[0].atoms: + if isinstance(atom, Atom) and atom.element.number in numbers: + ele_count[atom.element.number] += 1 + return tuple(ele_count[n] for n in numbers) + class Reaction: """ @@ -1516,14 +1526,7 @@ def generate_pairs(self): reactants = self.reactants[:] products = self.products[:] - def get_sorting_key(spc): - # List of elements to sort by, order is intentional - numbers = [6, 8, 7, 14, 16, 15, 17, 53, 9, 35] # C, O, N, Si, S, P, Cl, I, F, Br - ele_count = dict([(n,0) for n in numbers]) - for atom in spc.molecule[0].atoms: - if isinstance(atom, Atom) and atom.element.number in numbers: - ele_count[atom.element.number] += 1 - return tuple(ele_count[n] for n in numbers) + # Sort the reactants and products by element counts reactants.sort(key=get_sorting_key) products.sort(key=get_sorting_key) @@ -1769,7 +1772,7 @@ def get_reduced_mass(self, reverse=False): mass_list = [spc.molecule[0].get_molecular_weight() for spc in self.products] else: mass_list = [spc.molecule[0].get_molecular_weight() for spc in self.reactants] - reduced_mass = reduce((lambda x, y: x * y), mass_list) / sum(mass_list) + reduced_mass = np.prod(mass_list) / sum(mass_list) return reduced_mass def get_mean_sigma_and_epsilon(self, reverse=False): @@ -1795,7 +1798,8 @@ def get_mean_sigma_and_epsilon(self, reverse=False): if any([x == 0 for x in sigmas + epsilons]): raise ValueError mean_sigmas = sum(sigmas) / num_of_spcs - mean_epsilons = reduce((lambda x, y: x * y), epsilons) ** (1 / len(epsilons)) + mean_epsilons = np.prod(epsilons) ** (1 / len(epsilons)) + return mean_sigmas, mean_epsilons def generate_high_p_limit_kinetics(self): @@ -1805,6 +1809,15 @@ def generate_high_p_limit_kinetics(self): """ raise NotImplementedError("generate_high_p_limit_kinetics is not implemented for all Reaction subclasses.") +def same_object(object1, object2, _check_identical, _only_check_label, + _generate_initial_map, _strict, _save_order): + if _only_check_label: + return str(object1) == str(object2) + elif _check_identical: + return object1.is_identical(object2, strict=_strict) + else: + return object1.is_isomorphic(object2, generate_initial_map=_generate_initial_map, + strict=_strict, save_order=_save_order) def same_species_lists(list1, list2, check_identical=False, only_check_label=False, generate_initial_map=False, strict=True, save_order=False): @@ -1826,45 +1839,35 @@ def same_species_lists(list1, list2, check_identical=False, only_check_label=Fal ``True`` if the lists are the same and ``False`` otherwise """ - def same(object1, object2, _check_identical=check_identical, _only_check_label=only_check_label, - _generate_initial_map=generate_initial_map, _strict=strict, _save_order=save_order): - if _only_check_label: - return str(object1) == str(object2) - elif _check_identical: - return object1.is_identical(object2, strict=_strict) - else: - return object1.is_isomorphic(object2, generate_initial_map=_generate_initial_map, - strict=_strict, save_order=_save_order) - if len(list1) == len(list2) == 1: - if same(list1[0], list2[0]): + if same_object(list1[0], list2[0]): return True elif len(list1) == len(list2) == 2: - if same(list1[0], list2[0]) and same(list1[1], list2[1]): + if same_object(list1[0], list2[0]) and same_object(list1[1], list2[1]): return True - elif same(list1[0], list2[1]) and same(list1[1], list2[0]): + elif same_object(list1[0], list2[1]) and same_object(list1[1], list2[0]): return True elif len(list1) == len(list2) == 3: - if same(list1[0], list2[0]): - if same(list1[1], list2[1]): - if same(list1[2], list2[2]): + if same_object(list1[0], list2[0]): + if same_object(list1[1], list2[1]): + if same_object(list1[2], list2[2]): return True - elif same(list1[1], list2[2]): - if same(list1[2], list2[1]): + elif same_object(list1[1], list2[2]): + if same_object(list1[2], list2[1]): return True - elif same(list1[0], list2[1]): - if same(list1[1], list2[0]): - if same(list1[2], list2[2]): + elif same_object(list1[0], list2[1]): + if same_object(list1[1], list2[0]): + if same_object(list1[2], list2[2]): return True - elif same(list1[1], list2[2]): - if same(list1[2], list2[0]): + elif same_object(list1[1], list2[2]): + if same_object(list1[2], list2[0]): return True - elif same(list1[0], list2[2]): - if same(list1[1], list2[0]): - if same(list1[2], list2[1]): + elif same_object(list1[0], list2[2]): + if same_object(list1[1], list2[0]): + if same_object(list1[2], list2[1]): return True - elif same(list1[1], list2[1]): - if same(list1[2], list2[0]): + elif same_object(list1[1], list2[1]): + if same_object(list1[2], list2[0]): return True elif len(list1) == len(list2): raise NotImplementedError("Can't check isomorphism of lists with {0} species/molecules".format(len(list1)))