Skip to content

Commit

Permalink
fix cython compilation errors (0.23 -> 3.n), SEE EXTENDED!!
Browse files Browse the repository at this point in the history
here are all the solutions that were implemented:
 - in a couple places we were implicitly casting a double to a numpy float, now need to explicitly grad the _real_ part of the double
 - Cython does not allow nested function declaration inside cpdef functions, move all of these outside of their parent functions or redefine them to be purely functional where practical
 - similar to the above, lambda function are no longer allowed - get the same treatment. what's different here is that usually we are using lambda in sorting calls, so we can replace these with operator.itemgetter or attrgetter, where relevant. this also involves re-writing a couple reduce calls, which also used lambda
 - modern cython does not allow re-declaring existing Cython variables (in previous versions this was a warning I think), so I just remove these where needed. (btw Cython is super cool, actually points out both of the declaration so that you can delete one)

i made these fixes while listening to The Metal by Tenacious D, Talk Too Much by Renee Rapp, BEST INTEREST by Tyler, The Creator (love the bass line), mos thoser by food house, and Doritos & Fritos by 100 Gecs
  • Loading branch information
JacksonBurns committed Dec 3, 2024
1 parent 6cb89a8 commit cdfc487
Show file tree
Hide file tree
Showing 7 changed files with 168 additions and 163 deletions.
5 changes: 2 additions & 3 deletions rmgpy/kinetics/kineticsdata.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,8 @@ cdef class PDepKineticsData(PDepKineticsModel):
Plow = Pdata[j]
Phigh = Pdata[j + 1]
if Plow <= P and P <= Phigh:
klow = kdata[i, j] * (kdata[i + 1, j] / kdata[i, j]) ** ((T - Tlow) / (Thigh - Tlow))
khigh = kdata[i, j + 1] * (kdata[i + 1, j + 1] / kdata[i, j + 1]) ** (
(T - Tlow) / (Thigh - Tlow))
klow = kdata[i, j] * (kdata[i + 1, j] / kdata[i, j]) ** ((T - Tlow) / (Thigh - Tlow)).real
khigh = kdata[i, j + 1] * (kdata[i + 1, j + 1] / kdata[i, j + 1]) ** ((T - Tlow) / (Thigh - Tlow)).real
k = klow * (khigh / klow) ** (log(P / Plow) / log(Phigh / Plow))
break

Expand Down
168 changes: 84 additions & 84 deletions rmgpy/molecule/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,91 @@
from rmgpy.molecule.graph import Vertex, Edge, Graph
from rmgpy.molecule.fragment import CuttingLabel

# helper functions
# these were originall nested inside the indicated parent function, but when we upgraded to
# Cython 3 this was no longer allowed - thus, they now live here.

################################################################################
# add_implicit_benzene
def check_set(super_list, sub_list):
"""
Args:
super_list: list to check if superset of partList
sub_list: list to check if subset of superList
Returns: Boolean to see if super_list is a superset of sub_list
"""
super_set = set(super_list)
sub_set = set(sub_list)
return super_set.issuperset(sub_set)

def add_cb_atom_to_ring(ring, cb_atom):
"""
Every 'Cb' atom belongs in exactly one benzene ring. This function checks
adds the cb_atom to the ring (in connectivity order) if the cb_atom is connected
to any the last or first atom in the partial ring.
Args:
ring: list of :class:GroupAtoms representing a partial ring to merge
cb_atom: :class:GroupAtom with atomtype 'Cb'
Returns: If cb_atom connects to the beginning or end of ring, returns a
new list of the merged ring, otherwise an empty list
"""

merged_ring = []
# ring already complete
if len(ring) == 6: return merged_ring
for atom2, bond12 in cb_atom.bonds.items():
if bond12.is_benzene():
if atom2 is ring[-1]:
merged_ring = ring + [cb_atom]
elif atom2 is ring[0]:
merged_ring = [cb_atom] + ring

return merged_ring

def merge_overlapping_benzene_rings(ring1, ring2, od):
"""
The input arguments of rings are always in the order that the atoms appear
inside the ring. That is, each atom is connected to the ones adjacent on the
list.
Args:
ring1: list of :class:GroupAtoms representing first partial ring to merge
ring2: list :class:GroupAtoms representing second partial ring to merge
od: in for overlap distance
This function tries to see if the beginning or ends of each list have the
same atom objects, i.e the two part rings should be merged together.
Returns: If rings are mergable, returns a new list of the merged ring, otherwise
an empty list
"""
new_ring = []
# ring already complete
if len(ring1) == 6 or len(ring2) == 6: return new_ring

# start of ring1 matches end of ring2
match_list1 = [x1 is x2 for x1, x2 in zip(ring1[-od:], ring2[:od])]
# end of ring1 matches end of ring2
match_list2 = [x1 is x2 for x1, x2 in zip(ring1[-od:], ring2[:od - 1:-1])]
# start of ring1 matches end of ring2
match_list3 = [x1 is x2 for x1, x2 in zip(ring1[:od], ring2[-od:])]
# start of ring1 matches start of ring2
match_list4 = [x1 is x2 for x1, x2 in zip(ring1[:od], ring2[od::-1])]
if False not in match_list1:
new_ring = ring1 + ring2[od:]
elif False not in match_list2:
new_ring = ring1 + ring2[-od - 1::-1]
elif False not in match_list3:
new_ring = ring2[:-od] + ring1
elif False not in match_list4:
new_ring = ring2[:od - 1:-1] + ring1

return new_ring

class GroupAtom(Vertex):
"""
Expand Down Expand Up @@ -2531,89 +2614,6 @@ def add_implicit_benzene(self):
# Note that atomtypes like N5bd are mostly referred to as Cb in this code,
# which was first written for just carbon.

# First define some helper functions
def check_set(super_list, sub_list):
"""
Args:
super_list: list to check if superset of partList
sub_list: list to check if subset of superList
Returns: Boolean to see if super_list is a superset of sub_list
"""
super_set = set(super_list)
sub_set = set(sub_list)
return super_set.issuperset(sub_set)

def add_cb_atom_to_ring(ring, cb_atom):
"""
Every 'Cb' atom belongs in exactly one benzene ring. This function checks
adds the cb_atom to the ring (in connectivity order) if the cb_atom is connected
to any the last or first atom in the partial ring.
Args:
ring: list of :class:GroupAtoms representing a partial ring to merge
cb_atom: :class:GroupAtom with atomtype 'Cb'
Returns: If cb_atom connects to the beginning or end of ring, returns a
new list of the merged ring, otherwise an empty list
"""

merged_ring = []
# ring already complete
if len(ring) == 6: return merged_ring
for atom2, bond12 in cb_atom.bonds.items():
if bond12.is_benzene():
if atom2 is ring[-1]:
merged_ring = ring + [cb_atom]
elif atom2 is ring[0]:
merged_ring = [cb_atom] + ring

return merged_ring

def merge_overlapping_benzene_rings(ring1, ring2, od):
"""
The input arguments of rings are always in the order that the atoms appear
inside the ring. That is, each atom is connected to the ones adjacent on the
list.
Args:
ring1: list of :class:GroupAtoms representing first partial ring to merge
ring2: list :class:GroupAtoms representing second partial ring to merge
od: in for overlap distance
This function tries to see if the beginning or ends of each list have the
same atom objects, i.e the two part rings should be merged together.
Returns: If rings are mergable, returns a new list of the merged ring, otherwise
an empty list
"""
new_ring = []
# ring already complete
if len(ring1) == 6 or len(ring2) == 6: return new_ring

# start of ring1 matches end of ring2
match_list1 = [x1 is x2 for x1, x2 in zip(ring1[-od:], ring2[:od])]
# end of ring1 matches end of ring2
match_list2 = [x1 is x2 for x1, x2 in zip(ring1[-od:], ring2[:od - 1:-1])]
# start of ring1 matches end of ring2
match_list3 = [x1 is x2 for x1, x2 in zip(ring1[:od], ring2[-od:])]
# start of ring1 matches start of ring2
match_list4 = [x1 is x2 for x1, x2 in zip(ring1[:od], ring2[od::-1])]
if False not in match_list1:
new_ring = ring1 + ring2[od:]
elif False not in match_list2:
new_ring = ring1 + ring2[-od - 1::-1]
elif False not in match_list3:
new_ring = ring2[:-od] + ring1
elif False not in match_list4:
new_ring = ring2[:od - 1:-1] + ring1

return new_ring

#######################################################################################
# start of main algorithm
copy_group = deepcopy(self)
"""
Expand Down
25 changes: 12 additions & 13 deletions rmgpy/molecule/inchi.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import itertools
import re
import warnings
from operator import itemgetter

import cython
from rdkit import Chem
Expand Down Expand Up @@ -614,7 +615,7 @@ def create_augmented_layers(mol):
atom_indices = [atom_indices.index(i + 1) for i, atom in enumerate(molcopy.atoms)]

# sort the atoms based on the order of the atom indices
molcopy.atoms = [x for (y, x) in sorted(zip(atom_indices, molcopy.atoms), key=lambda pair: pair[0])]
molcopy.atoms = [x for (y, x) in sorted(zip(atom_indices, molcopy.atoms), key=itemgetter(0))]

ulayer = _create_u_layer(molcopy, auxinfo)

Expand Down Expand Up @@ -704,17 +705,15 @@ def _convert_3_atom_2_bond_path(start, mol):
"""
from rmgpy.data.kinetics.family import ReactionRecipe

def is_valid(mol):
"""Check if total bond order of oxygen atoms is smaller than 4."""

for at in mol.atoms:
if at.number == 8:
order = at.get_total_bond_order()
not_correct = order >= 4
if not_correct:
return False

return True
# Check if total bond order of oxygen atoms is smaller than 4
is_mol_valid = True
for at in mol.atoms:
if at.number == 8:
order = at.get_total_bond_order()
not_correct = order >= 4
if not_correct:
is_mol_valid = False
break

paths = pathfinder.find_allyl_end_with_charge(start)

Expand All @@ -739,7 +738,7 @@ def is_valid(mol):
end.charge += 1 if end.charge < 0 else -1
recipe.apply_forward(mol)

if is_valid(mol):
if is_mol_valid:
# unlabel atoms so that they never cause trouble downstream
for i, at in enumerate(path[::2]):
at.label = ''
Expand Down
38 changes: 17 additions & 21 deletions rmgpy/molecule/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from collections import OrderedDict, defaultdict
from copy import deepcopy
from urllib.parse import quote
from operator import attrgetter

import cython
import numpy as np
Expand All @@ -62,6 +63,10 @@

################################################################################

# helper function for sorting
def _skip_first(in_tuple):
return in_tuple[1:]

bond_orders = {'S': 1, 'D': 2, 'T': 3, 'B': 1.5}

globals().update({
Expand Down Expand Up @@ -2543,30 +2548,21 @@ def get_aromatic_rings(self, rings=None, save_order=False):
if rings is None:
rings = self.get_relevant_cycles()

def filter_fused_rings(_rings):
"""
Given a list of rings, remove ones which share more than 2 atoms.
"""
cython.declare(toRemove=set, i=cython.int, j=cython.int, toRemoveSorted=list)

if len(_rings) < 2:
return _rings

# Remove rings that share more than 3 atoms, since they cannot be planar
cython.declare(toRemove=set, j=cython.int, toRemoveSorted=list)
if len(rings) < 2:
pass
else:
to_remove = set()
for i, j in itertools.combinations(range(len(_rings)), 2):
if len(set(_rings[i]) & set(_rings[j])) > 2:
for i, j in itertools.combinations(range(len(rings)), 2):
if len(set(rings[i]) & set(rings[j])) > 2:
to_remove.add(i)
to_remove.add(j)

to_remove_sorted = sorted(to_remove, reverse=True)

for i in to_remove_sorted:
del _rings[i]

return _rings

# Remove rings that share more than 3 atoms, since they cannot be planar
rings = filter_fused_rings(rings)
del rings[i]

# Only keep rings with exactly 6 atoms, since RMG can only handle aromatic benzene
rings = [ring for ring in rings if len(ring) == 6]
Expand Down Expand Up @@ -2702,7 +2698,7 @@ def get_deterministic_sssr(self):
tup = (vertex, get_vertex_connectivity_value(vertex), -origin_conn_dict[vertex])
root_candidates_tups.append(tup)

root_vertex = sorted(root_candidates_tups, key=lambda tup0: tup0[1:], reverse=True)[0][0]
root_vertex = sorted(root_candidates_tups, key=_skip_first, reverse=True)[0][0]

# Get all cycles involving the root vertex
cycles = graph0.get_all_cycles(root_vertex)
Expand All @@ -2719,7 +2715,7 @@ def get_deterministic_sssr(self):
-sum([v.get_total_bond_order() for v in cycle0]))
cycle_candidate_tups.append(tup)

cycle = sorted(cycle_candidate_tups, key=lambda tup0: tup0[1:])[0][0]
cycle = sorted(cycle_candidate_tups, key=_skip_first)[0][0]

cycle_list.append(cycle)

Expand Down Expand Up @@ -2803,8 +2799,8 @@ def is_identical(self, other, strict=True):
if atom_ids == other_ids:
# If the two molecules have the same indices, then they might be identical
# Sort the atoms by ID
atom_list = sorted(self.atoms, key=lambda x: x.id)
other_list = sorted(other.atoms, key=lambda x: x.id)
atom_list = sorted(self.atoms, key=attrgetter('id'))
other_list = sorted(other.atoms, key=attrgetter('id'))

# If matching atom indices gives a valid mapping, then the molecules are fully identical
mapping = {}
Expand Down
14 changes: 11 additions & 3 deletions rmgpy/molecule/resonance.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
"""

import logging
from operator import attrgetter

import cython

Expand Down Expand Up @@ -954,6 +955,13 @@ def generate_clar_structures(mol, save_order=False):

return mol_list

# helper functions for sorting
def _sum_atom_ids(atom_list):
return sum(atom.id for atom in atom_list)

def _tuplize_bond(bond):
return (bond.atom1.id, bond.atom2.id)


def _clar_optimization(mol, constraints=None, max_num=None, save_order=False):
"""
Expand Down Expand Up @@ -982,7 +990,7 @@ def _clar_optimization(mol, constraints=None, max_num=None, save_order=False):
molecule = mol.copy(deep=True)

aromatic_rings = molecule.get_aromatic_rings(save_order=save_order)[0]
aromatic_rings.sort(key=lambda x: sum([atom.id for atom in x]))
aromatic_rings.sort(key=_sum_atom_ids)

if not aromatic_rings:
return []
Expand All @@ -991,13 +999,13 @@ def _clar_optimization(mol, constraints=None, max_num=None, save_order=False):
atoms = set()
for ring in aromatic_rings:
atoms.update(ring)
atoms = sorted(atoms, key=lambda x: x.id)
atoms = sorted(atoms, key=attrgetter('id'))

# Get list of bonds involving the ring atoms, ignoring bonds to hydrogen
bonds = set()
for atom in atoms:
bonds.update([atom.bonds[key] for key in atom.bonds.keys() if key.is_non_hydrogen()])
bonds = sorted(bonds, key=lambda x: (x.atom1.id, x.atom2.id))
bonds = sorted(bonds, key=_tuplize_bond)

# Identify exocyclic bonds, and save their bond orders
exo = []
Expand Down
2 changes: 1 addition & 1 deletion rmgpy/pdep/reaction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def apply_inverse_laplace_transform_method(transition_state,
and abs(dens_states[r - m, s]) > 1e-12 \
and abs(dens_states[r - m - 1, s]) > 1e-12:
num = dens_states[r - m, s] * (dens_states[r - m - 1, s] / dens_states[r - m, s]) \
** (-rem / (e_list[r - m - 1] - e_list[r - m]))
** (-rem / (e_list[r - m - 1] - e_list[r - m])).real
k[r, s] = freq_factor * num / dens_states[r, s]

elif n >= n_crit:
Expand Down
Loading

0 comments on commit cdfc487

Please sign in to comment.