Skip to content

Commit 801aba2

Browse files
Merge pull request #26 from doyle-lab-ucla/dev2
2 parents 96ee06a + ea84090 commit 801aba2

File tree

3 files changed

+46
-38
lines changed

3 files changed

+46
-38
lines changed

autoqchem/helper_classes.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class slurm_status(enum.IntEnum):
3636
failed = 4 #: job failed
3737
incomplete = 5 #: job is incomplete, it should be resubmitted
3838
uploaded = 6 #: job has been uploaded to the DB successfully
39+
inspect = 7 #: job needs to be inspected due to problematic labeling
3940

4041

4142
@enum.unique

autoqchem/rdkit_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ def rdmol_from_slurm_jobs(jobs, postDFT=True) -> Chem.Mol:
185185
elements, connectivity_matrix, charges = jobs[0].elements, jobs[0].connectivity_matrix, jobs[0].charges
186186
conformer_coordinates = []
187187
energies = []
188+
labels_ok = True
188189
for j in jobs:
189190
if postDFT:
190191

@@ -193,7 +194,8 @@ def rdmol_from_slurm_jobs(jobs, postDFT=True) -> Chem.Mol:
193194
le.get_atom_labels()
194195

195196
# verify that the labels are in the same order in gaussian after running it
196-
assert tuple(le.labels) == tuple(elements)
197+
if tuple(le.labels) != tuple(elements):
198+
labels_ok = False
197199

198200
le.get_geometry()
199201
conformer_coordinates.append(le.geom[list('XYZ')].values)
@@ -216,7 +218,7 @@ def rdmol_from_slurm_jobs(jobs, postDFT=True) -> Chem.Mol:
216218
energies = [AllChem.MMFFGetMoleculeForceField(rdmol, props, confId=i).CalcEnergy()
217219
for i in range(rdmol.GetNumConformers())]
218220

219-
return rdmol, energies
221+
return rdmol, energies, labels_ok
220222

221223

222224
def rdmol_from_sge_jobs(jobs, postDFT=True) -> Chem.Mol:

autoqchem/slurm_manager.py

Lines changed: 41 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -234,33 +234,33 @@ def _retrieve_single_job(self, job) -> slurm_status:
234234

235235
# initialize the log extractor, it will try to read basic info from the file
236236
le = gaussian_log_extractor(log_file.local)
237-
if len(job.tasks) == le.n_tasks:
238-
job.status = slurm_status.done
239-
else:
240-
try: # look for more specific exception
241-
le.check_for_exceptions()
237+
238+
try: # look for more specific exception
239+
le.check_for_exceptions()
242240

243-
except NoGeometryException:
244-
job.status = slurm_status.failed
245-
logger.warning(
246-
f"Job {job.base_name} failed - the log file does not contain geometry. Cannot resubmit.")
241+
except NoGeometryException:
242+
job.status = slurm_status.failed
243+
logger.warning(
244+
f"Job {job.base_name} failed - the log file does not contain geometry. Cannot resubmit.")
247245

248-
except NegativeFrequencyException:
249-
job.status = slurm_status.incomplete
250-
logger.warning(
251-
f"Job {job.base_name} incomplete - log file contains negative frequencies. Resubmit job.")
246+
except NegativeFrequencyException:
247+
job.status = slurm_status.incomplete
248+
logger.warning(
249+
f"Job {job.base_name} incomplete - log file contains negative frequencies. Resubmit job.")
252250

253-
except OptimizationIncompleteException:
254-
job.status = slurm_status.incomplete
255-
logger.warning(f"Job {job.base_name} incomplete - geometry optimization did not complete.")
251+
except OptimizationIncompleteException:
252+
job.status = slurm_status.incomplete
253+
logger.warning(f"Job {job.base_name} incomplete - geometry optimization did not complete.")
256254

257-
except Exception as e:
258-
job.status = slurm_status.failed
259-
logger.warning(f"Job {job.base_name} failed with unhandled exception: {e}")
255+
except Exception as e:
256+
job.status = slurm_status.failed
257+
logger.warning(f"Job {job.base_name} failed with unhandled exception: {e}")
260258

261-
else: # no exceptions were thrown, but still the job is incomplete
262-
job.status = slurm_status.incomplete
263-
logger.warning(f"Job {job.base_name} incomplete.")
259+
if len(job.tasks) == le.n_tasks:
260+
job.status = slurm_status.done
261+
else: # no exceptions were thrown, but still the job is incomplete
262+
job.status = slurm_status.incomplete
263+
logger.warning(f"Job {job.base_name} incomplete.")
264264

265265
except FileNotFoundError:
266266
job.status = slurm_status.failed
@@ -369,20 +369,25 @@ def upload_done_molecules_to_db(self, tags, RMSD_threshold=0.35) -> None:
369369

370370
for done_can in done_cans:
371371
(keys, jobs) = zip(*self.get_jobs(can=done_can).items())
372-
rdmol, energies = rdmol_from_slurm_jobs(jobs, postDFT=True)
373-
keep = prune_rmsds(rdmol, RMSD_threshold)
374-
logger.info(f"Molecule {done_can} has {len(keys) - len(keep)} / {len(keys)} duplicate conformers.")
375-
376-
# remove duplicate jobs
377-
can_keys_to_remove = [key for i, key in enumerate(keys) if i not in keep]
378-
to_remove_jobs = {name: job for name, job in self.jobs.items() if name in can_keys_to_remove}
379-
logger.info(
380-
f"Removing {len(keys) - len(keep)} / {len(keys)} jobs and log files that contain duplicate conformers.")
381-
self.remove_jobs(to_remove_jobs)
382-
383-
# upload non-duplicate jobs
384-
can_keys_to_keep = [key for i, key in enumerate(keys) if i in keep]
385-
self._upload_can_to_db(can_keys_to_keep, tags)
372+
rdmol, energies, labels_ok = rdmol_from_slurm_jobs(jobs, postDFT=True)
373+
if labels_ok:
374+
keep = prune_rmsds(rdmol, RMSD_threshold)
375+
logger.info(f"Molecule {done_can} has {len(keys) - len(keep)} / {len(keys)} duplicate conformers.")
376+
377+
# remove duplicate jobs
378+
can_keys_to_remove = [key for i, key in enumerate(keys) if i not in keep]
379+
to_remove_jobs = {name: job for name, job in self.jobs.items() if name in can_keys_to_remove}
380+
logger.info(
381+
f"Removing {len(keys) - len(keep)} / {len(keys)} jobs and log files that contain duplicate conformers.")
382+
self.remove_jobs(to_remove_jobs)
383+
384+
# upload non-duplicate jobs
385+
can_keys_to_keep = [key for i, key in enumerate(keys) if i in keep]
386+
self._upload_can_to_db(can_keys_to_keep, tags)
387+
else:
388+
for key in keys:
389+
self.jobs[key].status = slurm_status.inspect
390+
self._cache()
386391

387392
def _upload_can_to_db(self, keys, tags) -> None:
388393
"""Uploading single molecule conformers to database.

0 commit comments

Comments (0)