Skip to content

Commit

Permalink
Merge pull request #49 from ncats/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
iwwwish authored Nov 26, 2021
2 parents 9440263 + 65272cb commit e1604b1
Showing 8 changed files with 37 additions and 15 deletions.
2 changes: 1 addition & 1 deletion server/app.py
Original file line number Diff line number Diff line change
@@ -193,7 +193,7 @@ def predict_df(df, smi_column_name, models):

pred_df = predictor.get_predictions()
if data_path != '':
predictor.record_predictions(f'{data_path}predictions.csv')
predictor.record_predictions(f'{data_path}/predictions.csv')
pred_df = working_df.join(pred_df)
pred_df.drop(['mols', 'kekule_smiles'], axis=1, inplace=True)

Binary file modified server/default.profraw
Binary file not shown.
5 changes: 3 additions & 2 deletions server/predictors/base/gcnn.py
Original file line number Diff line number Diff line change
@@ -81,6 +81,7 @@ def gcnn_predict(self, model, scaler) -> Tuple[array, array]:
dt = datetime.datetime.now(timezone.utc)
utc_time = dt.replace(tzinfo=timezone.utc)
utc_timestamp = utc_time.timestamp()

self.raw_predictions_df = self.raw_predictions_df.append(
pd.DataFrame(
{ 'SMILES': self.smiles, 'model': self.model_name, 'prediction': predictions, 'timestamp': utc_timestamp }
@@ -92,5 +93,5 @@ def gcnn_predict(self, model, scaler) -> Tuple[array, array]:
if len(self.predictions_df.index) > len(predictions) or np.ma.count_masked(predictions) > 0:
self.model_errors.append('graph convolutional neural network')
self.has_errors = True
return predictions, labels

return predictions, labels
2 changes: 1 addition & 1 deletion server/predictors/cyp450/cyp450_predictor.py
Original file line number Diff line number Diff line change
@@ -184,7 +184,7 @@ def get_predictions(self):
pool.join()

end = time.time()
print(f'{end - start} seconds to CYP450 predict {len(self.predictions_df.index)} molecules')
print(f'CYP450: {end - start} seconds to predict {len(self.predictions_df.index)} molecules')

return self.predictions_df

29 changes: 25 additions & 4 deletions server/predictors/liver_cytosol/lc_predictor.py
Original file line number Diff line number Diff line change
@@ -65,7 +65,6 @@ def __init__(self, kekule_smiles: array = None, morgan_fp: array = None, smiles:
self.predictions_df = pd.DataFrame(columns=columns)
self.raw_predictions_df = pd.DataFrame()

print('Generating Morgan FPs...')
desc_gen = DescriptorGen()
kekule_smiles_df['desc'] = kekule_smiles_df['kekule_smiles'].apply(desc_gen.from_smiles)
self.morgan_fp = np.stack(kekule_smiles_df.desc)
@@ -84,15 +83,30 @@ def get_predictions(self):
pred_probs = model.predict_proba(features).T[1]
self.raw_predictions_df[model_name] = pred_probs

end = time.time()
print(f'{end - start} seconds to HLC predict {len(self.raw_predictions_df.index)} molecules')

self.raw_predictions_df['average'] = self.raw_predictions_df.mean(axis=1)
avg_pred_probs = self.raw_predictions_df['average'].tolist()
#self.predictions_df['Predicted Class (Probability)'] = pd.Series(pd.Series(avg_pred_probs).round().astype(int).astype(str) + ' (' + pd.Series(avg_pred_probs).round(2).astype(str) + ')')
self.predictions_df['Predicted Class (Probability)'] = pd.Series(pd.Series(avg_pred_probs).round().astype(int).astype(str) + ' (' + pd.Series(np.where(np.asarray(avg_pred_probs)>=0.5, np.asarray(avg_pred_probs), (1-np.asarray(avg_pred_probs)))).round(2).astype(str) + ')')
self.predictions_df['Prediction'] = pd.Series(pd.Series(np.where(np.asarray(avg_pred_probs)>=0.5, 'unstable', 'stable')))

# emptying the raw df
self.raw_predictions_df = pd.DataFrame(None)

# populate raw df for recording preds
if self.smiles is not None:
dt = datetime.datetime.now(timezone.utc)
utc_time = dt.replace(tzinfo=timezone.utc)
utc_timestamp = utc_time.timestamp()
self.raw_predictions_df = self.raw_predictions_df.append(
pd.DataFrame(
{ 'SMILES': self.smiles, 'model': 'hlc', 'prediction': avg_pred_probs, 'timestamp': utc_timestamp }
),
ignore_index = True
)

end = time.time()
print(f'HLC: {end - start} seconds to predict {len(self.raw_predictions_df.index)} molecules')

return self.predictions_df

def _error_callback(self, error):
@@ -105,3 +119,10 @@ def get_errors(self):

def columns_dict(self):
# Return the result of `self._columns_dict.copy()` rather than the attribute itself, so the caller receives a separate object.
return self._columns_dict.copy()

def record_predictions(self, file_path):
    """Append the rows of ``self.raw_predictions_df`` to a CSV file.

    Nothing is written (and the file is not created) when the raw
    predictions frame has no rows. No header row is emitted; rows are
    appended to whatever the file already contains.

    Args:
        file_path: Path of the CSV file to append to.
    """
    if len(self.raw_predictions_df.index) > 0:
        # newline='' is required by the csv module: the writer emits its
        # own '\r\n' terminators, and letting the text layer translate
        # them again produces '\r\r\n' on platforms that rewrite '\n'.
        with open(file_path, 'a', newline='') as fw:
            rows = self.raw_predictions_df.values.tolist()
            cw = csv.writer(fw)
            cw.writerows(rows)
6 changes: 3 additions & 3 deletions server/predictors/pampa50/pampa_predictor.py
Original file line number Diff line number Diff line change
@@ -53,7 +53,7 @@ def __init__(self, kekule_smiles: array = None, smiles: array = None):
}

self.model_name = 'pampa50'

def get_predictions(self) -> DataFrame:
"""
Function that calculates consensus predictions
@@ -67,10 +67,10 @@ def get_predictions(self) -> DataFrame:
start = time.time()
gcnn_predictions, gcnn_labels = self.gcnn_predict(pampa_gcnn_model, pampa_gcnn_scaler)
end = time.time()
print(f'{end - start} seconds to PAMPA predict {len(self.predictions_df.index)} molecules')
print(f'PAMPA 5.0: {end - start} seconds to predict {len(self.predictions_df.index)} molecules')

self.predictions_df['Prediction'] = pd.Series(
pd.Series(np.where(gcnn_predictions>=0.5, 'low permeability', 'moderate or high permeability'))
)

return self.predictions_df
2 changes: 1 addition & 1 deletion server/predictors/rlm/rlm_predictor.py
Original file line number Diff line number Diff line change
@@ -67,7 +67,7 @@ def get_predictions(self) -> DataFrame:
start = time.time()
gcnn_predictions, gcnn_labels = self.gcnn_predict(rlm_gcnn_model, rlm_gcnn_scaler)
end = time.time()
print(f'{end - start} seconds to RLM predict {len(self.predictions_df.index)} molecules')
print(f'RLM: {end - start} seconds to predict {len(self.predictions_df.index)} molecules')

self.predictions_df['Prediction'] = pd.Series(
pd.Series(np.where(gcnn_predictions>=0.5, 'unstable', 'stable'))
6 changes: 3 additions & 3 deletions server/predictors/solubility/solubility_predictor.py
Original file line number Diff line number Diff line change
@@ -53,7 +53,7 @@ def __init__(self, kekule_smiles: array = None, smiles: array = None):
}

self.model_name = 'solubility'

def get_predictions(self) -> DataFrame:
"""
Function that calculates consensus predictions
@@ -67,10 +67,10 @@ def get_predictions(self) -> DataFrame:
start = time.time()
gcnn_predictions, gcnn_labels = self.gcnn_predict(solubility_gcnn_model, solubility_gcnn_scaler)
end = time.time()
print(f'{end - start} seconds to Solubility predict {len(self.predictions_df.index)} molecules')
print(f'Solubility: {end - start} seconds to predict {len(self.predictions_df.index)} molecules')

self.predictions_df['Prediction'] = pd.Series(
pd.Series(np.where(gcnn_predictions>=0.5, 'low solubility', 'high solubility'))
)

return self.predictions_df

0 comments on commit e1604b1

Please sign in to comment.