From 40f50896db9a08106e1429be29becbd041835ea3 Mon Sep 17 00:00:00 2001
From: Geemi Wellawatte <49410838+geemi725@users.noreply.github.com>
Date: Thu, 21 Nov 2024 22:48:52 -0800
Subject: [PATCH] Issue 144 - remove langchain (#145)

* remove langchain dependency

* remove langchain from setup.py

* Fixed tests

* Updated CI

* Updated changelog

* Bumped version

* Relaxed requirements

---------

Co-authored-by: Andrew White <white.d.andrew@gmail.com>
---
 .github/workflows/build.yml      |  4 ++--
 .github/workflows/docs.yml       |  4 ++--
 .github/workflows/paper.yml      |  4 ++--
 .github/workflows/tests.yml      |  2 +-
 docs/source/changelog.rst        |  4 ++++
 exmol/exmol.py                   | 30 ++++++++++++++++++++----------
 exmol/version.py                 |  2 +-
 paper1_CFs/requirements.txt      |  4 ++--
 paper2_LIME/RF-lime.ipynb        | 20 ++++++++------------
 paper2_LIME/Solubility-RNN.ipynb |  9 ++-------
 paper2_LIME/requirements.txt     |  4 ++--
 paper3_Scents/requirements.txt   | 14 +++++++-------
 setup.py                         |  2 +-
 13 files changed, 54 insertions(+), 49 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 835b5f8a..d1859ef5 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -15,10 +15,10 @@ jobs:
 
     steps:
     - uses: actions/checkout@v2
-    - name: Set up Python "3.8"
+    - name: Set up Python "3.11"
       uses: actions/setup-python@v2
       with:
-        python-version: "3.8"
+        python-version: "3.11"
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index ad764d9e..c3ba4a0b 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -12,10 +12,10 @@ jobs:
 
     steps:
     - uses: actions/checkout@v2
-    - name: Set up Python 3.8
+    - name: Set up Python 3.11
       uses: actions/setup-python@v2
       with:
-        python-version: '3.8'
+        python-version: '3.11'
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/.github/workflows/paper.yml b/.github/workflows/paper.yml
index 32ee953f..757ea7bc 100644
--- a/.github/workflows/paper.yml
+++ b/.github/workflows/paper.yml
@@ -13,10 +13,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - name: Set up Python 3.8
+    - name: Set up Python 3.11
       uses: actions/setup-python@v2
       with:
-        python-version: "3.8"
+        python-version: "3.11"
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 559d42f2..7324ebd1 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.8, 3.9, "3.10", "3.11"]
+        python-version: ["3.10", "3.11", "3.12"]
 
     steps:
     - uses: actions/checkout@v2
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 10c36131..85e73544 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -1,6 +1,10 @@
 Change Log
 ==========
 
+v3.1.0 (2024-11-21)
+-------------------
+* Removed langchain and switched to using the OpenAI API directly
+
 v3.0.3 (2023-06-19)
 -------------------
 * Now compatible with python 3.11
diff --git a/exmol/exmol.py b/exmol/exmol.py
index eabe072a..827cdb99 100644
--- a/exmol/exmol.py
+++ b/exmol/exmol.py
@@ -27,9 +27,8 @@
 from rdkit.Chem.Draw import MolToImage as mol2img, DrawMorganBit  # type: ignore
 from rdkit.Chem import rdchem  # type: ignore
 from rdkit.DataStructs.cDataStructs import BulkTanimotoSimilarity, TanimotoSimilarity  # type: ignore
-import langchain.llms as llms
-import langchain.prompts as prompts
 
+import openai
 from . import stoned
 from .plot_utils import _mol_images, _image_scatter, _bit2atoms
 from .data import *
@@ -392,6 +391,7 @@ def _check_alphabet_consistency(
     alphabet_symbols = _alphabet_to_elements(set(alphabet_symbols))
     # find all elements in smiles (Upper alpha or upper alpha followed by lower alpha)
     smiles_symbols = set(re.findall(r"[A-Z][a-z]?", smiles))
+
     if check and not smiles_symbols.issubset(alphabet_symbols):
         # show which symbols are not in alphabet
         raise ValueError(
@@ -1410,7 +1410,7 @@ def merge_text_explains(
 def text_explain_generate(
     text_explanations: List[Tuple[str, float]],
     property_name: str,
-    llm: Optional[llms.BaseLLM] = None,
+    llm_model: str = "gpt-4o",
     single: bool = True,
 ) -> str:
     """Insert text explanations into template, and generate explanation.
@@ -1430,14 +1430,24 @@ def text_explain_generate(
             for x in text_explanations
         ]
     )
-    prompt_template = prompts.PromptTemplate(
-        input_variables=["property", "text"],
-        template=_single_prompt if single else _multi_prompt,
-    )
+
+    prompt_template = _single_prompt if single else _multi_prompt
     prompt = prompt_template.format(property=property_name, text=text)
-    if llm is None:
-        llm = llms.OpenAI(temperature=0.05)
-    return llm(prompt)
+
+    messages = [
+        {
+            "role": "system",
+            "content": "Your goal is to explain which molecular features are important to its properties based on the given text.",
+        },
+        {"role": "user", "content": prompt},
+    ]
+    response = openai.chat.completions.create(
+        model=llm_model,
+        messages=messages,
+        temperature=0.05,
+    )
+
+    return response.choices[0].message.content
 
 
 def text_explain(
diff --git a/exmol/version.py b/exmol/version.py
index 36ab2067..5e6690cf 100644
--- a/exmol/version.py
+++ b/exmol/version.py
@@ -1 +1 @@
-__version__ = "3.0.4"
+__version__ = "3.1.0"
diff --git a/paper1_CFs/requirements.txt b/paper1_CFs/requirements.txt
index afd9ef0d..41fb5110 100644
--- a/paper1_CFs/requirements.txt
+++ b/paper1_CFs/requirements.txt
@@ -1,5 +1,5 @@
-mordred[full]==1.2.0
-scikit-learn==1.1.2
+mordred[full]
+scikit-learn
 jupyter
 seaborn
 pandas
diff --git a/paper2_LIME/RF-lime.ipynb b/paper2_LIME/RF-lime.ipynb
index e14a2fa3..86c3b400 100644
--- a/paper2_LIME/RF-lime.ipynb
+++ b/paper2_LIME/RF-lime.ipynb
@@ -30,7 +30,7 @@
     "import numpy as np\n",
     "import mordred, mordred.descriptors\n",
     "from mordred import HydrogenBond, Polarizability\n",
-    "from mordred import SLogP, AcidBase, BertzCT, Aromatic, BondCount, AtomCount\n",
+    "from mordred import SLogP, AcidBase, Aromatic, BondCount, AtomCount\n",
     "from mordred import Calculator\n",
     "\n",
     "import exmol as exmol\n",
@@ -38,7 +38,6 @@
     "import os\n",
     "from sklearn.ensemble import RandomForestRegressor\n",
     "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.metrics import roc_auc_score, plot_roc_curve\n",
     "\n",
     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
     "rdDepictor.SetPreferCoordGen(True)\n",
@@ -50,6 +49,9 @@
     "soldata = pd.read_csv(\n",
     "    \"https://github.com/whitead/dmol-book/raw/main/data/curated-solubility-dataset.csv\"\n",
     ")\n",
+    "# drop molecules whose SMILES contain 'P'\n",
+    "soldata = soldata[soldata[\"SMILES\"].str.contains(\"P\") == False]\n",
+    "\n",
     "features_start_at = list(soldata.columns).index(\"MolWt\")"
    ]
   },
@@ -97,7 +99,8 @@
    "outputs": [],
    "source": [
     "raw_features = np.array(raw_features)\n",
-    "labels = soldata[\"Solubility\"]"
+    "labels = soldata[\"Solubility\"]\n",
+    "print(len(labels)==len(molecules))"
    ]
   },
   {
@@ -197,7 +200,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "smi = soldata.SMILES[1500]\n",
+    "smi = soldata.SMILES[150]\n",
     "stoned_kwargs = {\n",
     "    \"num_samples\": 2000,\n",
     "    \"alphabet\": exmol.get_basic_alphabet(),\n",
@@ -275,13 +278,6 @@
     "plt.gca().invert_yaxis()\n",
     "plt.title(\"Random Forest Regression\", fontsize=12)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -303,7 +299,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.11"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,
diff --git a/paper2_LIME/Solubility-RNN.ipynb b/paper2_LIME/Solubility-RNN.ipynb
index 450e2355..e37352f7 100644
--- a/paper2_LIME/Solubility-RNN.ipynb
+++ b/paper2_LIME/Solubility-RNN.ipynb
@@ -22,10 +22,6 @@
    "source": [
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
-    "from matplotlib.patches import Rectangle, FancyBboxPatch\n",
-    "from matplotlib.offsetbox import AnnotationBbox\n",
-    "import seaborn as sns\n",
-    "import skunk\n",
     "import matplotlib as mpl\n",
     "import numpy as np\n",
     "import tensorflow as tf\n",
@@ -33,10 +29,8 @@
     "import exmol\n",
     "from dataclasses import dataclass\n",
     "from rdkit.Chem.Draw import rdDepictor, MolsToGridImage\n",
-    "from rdkit.Chem import MolFromSmiles, MACCSkeys\n",
+    "from rdkit.Chem import MolFromSmiles\n",
     "import random\n",
-    "\n",
-    "\n",
     "rdDepictor.SetPreferCoordGen(True)\n",
     "import matplotlib.pyplot as plt\n",
     "import matplotlib.font_manager as font_manager\n",
@@ -66,6 +60,7 @@
     "soldata = pd.read_csv(\n",
     "    \"https://github.com/whitead/dmol-book/raw/main/data/curated-solubility-dataset.csv\"\n",
     ")\n",
+    "\n",
     "features_start_at = list(soldata.columns).index(\"MolWt\")\n",
     "np.random.seed(0)\n",
     "random.seed(0)"
diff --git a/paper2_LIME/requirements.txt b/paper2_LIME/requirements.txt
index afd9ef0d..41fb5110 100644
--- a/paper2_LIME/requirements.txt
+++ b/paper2_LIME/requirements.txt
@@ -1,5 +1,5 @@
-mordred[full]==1.2.0
-scikit-learn==1.1.2
+mordred[full]
+scikit-learn
 jupyter
 seaborn
 pandas
diff --git a/paper3_Scents/requirements.txt b/paper3_Scents/requirements.txt
index b0c837e7..2d1b242d 100644
--- a/paper3_Scents/requirements.txt
+++ b/paper3_Scents/requirements.txt
@@ -1,14 +1,14 @@
 pyrfume
-tensorflow==2.5.0
+tensorflow>2.5.0
 seaborn
-jaxlib==0.1.67
-jax==0.2.13
+jaxlib
+jax
 pandas
-dm-haiku==0.0.5
-chex==0.0.7
-optax==0.0.9
+dm-haiku
+chex
+optax
 matplotlib
-scikit-learn==1.1.2
+scikit-learn
 jupyter
 CairoSVG
 Pillow
diff --git a/setup.py b/setup.py
index 2333f616..cd326aa7 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@
         "skunk >= 0.4.0",
         "importlib-resources",
         "synspace",
-        "langchain==0.0.343",
+        "openai",
     ],
     test_suite="tests",
     long_description=long_description,