From 40f50896db9a08106e1429be29becbd041835ea3 Mon Sep 17 00:00:00 2001 From: Geemi Wellawatte <49410838+geemi725@users.noreply.github.com> Date: Thu, 21 Nov 2024 22:48:52 -0800 Subject: [PATCH] Issue 144 - remove langchain (#145) * remove langchain dependency * remove langchain from setup.py * Fixed tests * Updated CI * Updated changelog * Bumped version * Relaxed requirements --------- Co-authored-by: Andrew White <white.d.andrew@gmail.com> --- .github/workflows/build.yml | 4 ++-- .github/workflows/docs.yml | 4 ++-- .github/workflows/paper.yml | 4 ++-- .github/workflows/tests.yml | 2 +- docs/source/changelog.rst | 4 ++++ exmol/exmol.py | 30 ++++++++++++++++++++---------- exmol/version.py | 2 +- paper1_CFs/requirements.txt | 4 ++-- paper2_LIME/RF-lime.ipynb | 20 ++++++++------------ paper2_LIME/Solubility-RNN.ipynb | 9 ++------- paper2_LIME/requirements.txt | 4 ++-- paper3_Scents/requirements.txt | 14 +++++++------- setup.py | 2 +- 13 files changed, 54 insertions(+), 49 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 835b5f8a..d1859ef5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,10 +15,10 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up Python "3.8" + - name: Set up Python "3.11" uses: actions/setup-python@v2 with: - python-version: "3.8" + python-version: "3.11" - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index ad764d9e..c3ba4a0b 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -12,10 +12,10 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up Python 3.8 + - name: Set up Python 3.11 uses: actions/setup-python@v2 with: - python-version: '3.8' + python-version: '3.11' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/paper.yml b/.github/workflows/paper.yml index 32ee953f..757ea7bc 100644 --- a/.github/workflows/paper.yml +++ b/.github/workflows/paper.yml @@ -13,10 +13,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up Python 3.8 + - name: Set up Python 3.11 uses: actions/setup-python@v2 with: - python-version: "3.8" + python-version: "3.11" - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 559d42f2..7324ebd1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, "3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v2 diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 10c36131..85e73544 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,6 +1,10 @@ Change Log ========== +v3.1.0 (2024-11-21) +------------------- +* Removed langchain and switched to use openai API directly + v3.0.3 (2023-06-19) ------------------- * Now compatible with python 3.11 diff --git a/exmol/exmol.py b/exmol/exmol.py index eabe072a..827cdb99 100644 --- a/exmol/exmol.py +++ b/exmol/exmol.py @@ -27,9 +27,8 @@ from rdkit.Chem.Draw import MolToImage as mol2img, DrawMorganBit # type: ignore from rdkit.Chem import rdchem # type: ignore from rdkit.DataStructs.cDataStructs import BulkTanimotoSimilarity, TanimotoSimilarity # type: ignore -import langchain.llms as llms -import langchain.prompts as prompts +import openai from . import stoned from .plot_utils import _mol_images, _image_scatter, _bit2atoms from .data import * @@ -392,6 +391,7 @@ def _check_alphabet_consistency( alphabet_symbols = _alphabet_to_elements(set(alphabet_symbols)) # find all elements in smiles (Upper alpha or upper alpha followed by lower alpha) smiles_symbols = set(re.findall(r"[A-Z][a-z]?", smiles)) + if check and not smiles_symbols.issubset(alphabet_symbols): # show which symbols are not in alphabet raise ValueError( @@ -1410,7 +1410,7 @@ def merge_text_explains( def text_explain_generate( text_explanations: List[Tuple[str, float]], property_name: str, - llm: Optional[llms.BaseLLM] = None, + llm_model: str = "gpt-4o", single: bool = True, ) -> str: """Insert text explanations into template, and generate explanation. @@ -1430,14 +1430,24 @@ def text_explain_generate( for x in text_explanations ] ) - prompt_template = prompts.PromptTemplate( - input_variables=["property", "text"], - template=_single_prompt if single else _multi_prompt, - ) + + prompt_template = _single_prompt if single else _multi_prompt prompt = prompt_template.format(property=property_name, text=text) - if llm is None: - llm = llms.OpenAI(temperature=0.05) - return llm(prompt) + + messages = [ + { + "role": "system", + "content": "Your goal is to explain which molecular features are important to its properties based on the given text.", + }, + {"role": "user", "content": prompt}, + ] + response = openai.chat.completions.create( + model=llm_model, + messages=messages, + temperature=0.05, + ) + + return response.choices[0].message.content def text_explain( diff --git a/exmol/version.py b/exmol/version.py index 36ab2067..5e6690cf 100644 --- a/exmol/version.py +++ b/exmol/version.py @@ -1 +1 @@ -__version__ = "3.0.4" +__version__ = "3.1.0" diff --git a/paper1_CFs/requirements.txt b/paper1_CFs/requirements.txt index afd9ef0d..41fb5110 100644 --- a/paper1_CFs/requirements.txt +++ b/paper1_CFs/requirements.txt @@ -1,5 +1,5 @@ -mordred[full]==1.2.0 -scikit-learn==1.1.2 +mordred[full] +scikit-learn jupyter seaborn pandas diff --git a/paper2_LIME/RF-lime.ipynb b/paper2_LIME/RF-lime.ipynb index e14a2fa3..86c3b400 100644 --- a/paper2_LIME/RF-lime.ipynb +++ b/paper2_LIME/RF-lime.ipynb @@ -30,7 +30,7 @@ "import numpy as np\n", "import mordred, mordred.descriptors\n", "from mordred import HydrogenBond, Polarizability\n", - "from mordred import SLogP, AcidBase, BertzCT, Aromatic, BondCount, AtomCount\n", + "from mordred import SLogP, AcidBase, Aromatic, BondCount, AtomCount\n", "from mordred import Calculator\n", "\n", "import exmol as exmol\n", @@ -38,7 +38,6 @@ "import os\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.model_selection import train_test_split\n", - "from sklearn.metrics import roc_auc_score, plot_roc_curve\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "rdDepictor.SetPreferCoordGen(True)\n", @@ -50,6 +49,9 @@ "soldata = pd.read_csv(\n", " \"https://github.com/whitead/dmol-book/raw/main/data/curated-solubility-dataset.csv\"\n", ")\n", + "#drop smile with containing 'P'\n", + "soldata = soldata[soldata[\"SMILES\"].str.contains(\"P\") == False]\n", + "\n", "features_start_at = list(soldata.columns).index(\"MolWt\")" ] }, @@ -97,7 +99,8 @@ "outputs": [], "source": [ "raw_features = np.array(raw_features)\n", - "labels = soldata[\"Solubility\"]" + "labels = soldata[\"Solubility\"]\n", + "print(len(labels)==len(molecules))" ] }, { @@ -197,7 +200,7 @@ "metadata": {}, "outputs": [], "source": [ - "smi = soldata.SMILES[1500]\n", + "smi = soldata.SMILES[150]\n", "stoned_kwargs = {\n", " \"num_samples\": 2000,\n", " \"alphabet\": exmol.get_basic_alphabet(),\n", @@ -275,13 +278,6 @@ "plt.gca().invert_yaxis()\n", "plt.title(\"Random Forest Regression\", fontsize=12)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -303,7 +299,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.11" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/paper2_LIME/Solubility-RNN.ipynb b/paper2_LIME/Solubility-RNN.ipynb index 450e2355..e37352f7 100644 --- a/paper2_LIME/Solubility-RNN.ipynb +++ b/paper2_LIME/Solubility-RNN.ipynb @@ -22,10 +22,6 @@ "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", - "from matplotlib.patches import Rectangle, FancyBboxPatch\n", - "from matplotlib.offsetbox import AnnotationBbox\n", - "import seaborn as sns\n", - "import skunk\n", "import matplotlib as mpl\n", "import numpy as np\n", "import tensorflow as tf\n", @@ -33,10 +29,8 @@ "import exmol\n", "from dataclasses import dataclass\n", "from rdkit.Chem.Draw import rdDepictor, MolsToGridImage\n", - "from rdkit.Chem import MolFromSmiles, MACCSkeys\n", + "from rdkit.Chem import MolFromSmiles\n", "import random\n", - "\n", - "\n", "rdDepictor.SetPreferCoordGen(True)\n", "import matplotlib.pyplot as plt\n", "import matplotlib.font_manager as font_manager\n", @@ -66,6 +60,7 @@ "soldata = pd.read_csv(\n", " \"https://github.com/whitead/dmol-book/raw/main/data/curated-solubility-dataset.csv\"\n", ")\n", + "\n", "features_start_at = list(soldata.columns).index(\"MolWt\")\n", "np.random.seed(0)\n", "random.seed(0)" diff --git a/paper2_LIME/requirements.txt b/paper2_LIME/requirements.txt index afd9ef0d..41fb5110 100644 --- a/paper2_LIME/requirements.txt +++ b/paper2_LIME/requirements.txt @@ -1,5 +1,5 @@ -mordred[full]==1.2.0 -scikit-learn==1.1.2 +mordred[full] +scikit-learn jupyter seaborn pandas diff --git a/paper3_Scents/requirements.txt b/paper3_Scents/requirements.txt index b0c837e7..2d1b242d 100644 --- a/paper3_Scents/requirements.txt +++ b/paper3_Scents/requirements.txt @@ -1,14 +1,14 @@ pyrfume -tensorflow==2.5.0 +tensorflow>2.5.0 seaborn -jaxlib==0.1.67 -jax==0.2.13 +jaxlib +jax pandas -dm-haiku==0.0.5 -chex==0.0.7 -optax==0.0.9 +dm-haiku +chex +optax matplotlib -scikit-learn==1.1.2 +scikit-learn jupyter CairoSVG Pillow diff --git a/setup.py b/setup.py index 2333f616..cd326aa7 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ "skunk >= 0.4.0", "importlib-resources", "synspace", - "langchain==0.0.343", + "openai", ], test_suite="tests", long_description=long_description,