From 0bc4612a42c6b4b1eb036d5986683f8163b93e15 Mon Sep 17 00:00:00 2001 From: Emil Bohleber Date: Fri, 19 Jul 2024 17:27:24 +0200 Subject: [PATCH] add python code --- .../code/2. self_querying_retriever.ipynb | 444 ++++++++++++++++++ 1 file changed, 444 insertions(+) create mode 100644 src/PYTHON/code/2. self_querying_retriever.ipynb diff --git a/src/PYTHON/code/2. self_querying_retriever.ipynb b/src/PYTHON/code/2. self_querying_retriever.ipynb new file mode 100644 index 0000000..3a2d870 --- /dev/null +++ b/src/PYTHON/code/2. self_querying_retriever.ipynb @@ -0,0 +1,444 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Self Querying Retrieval" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "%pip install -qU langchain\n", + "%pip install -qU langchain-community\n", + "%pip install -qU langchain_openai" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import needed libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.vectorstores import Chroma\n", + "from langchain_core.documents import Document\n", + "from langchain_openai import OpenAIEmbeddings\n", + "from langchain.chains.query_constructor.base import AttributeInfo\n", + "from langchain.retrievers.self_query.base import SelfQueryRetriever\n", + "from langchain_openai import ChatOpenAI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Config" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "embedding_model = OpenAIEmbeddings()\n", + "collection_name = \"books\"\n", + "language_model_name = \"gpt-3.5-turbo-0125\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load documents" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "docs = [\n", + " Document(\n", + " page_content= \"Portrays Elizabeth Bennet's growth in discerning true character over appearances, set against Regency England's social mores.\",\n", + " metadata= {\n", + " \"name\": \"Pride and Prejudice\",\n", + " \"author\": \"Jane Austen\",\n", + " \"first_published\": 1813,\n", + " \"genre\": \"Romance\",\n", + " \"origin\": \"England\",\n", + " \"rating\": 4.5\n", + " }\n", + " ),\n", + " Document(\n", + " page_content= \"Resists a totalitarian regime under constant surveillance, showcasing the perils of absolute power and the spirit of rebellion.\",\n", + " metadata= {\n", + " \"name\": \"1984\",\n", + " \"author\": \"George Orwell\",\n", + " \"first_published\": 1949,\n", + " \"genre\": \"Dystopian\",\n", + " \"origin\": \"England\",\n", + " \"rating\": 4.7\n", + " }\n", + " ),\n", + " Document(\n", + " page_content= \"Exposes racial injustices in the South through Scout Finch, whose father defends a wrongly accused black man, challenging societal prejudices.\",\n", + " metadata= {\n", + " \"name\": \"To Kill a Mockingbird\",\n", + " \"author\": \"Harper Lee\",\n", + " \"first_published\": 1960,\n", + " \"genre\": \"Southern Gothic\",\n", + " \"origin\": \"United States\",\n", + " \"rating\": 4.8\n", + " }\n", + " ),\n", + " Document(\n", + " page_content= \"Reveals the Jazz Age's allure and despair through Gatsby's doomed love, critiquing the American Dream's corruption.\",\n", + " metadata= {\n", + " \"name\": \"The Great Gatsby\",\n", + " \"author\": \"F. Scott Fitzgerald\",\n", + " \"first_published\": 1925,\n", + " \"genre\": \"Tragedy\",\n", + " \"origin\": \"United States\",\n", + " \"rating\": 4.6\n", + " }\n", + " ),\n", + " Document(\n", + " page_content= \"Follows Frodo Baggins on a quest to destroy a powerful ring, weaving a tale of bravery, friendship, and darkness in Middle-earth.\",\n", + " metadata= {\n", + " \"name\": \"The Lord of the Rings\",\n", + " \"author\": \"J.R.R. Tolkien\",\n", + " \"first_published\": 1954,\n", + " \"genre\": \"Fantasy\",\n", + " \"origin\": \"England\",\n", + " \"rating\": 4.9\n", + " }\n", + " ),\n", + " Document(\n", + " page_content= \"Confronts the haunting legacies of slavery through Sethe, a former slave tormented by her past, exploring themes of family and freedom.\",\n", + " metadata= {\n", + " \"name\": \"Beloved\",\n", + " \"author\": \"Toni Morrison\",\n", + " \"first_published\": 1987,\n", + " \"genre\": \"Historical Fiction\",\n", + " \"origin\": \"United States\",\n", + " \"rating\": 4.7\n", + " }\n", + " ),\n", + " Document(\n", + " page_content= \"Captures teenage angst and alienation through Holden Caulfield's cynical view of adult hypocrisy and the pains of growing up.\",\n", + " metadata= {\n", + " \"name\": \"The Catcher in the Rye\",\n", + " \"author\": \"J.D. Salinger\",\n", + " \"first_published\": 1951,\n", + " \"genre\": \"Coming-of-Age\",\n", + " \"origin\": \"United States\",\n", + " \"rating\": 4.5\n", + " }\n", + " ),\n", + " Document(\n", + " page_content= \"Examines the fallout of unchecked scientific ambition through Victor Frankenstein's creation of life, highlighting the ethical limits of science.\",\n", + " metadata= {\n", + " \"name\": \"Frankenstein\",\n", + " \"author\": \"Mary Shelley\",\n", + " \"first_published\": 1818,\n", + " \"genre\": \"Science Fiction\",\n", + " \"origin\": \"England\",\n", + " \"rating\": 4.6\n", + " }\n", + " ),\n", + " Document(\n", + " page_content= \"Depicts a future where technological progress has stunted humanity, questioning the cost of happiness and freedom.\",\n", + " metadata= {\n", + " \"name\": \"Brave New World\",\n", + " \"author\": \"Aldous Huxley\",\n", + " \"first_published\": 1932,\n", + " \"genre\": \"Science Fiction\",\n", + " \"origin\": \"England\",\n", + " \"rating\": 4.7\n", + " }\n", + " ),\n", + " Document(\n", + " page_content= \"Takes Arthur Dent on a ludicrous space journey, poking fun at life's absurdities and the universe's vast mysteries and the number 42.\",\n", + " metadata= {\n", + " \"name\": \"The Hitchhiker's Guide to the Galaxy\",\n", + " \"author\": \"Douglas Adams\",\n", + " \"first_published\": 1979,\n", + " \"genre\": \"Science Fiction\",\n", + " \"origin\": \"England\",\n", + " \"rating\": 4.5\n", + " }\n", + " ),\n", + " Document(\n", + " page_content= \"Delves into Raskolnikov's psyche after he murders for a 'noble' cause, probing the depths of guilt, morality, and redemption in bleak Russia.\",\n", + " metadata= {\n", + " \"name\": \"Crime and Punishment\",\n", + " \"author\": \"Fyodor Dostoevsky\",\n", + " \"first_published\": 1866,\n", + " \"genre\": \"Psychological Fiction\",\n", + " \"origin\": \"Russia\",\n", + " \"rating\": 4.3\n", + " }\n", + " )\n", + "]\n", + "\n", + "vectorstore = Chroma.from_documents(docs, embedding_model, persist_directory=\"./chroma_db\", collection_name=collection_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Self Querying Retriever Definition\n", + "\n", + "For more info, see https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "metadata_field_info = [\n", + " AttributeInfo(\n", + " name=\"name\",\n", + " description=\"The name of the book\",\n", + " type=\"string\"\n", + " ),\n", + " AttributeInfo(\n", + " name=\"author\",\n", + " description=\"The name of the author of the book\",\n", + " type=\"string\"\n", + " ),\n", + " AttributeInfo(\n", + " name=\"first_published\",\n", + " description=\"The year of the first publishing of the book\",\n", + " type=\"integer\"\n", + " ),\n", + " AttributeInfo(\n", + " name=\"genre\",\n", + " description=\"Genre or genres of the book\",\n", + " type=\"string\"\n", + " ),\n", + " AttributeInfo(\n", + " name=\"origin\",\n", + " description=\"The country of origin of the book\",\n", + " type=\"string\"\n", + " ),\n", + " AttributeInfo(\n", + " name=\"rating\",\n", + " description=\"The rating of the book on a scale of 1 to 5\",\n", + " type=\"float\"\n", + " ),\n", + "]\n", + "document_content_description = \"A description of the books content, themes, characters and setting.\"\n", + "\n", + "llm = ChatOpenAI(temperature=0, model_name=language_model_name)\n", + "retriever = SelfQueryRetriever.from_llm(\n", + " llm,\n", + " vectorstore,\n", + " document_content_description,\n", + " metadata_field_info,\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Show prompt debug, if needed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.globals import set_debug\n", + "\n", + "set_debug(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Follows Frodo Baggins on a quest to destroy a powerful ring, weaving a tale of bravery, friendship, and darkness in Middle-earth.', metadata={'author': 'J.R.R. Tolkien', 'first_published': 1954, 'genre': 'Fantasy', 'name': 'The Lord of the Rings', 'origin': 'England', 'rating': 4.9}),\n", + " Document(page_content='Resists a totalitarian regime under constant surveillance, showcasing the perils of absolute power and the spirit of rebellion.', metadata={'author': 'George Orwell', 'first_published': 1949, 'genre': 'Dystopian', 'name': '1984', 'origin': 'England', 'rating': 4.7}),\n", + " Document(page_content=\"Portrays Elizabeth Bennet's growth in discerning true character over appearances, set against Regency England's social mores.\", metadata={'author': 'Jane Austen', 'first_published': 1813, 'genre': 'Romance', 'name': 'Pride and Prejudice', 'origin': 'England', 'rating': 4.5}),\n", + " Document(page_content='Depicts a future where technological progress has stunted humanity, questioning the cost of happiness and freedom.', metadata={'author': 'Aldous Huxley', 'first_published': 1932, 'genre': 'Science Fiction', 'name': 'Brave New World', 'origin': 'England', 'rating': 4.7})]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_documents(\"A Book about a powerful ring\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Follows Frodo Baggins on a quest to destroy a powerful ring, weaving a tale of bravery, friendship, and darkness in Middle-earth.', metadata={'author': 'J.R.R. Tolkien', 'first_published': 1954, 'genre': 'Fantasy', 'name': 'The Lord of the Rings', 'origin': 'England', 'rating': 4.9}),\n", + " Document(page_content='Exposes racial injustices in the South through Scout Finch, whose father defends a wrongly accused black man, challenging societal prejudices.', metadata={'author': 'Harper Lee', 'first_published': 1960, 'genre': 'Southern Gothic', 'name': 'To Kill a Mockingbird', 'origin': 'United States', 'rating': 4.8})]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_documents(\"A Book with a rating above 4.7\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"Portrays Elizabeth Bennet's growth in discerning true character over appearances, set against Regency England's social mores.\", metadata={'author': 'Jane Austen', 'first_published': 1813, 'genre': 'Romance', 'name': 'Pride and Prejudice', 'origin': 'England', 'rating': 4.5}),\n", + " Document(page_content=\"Examines the fallout of unchecked scientific ambition through Victor Frankenstein's creation of life, highlighting the ethical limits of science.\", metadata={'author': 'Mary Shelley', 'first_published': 1818, 'genre': 'Science Fiction', 'name': 'Frankenstein', 'origin': 'England', 'rating': 4.6}),\n", + " Document(page_content=\"Delves into Raskolnikov's psyche after he murders for a 'noble' cause, probing the depths of guilt, morality, and redemption in bleak Russia.\", metadata={'author': 'Fyodor Dostoevsky', 'first_published': 1866, 'genre': 'Psychological Fiction', 'name': 'Crime and Punishment', 'origin': 'Russia', 'rating': 4.3})]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_documents(\"Published before 1900\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"Reveals the Jazz Age's allure and despair through Gatsby's doomed love, critiquing the American Dream's corruption.\", metadata={'author': 'F. Scott Fitzgerald', 'first_published': 1925, 'genre': 'Tragedy', 'name': 'The Great Gatsby', 'origin': 'United States', 'rating': 4.6}),\n", + " Document(page_content='Depicts a future where technological progress has stunted humanity, questioning the cost of happiness and freedom.', metadata={'author': 'Aldous Huxley', 'first_published': 1932, 'genre': 'Science Fiction', 'name': 'Brave New World', 'origin': 'England', 'rating': 4.7}),\n", + " Document(page_content=\"Captures teenage angst and alienation through Holden Caulfield's cynical view of adult hypocrisy and the pains of growing up.\", metadata={'author': 'J.D. Salinger', 'first_published': 1951, 'genre': 'Coming-of-Age', 'name': 'The Catcher in the Rye', 'origin': 'United States', 'rating': 4.5}),\n", + " Document(page_content='Confronts the haunting legacies of slavery through Sethe, a former slave tormented by her past, exploring themes of family and freedom.', metadata={'author': 'Toni Morrison', 'first_published': 1987, 'genre': 'Historical Fiction', 'name': 'Beloved', 'origin': 'United States', 'rating': 4.7})]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_documents(\"Tragic book about the American Dream\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Depicts a future where technological progress has stunted humanity, questioning the cost of happiness and freedom.', metadata={'author': 'Aldous Huxley', 'first_published': 1932, 'genre': 'Science Fiction', 'name': 'Brave New World', 'origin': 'England', 'rating': 4.7}),\n", + " Document(page_content='Resists a totalitarian regime under constant surveillance, showcasing the perils of absolute power and the spirit of rebellion.', metadata={'author': 'George Orwell', 'first_published': 1949, 'genre': 'Dystopian', 'name': '1984', 'origin': 'England', 'rating': 4.7}),\n", + " Document(page_content='Follows Frodo Baggins on a quest to destroy a powerful ring, weaving a tale of bravery, friendship, and darkness in Middle-earth.', metadata={'author': 'J.R.R. Tolkien', 'first_published': 1954, 'genre': 'Fantasy', 'name': 'The Lord of the Rings', 'origin': 'England', 'rating': 4.9})]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# retriever.get_relevant_documents(\"A Science Fiction book from United States with a rating above 4.6\")\n", + "retriever.get_relevant_documents(\"A dystopian book from England with a rating above 4.6\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"Takes Arthur Dent on a ludicrous space journey, poking fun at life's absurdities and the universe's vast mysteries and the number 42.\", metadata={'author': 'Douglas Adams', 'first_published': 1979, 'genre': 'Science Fiction', 'name': \"The Hitchhiker's Guide to the Galaxy\", 'origin': 'England', 'rating': 4.5}),\n", + " Document(page_content='Depicts a future where technological progress has stunted humanity, questioning the cost of happiness and freedom.', metadata={'author': 'Aldous Huxley', 'first_published': 1932, 'genre': 'Science Fiction', 'name': 'Brave New World', 'origin': 'England', 'rating': 4.7}),\n", + " Document(page_content=\"Captures teenage angst and alienation through Holden Caulfield's cynical view of adult hypocrisy and the pains of growing up.\", metadata={'author': 'J.D. Salinger', 'first_published': 1951, 'genre': 'Coming-of-Age', 'name': 'The Catcher in the Rye', 'origin': 'United States', 'rating': 4.5}),\n", + " Document(page_content='Resists a totalitarian regime under constant surveillance, showcasing the perils of absolute power and the spirit of rebellion.', metadata={'author': 'George Orwell', 'first_published': 1949, 'genre': 'Dystopian', 'name': '1984', 'origin': 'England', 'rating': 4.7})]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_documents(\"Books about the number 42\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}