amore-labs
diff --git a/‎backend/modules/llm.py
Lines changed: 2 additions & 2 deletions b/‎backend/modules/llm.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎backend/modules/metadata_utils.py
Lines changed: 2 additions & 2 deletions b/‎backend/modules/metadata_utils.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/developer tutorials/change model.ipynb
Lines changed: 16 additions & 1 deletion b/‎docs/developer tutorials/change model.ipynb
Lines changed: 16 additions & 1 deletion
diff --git a/‎docs/developer tutorials/create vectordb.ipynb
Lines changed: 8 additions & 0 deletions b/‎docs/developer tutorials/create vectordb.ipynb
Lines changed: 8 additions & 0 deletions
diff --git a/‎docs/developer tutorials/get an llm summary.ipynb
Lines changed: 10 additions & 1 deletion b/‎docs/developer tutorials/get an llm summary.ipynb
Lines changed: 10 additions & 1 deletion
diff --git a/‎docs/developer tutorials/index.md
Lines changed: 5 additions & 0 deletions b/‎docs/developer tutorials/index.md
Lines changed: 5 additions & 0 deletions
diff --git a/‎docs/developer tutorials/load vectordb and get results.ipynb
Lines changed: 8 additions & 0 deletions b/‎docs/developer tutorials/load vectordb and get results.ipynb
Lines changed: 8 additions & 0 deletions
diff --git a/‎docs/developer tutorials/run multiple queries and aggregate.ipynb
Lines changed: 11 additions & 0 deletions b/‎docs/developer tutorials/run multiple queries and aggregate.ipynb
Lines changed: 11 additions & 0 deletions
@@ -207,8 +207,8 @@ def create_vector_store(
     if config["testing_flag"]:
         # subset the data for testing
         if config["test_subset_2000"] == True:
-            print("[INFO] Subsetting the data to 2000 rows.")
-            documents = documents[:2000]
+            print("[INFO] Subsetting the data to 100 rows.")
+            documents = documents[:100]
     unique_docs, unique_ids = generate_unique_documents(documents, db)
 
     print(
 
@@ -136,8 +136,8 @@ def get_all_metadata_from_openml(config: dict) -> Tuple[pd.DataFrame, Sequence[i
 
         # subset the data for testing
         if config["test_subset_2000"] == True:
-            print("[INFO] Subsetting the data to 2000 rows.")
-            all_objects = all_objects[:2000]
+            print("[INFO] Subsetting the data to 100 rows.")
+            all_objects = all_objects[:100]
 
         data_id = [int(all_objects.iloc[i]["did"]) for i in range(len(all_objects))]
 
 
@@ -1,5 +1,13 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tutorial on changing models\n",
+    "- How would you use a different embedding and llm model?"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -25,6 +33,13 @@
     "from modules.llm import setup_vector_db_and_qa"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initial config"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 4,
@@ -108,7 +123,7 @@
    "metadata": {},
    "source": [
     "# IMPORTANT\n",
-    "- Do NOT forget to add the models to ollama/get_ollama.sh"
+    "- Do NOT forget to change the model to the best model in ollama/get_ollama.sh"
    ]
   }
  ],
 
@@ -1,5 +1,13 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tutorial on creating a vector database with openml objects\n",
+    "- How would you use the API to create a vector database with openml objects (datasets, flows, etc.)?"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
 
@@ -1,5 +1,13 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Getting an LLM summary using the API\n",
+    "- How would you use the API and an LLM model + prompt to generate a summary of the results obtained from the RAG pipeline?"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -53,7 +61,8 @@
    "metadata": {},
    "source": [
     "# Get LLM summary of a string\n",
-    "- Ensure that Ollama is running before this works ```bash ollama/.get_ollama.sh``` (or use the desktop Ollama app for testing)"
+    "- Ensure that Ollama is running before this works ```bash ollama/.get_ollama.sh``` (or use the desktop Ollama app for testing)\n",
+    "- Note that the input must be a single string. To get a summary of several langchain documents, first concatenate the text you care about into one string."
    ]
   },
   {
 
@@ -0,0 +1,5 @@
+# Developer Tutorials
+
+- Hello there, future OpenML contributor! It is nice meeting you here. This page is a collection of tutorials that will help you get started with contributing to the OpenML RAG pipeline.
+- The tutorials show you how to perform common tasks and should make it a lot easier to get started with contributing to this project.
+- Note that you need to set up the project before you begin. If you missed this step, please refer to the [index](../index.md).
@@ -1,5 +1,13 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Load the Chroma DB and get retrieval results for a given query\n",
+    "- How would you load the Chroma DB and get retrieval results for a given query?"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 6,
 
@@ -523,6 +523,17 @@
      "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
+    },
+    {
+     "ename": "",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
+      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
+      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
+      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
+     ]
     }
    ],
    "source": [
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,13 @@`
`1`	`1`	`{`
`2`	`2`	`"cells": [`
	`3`	`+ {`
	`4`	`+ "cell_type": "markdown",`
	`5`	`+ "metadata": {},`
	`6`	`+ "source": [`
	`7`	`+ "# Tutorial on creating a vector database with openml objects\n",`
	`8`	`+ "- How would you use the API to create a vector database with openml objects (datasets, flows, etc.)?"`
	`9`	`+ ]`
	`10`	`+ },`
`3`	`11`	`{`
`4`	`12`	`"cell_type": "code",`
`5`	`13`	`"execution_count": 1,`