diff --git a/database/config.py b/database/config.py deleted file mode 100644 index e3bdef7..0000000 --- a/database/config.py +++ /dev/null @@ -1 +0,0 @@ -EMBEDDING_MODEL = "text-embedding-3-small" diff --git a/database/db-dev.ipynb b/database/db-dev.ipynb deleted file mode 100644 index b70751c..0000000 --- a/database/db-dev.ipynb +++ /dev/null @@ -1,859 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import json\n", - "import os\n", - "import subprocess\n", - "from pathlib import Path\n", - "from pprint import pprint\n", - "from typing import Dict, List\n", - "\n", - "import tiktoken\n", - "import yaml\n", - "from config import EMBEDDING_MODEL\n", - "from dotenv import find_dotenv, load_dotenv\n", - "from loguru import logger\n", - "from openai import OpenAI\n", - "from qdrant_client import QdrantClient\n", - "from qdrant_client.http.models import PointStruct\n", - "from tqdm.auto import tqdm\n", - "from utils import (\n", - " create_collection,\n", - " embed_text,\n", - " get_collection_info,\n", - " get_count,\n", - " search,\n", - " upsert,\n", - ")\n", - "\n", - "load_dotenv(find_dotenv())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Get config data" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "config_path = Path(\"../config.yaml\")\n", - "\n", - "with config_path.open(\"r\") as file:\n", - " config = yaml.safe_load(file)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load raw scraped data" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "raw_data_path = Path(\"../scraper/srb_labor_law_data.json\")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "with open(raw_data_path, \"r\", encoding=\"utf-8\") as file:\n", - " raw_data = json.loads(file.read())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Embedding text chunks" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create JSONL for parallel embedding.
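As an aside on the request format used here: each scraped record is expected to carry `title`, `texts`, and `link` fields (the fields the notebook reads from `raw_data`), and each record becomes exactly one embedding request. A minimal self-contained sketch of that JSONL construction, with a made-up record:

```python
import json

# Hypothetical scraped record; the title/texts/link schema is assumed from
# how the notebook accesses raw_data in the cells below.
record = {
    "title": "Član 1",
    "texts": ["Ovim zakonom ...", "..."],
    "link": "https://example.com/zakon#clan-1",
}

# One request per article: the title and its text chunks joined into one input.
job = {
    "model": "text-embedding-3-small",
    "input": ". ".join([record["title"], " ".join(record["texts"])]),
}

# Each request becomes a single JSONL line.
print(json.dumps(job))
```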
\n", - "The script for parallel processing is taken from [OpenAI CookBook](https://github.com/openai/openai-cookbook/blob/main/examples/api_request_parallel_processor.py)." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "filename = Path(\"./requests_to_parallel_process.jsonl\")\n", - "jobs = [\n", - " {\n", - " \"model\": config[\"openai\"][\"embedding_model\"][\"name\"],\n", - " \"input\": \". \".join([sample[\"title\"], \" \".join(sample[\"texts\"])]),\n", - " }\n", - " for sample in raw_data\n", - "]\n", - "with open(filename, \"w\") as f:\n", - " for job in jobs:\n", - " json_string = json.dumps(job)\n", - " f.write(json_string + \"\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Terminal command to run parallel processing." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! python api_request_parallel_processor.py \\\n", - " --requests_filepath requests_to_parallel_process.jsonl \\\n", - " --save_filepath requests_to_parallel_process_results.jsonl \\\n", - " --request_url https://api.openai.com/v1/embeddings \\\n", - " --max_requests_per_minute 2500 \\\n", - " --max_tokens_per_minute 900000 \\\n", - " --token_encoding_name cl100k_base \\\n", - " --max_attempts 5 \\\n", - " --logging_level 20" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Format the data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load the embeddings" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "embeddings_path = Path(\"./requests_to_parallel_process_results.jsonl\")\n", - "with open(embeddings_path, \"r\", encoding=\"utf-8\") as file:\n", - " embeddings = []\n", - " for line in file:\n", - " embeddings.append(json.loads(line))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a look-up table of: article_name --> (embedding, text)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "embeddings_lookup = {}\n", - "for item in embeddings:\n", - " text = item[0][\"input\"]\n", - " article_name = text.split(\". \")[0]\n", - " embedding = item[1][\"data\"][0][\"embedding\"]\n", - " embeddings_lookup[article_name] = {\"embedding\": embedding, \"text\": text}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create PointStructures for Qdrant database." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "points = []\n", - "\n", - "for id, dictionary in enumerate(raw_data):\n", - " title = dictionary[\"title\"]\n", - " link = dictionary[\"link\"]\n", - " if title in embeddings_lookup:\n", - " embedding, text = (\n", - " embeddings_lookup[title][\"embedding\"],\n", - " embeddings_lookup[title][\"text\"],\n", - " )\n", - " points.append(\n", - " PointStruct(\n", - " id=id,\n", - " vector=embedding,\n", - " payload={\"title\": title, \"text\": text, \"link\": link},\n", - " )\n", - " )\n", - " else:\n", - " print(\n", - " f\"Warning: No embedding found for title '{title}'. 
This item will be skipped.\"\n", - "        )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create Vector database" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qdrant_client = QdrantClient(\n", - "    url=os.environ[\"QDRANT_CLUSTER_URL\"],\n", - "    api_key=os.environ[\"QDRANT_API_KEY\"],\n", - ")\n", - "\n", - "openai_client = OpenAI(api_key=os.environ[\"OPENAI_API_KEY\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a collection" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-04-28 23:31:47.156\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mcreate_collection\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mCreating collection: labor_law with vector size: 1536.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "collection_name = \"labor_law\"\n", - "create_collection(client=qdrant_client, name=collection_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Upload data to the collection" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "upsert(client=qdrant_client, collection=collection_name, points=points)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Check collection information" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "get_collection_info(client=qdrant_client, collection=collection_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Get collection points count" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "313" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_count(client=qdrant_client, collection=collection_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Search the Vector database" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "path_to_tests = Path(\"./test_queries.json\")\n", - "with open(path_to_tests, \"r\", encoding=\"utf-8\") as file:\n", - "    test_samples = json.loads(file.read())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test_samples[\"hard\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Get embeddings for tests" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "for level in test_samples.keys():\n", - "    for i, sample in enumerate(test_samples[level]):\n", - "        response = embed_text(\n", - "            client=openai_client,\n", - "            text=sample[\"query\"],\n", - "            model=config[\"openai\"][\"embedding_model\"][\"name\"],\n", - "        )\n", - "        embedding = response.data[0].embedding\n", - "        test_samples[level][i][\"embedding\"] = embedding" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save tests with embeddings" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "with open(path_to_tests, \"w\", encoding=\"utf-8\") as file:\n", - "    file.write(json.dumps(test_samples, indent=4))" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Koliko traje porodiljsko odsustvo?'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query = test_samples[\"hard\"][3][\"query\"]\n", - "embedding = test_samples[\"hard\"][3][\"embedding\"]\n", - "query" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "collection_name = \"zakon_o_radu\"" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "response = search(\n", - "    client=qdrant_client,\n", - "    collection=collection_name,\n", - "    query_vector=embedding,\n", - "    with_vectors=True,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Updated for multiple laws" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Getting Embeddings" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Initial settings" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "laws_dir = Path(\"../scraper/laws\")\n", - "if not laws_dir.exists():\n", - "    logger.error(\"No laws directory found.\")\n", - "\n", - "law_paths = list(laws_dir.iterdir())\n", - "if not len(law_paths):\n", - "    logger.error(\"No laws found in directory.\")\n", - "\n", - "embeddings_dir = Path(\"./embeddings\")\n", - "embeddings_dir.mkdir(exist_ok=True)\n", - "\n", - "to_process_dir = Path(\"./to_process\")\n", - "to_process_dir.mkdir(exist_ok=True)\n", - "\n", - "max_num_tokens_per_chunk = 8191" - ] - },
- { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [], - "source": [ - "def load_json(path: Path) -> List[Dict]:\n", - "    if not path.exists():\n", - "        logger.error(f\"File: {path} does not exist.\")\n", - "    with open(path, \"r\", encoding=\"utf-8\") as file:\n", - "        data = json.loads(file.read())\n", - "    return data\n", - "\n", - "\n", - "def prepare_for_embedding(\n", - "    output_path: Path, scraped_data: List[Dict], embedding_model: str = EMBEDDING_MODEL\n", - ") -> None:\n", - "    jobs = [\n", - "        {\n", - "            \"model\": embedding_model,\n", - "            \"input\": \"[\" + sample[\"title\"] + \"]: \" + \" \".join(sample[\"texts\"]),\n", - "        }\n", - "        for sample in scraped_data\n", - "    ]\n", - "    with open(output_path, \"w\", encoding=\"utf-8\") as file:\n", - "        for job in jobs:\n", - "            json_string = json.dumps(job)\n", - "            file.write(json_string + \"\\n\")\n", - "\n", - "\n", - "def get_token_num(text: str, model_name: str = EMBEDDING_MODEL) -> int:\n", - "    enc = tiktoken.encoding_for_model(model_name)\n", - "    return len(enc.encode(text))\n", - "\n", - "\n", - "def run_api_request_processor(\n", - "    requests_filepath: Path,\n", - "    save_path: Path,\n", - "    max_requests_per_minute: int = 2500,\n", - "    max_tokens_per_minute: int = 900000,\n", - "    token_encoding_name: str = \"cl100k_base\",\n", - "    max_attempts: int = 5,\n", - "    logging_level: int = 20,\n", - ") -> None:\n", - "    if not requests_filepath.exists():\n", - "        logger.error(f\"File {requests_filepath} does not exist.\")\n", - "    if save_path.suffix != \".jsonl\":\n", - "        logger.error(f\"Save path {save_path} must be JSONL.\")\n", - "\n", - "    command = [\n", - "        \"python\",\n", - "        \"api_request_parallel_processor.py\",\n", - "        \"--requests_filepath\",\n", - "        requests_filepath,\n", - "        \"--save_filepath\",\n", - "        save_path,\n", - "        \"--request_url\",\n", - "        \"https://api.openai.com/v1/embeddings\",\n", - "        \"--max_requests_per_minute\",\n", - "        str(max_requests_per_minute),\n", - "        \"--max_tokens_per_minute\",\n", - "        str(max_tokens_per_minute),\n", - "        \"--token_encoding_name\",\n", - "        token_encoding_name,\n", - "        \"--max_attempts\",\n", - "        str(max_attempts),\n", - "        \"--logging_level\",\n", - "        str(logging_level),\n", - "    ]\n", - "    result = subprocess.run(command, text=True, capture_output=True)\n", - "\n", - "    if result.returncode == 0:\n", - "        logger.info(\"Embedding executed successfully.\")\n", - "        logger.info(f\"Embeddings saved to: {save_path}\")\n", - "    else:\n", - "        logger.error(\"Error in embedding execution!\")\n", - "        logger.error(f\"Error: {result.stderr}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Check number of tokens per chunk.
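Since text-embedding-3-small accepts at most 8191 input tokens, any chunk over `max_num_tokens_per_chunk` has to be shortened or split before embedding; the warning that follows flags exactly this as future work. A rough sketch of a splitter that could be integrated (the `split_long_text` helper is hypothetical, not part of `utils`):

```python
import tiktoken

MAX_TOKENS = 8191  # input limit of text-embedding-3-small

def split_long_text(text: str, max_tokens: int = MAX_TOKENS) -> list[str]:
    # Hypothetical helper: cut an over-long chunk into token-bounded pieces.
    enc = tiktoken.encoding_for_model("text-embedding-3-small")
    tokens = enc.encode(text)
    return [
        enc.decode(tokens[start : start + max_tokens])
        for start in range(0, len(tokens), max_tokens)
    ]
```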
\n", - "⚠️ Integrate this into processing." - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a96fbe906d654281a786ade22d58a76c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Checking tokens length: 0%| | 0/5 [00:00 max_num_tokens_per_chunk:\n", - " print(i, element)" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4ac361c4e2804841a627da657f3b2f91", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Embedding scraped laws: 0%| | 0/5 [00:00 List[Dict]:\n", - " if not path.exists():\n", - " logger.error(f\"File: {path} does not exist.\")\n", - "\n", - " with open(path, \"r\", encoding=\"utf-8\") as file:\n", - " embedded_data = []\n", - " for line in file:\n", - " embedded_data.append(json.loads(line))\n", - "\n", - " return embedded_data\n", - "\n", - "\n", - "def get_embedings_article_lookup(embedded_data: List[Dict]) -> Dict:\n", - " embeddings_lookup = {}\n", - " for item in embedded_data:\n", - " text = item[0][\"input\"]\n", - " article_name = text.split(\"]: \")[0][1:]\n", - " embedding = item[1][\"data\"][0][\"embedding\"]\n", - " embeddings_lookup[article_name] = {\"embedding\": embedding, \"text\": text}\n", - "\n", - " return embeddings_lookup\n", - "\n", - "\n", - "def get_data_points(raw_data: List[Dict], embeddings_lookup: Dict) -> List[PointStruct]:\n", - " points = []\n", - "\n", - " for id, dictionary in enumerate(raw_data):\n", - " title = dictionary[\"title\"]\n", - " link = dictionary[\"link\"]\n", - " if title in embeddings_lookup:\n", - " embedding, text = (\n", - " embeddings_lookup[title][\"embedding\"],\n", - " embeddings_lookup[title][\"text\"],\n", - " )\n", - " points.append(\n", - " PointStruct(\n", - " id=id,\n", - " vector=embedding,\n", - " payload={\"title\": title, \"text\": text, \"link\": link},\n", - " )\n", - " )\n", - " else:\n", - " logger.warning(\n", - " f\"Warning: No embedding found for title '{title}'. 
This item will be skipped.\"\n", - " )\n", - "\n", - " return points" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f6a9a69294f143768641ecc26ed4268d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Creating vector database collections: 0%| | 0/5 [00:00\u001b[0m:\u001b[36m21\u001b[0m - \u001b[1mCreated \"porodicni_zakon\" collection with 364 data points.\u001b[0m\n", - "\u001b[32m2024-04-21 22:29:33.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mcreate_collection\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1mCreating collection: zakon_o_radu with vector size: 1536.\u001b[0m\n", - "\u001b[32m2024-04-21 22:29:36.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m21\u001b[0m - \u001b[1mCreated \"zakon_o_radu\" collection with 313 data points.\u001b[0m\n", - "\u001b[32m2024-04-21 22:29:36.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mcreate_collection\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1mCreating collection: zakon_o_zastiti_podataka_o_licnosti with vector size: 1536.\u001b[0m\n", - "\u001b[32m2024-04-21 22:29:38.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m21\u001b[0m - \u001b[1mCreated \"zakon_o_zastiti_podataka_o_licnosti\" collection with 102 data points.\u001b[0m\n", - "\u001b[32m2024-04-21 22:29:38.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mcreate_collection\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1mCreating collection: zakon_o_zastiti_potrosaca with vector size: 1536.\u001b[0m\n", - "\u001b[32m2024-04-21 22:29:40.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m21\u001b[0m - \u001b[1mCreated \"zakon_o_zastiti_potrosaca\" collection with 198 data points.\u001b[0m\n", - "\u001b[32m2024-04-21 22:29:40.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mutils\u001b[0m:\u001b[36mcreate_collection\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1mCreating collection: zakon_o_porezu_na_dohodak_gradjana with vector size: 1536.\u001b[0m\n", - "\u001b[32m2024-04-21 22:29:43.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m21\u001b[0m - \u001b[1mCreated \"zakon_o_porezu_na_dohodak_gradjana\" collection with 256 data points.\u001b[0m\n" - ] - } - ], - "source": [ - "for file_path in tqdm(\n", - " embedding_paths,\n", - " desc=\"Creating vector database collections\",\n", - " total=len(embedding_paths),\n", - "):\n", - " embedded_data = load_embeddings(path=file_path)\n", - " embeddings_lookup = get_embedings_article_lookup(embedded_data)\n", - "\n", - " raw_data_path = laws_dir / file_path.with_suffix(\".json\").name\n", - " raw_data = load_json(path=raw_data_path)\n", - "\n", - " points = get_data_points(raw_data=raw_data, embeddings_lookup=embeddings_lookup)\n", - "\n", - " collection_name = file_path.stem.replace(\"-\", \"_\")\n", - " create_collection(client=qdrant_client, name=collection_name)\n", - " upsert(client=qdrant_client, collection=collection_name, points=points)\n", - "\n", - " if not get_count(client=qdrant_client, collection=collection_name) == len(raw_data):\n", - " logger.error(f\"There are missing points in {collection_name} collection.\")\n", - "\n", - " logger.info(\n", - " f'Created \"{collection_name}\" collection with 
{get_count(client=qdrant_client, collection=collection_name)} data points.'\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}
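For context, since `utils.py` is not part of this diff: a minimal sketch of what the helpers imported at the top of the notebook presumably wrap. The `qdrant_client` and `openai` calls are real APIs, and the `create_collection` log line matches the notebook's output; the wrapper bodies and the cosine-distance choice are assumptions (the logs only confirm the 1536-dimension vector size):

```python
from loguru import logger
from openai import OpenAI
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

EMBEDDING_MODEL = "text-embedding-3-small"

def create_collection(client: QdrantClient, name: str, vector_size: int = 1536) -> bool:
    # Assumed wrapper: (re)create a collection sized for the embedding model.
    logger.info(f"Creating collection: {name} with vector size: {vector_size}.")
    return client.recreate_collection(
        collection_name=name,
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
    )

def upsert(client: QdrantClient, collection: str, points: list):
    return client.upsert(collection_name=collection, points=points)

def get_count(client: QdrantClient, collection: str) -> int:
    return client.count(collection_name=collection).count

def embed_text(client: OpenAI, text: str, model: str = EMBEDDING_MODEL):
    # Returns the raw response; callers read response.data[0].embedding.
    return client.embeddings.create(input=text, model=model)

def search(client: QdrantClient, collection: str, query_vector, limit: int = 5, with_vectors: bool = False):
    return client.search(
        collection_name=collection,
        query_vector=query_vector,
        limit=limit,
        with_vectors=with_vectors,
    )
```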