Commit de56e6f

Merge branch 'main' into rodrigom/benchmarking_readme_changes
snova-rodrigom committed Sep 27, 2024
2 parents: 7823d20 + 86a94a7
Showing 53 changed files with 2,661 additions and 1,682 deletions.
5 changes: 3 additions & 2 deletions .gitignore
@@ -228,7 +228,7 @@ financial_assistant/streamlit/cache/*
 financial_assistant/streamlit/cache_prod_mode/*
 
 #multimodal upload
-multimodal_knowledge_retriever/data/upload
+multimodal_knowledge_retriever/data/upload*
 
 =======
 #web crawled retriever data
@@ -271,4 +271,5 @@ yoda/llava_data_prep/sec_data
 yoda/synthetic_data
 yoda/llava_data_prep/scripts/table.aux
 yoda/llava_data_prep/notebooks/yolov8n.pt
-yoda/llava_data_prep/test
+yoda/llava_data_prep/test
+yoda/llava_data_prep/scripts/yolov8n.pt

40 changes: 33 additions & 7 deletions Dockerfile
@@ -1,8 +1,9 @@
 # Use an official Python runtime as a parent image
-FROM python:3.11.3-slim-buster
+FROM python:3.11.5-bookworm as builder
 
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
+ENV PIP_NO_CACHE_DIR=1
 
 # Install system dependencies
 RUN apt-get update && apt-get install -y \
@@ -22,20 +23,45 @@ RUN apt-get update && apt-get install -y \
 # Set working directory in the container
 WORKDIR /app
 
-# Copy the current directory contents into the container at /app
-COPY . /app
+# Copy only the requirements file first
+COPY base-requirements.txt .
 
 # Upgrade pip and install project dependencies
-RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r base-requirements.txt
+RUN pip install --upgrade pip
 
-# Expose the ports for the parsing service and Streamlit
-EXPOSE 8005 8501
+# Use BuildKit's cache mount to speed up pip installs
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r base-requirements.txt
+
+# Final stage
+FROM python:3.11.5-slim-bookworm
+
+# Copy installed packages from builder stage
+COPY --from=builder /usr/local /usr/local
+
+# Install runtime system dependencies
+RUN apt-get update && apt-get install -y \
+    poppler-utils \
+    tesseract-ocr \
+    qpdf \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set working directory in the container
+WORKDIR /app
+
+# Copy the application code
+COPY . .
 
 # Copy the startup script and make it executable
 COPY docker-startup.sh /usr/local/bin/
 RUN chmod +x /usr/local/bin/docker-startup.sh
 
+# Expose the ports for the parsing service and Streamlit
+EXPOSE 8005 8501
+
 # Set the startup script as the entrypoint
 ENTRYPOINT ["/usr/local/bin/docker-startup.sh"]

24 changes: 19 additions & 5 deletions Makefile
@@ -9,6 +9,10 @@ endif
 PARSING_DIR := utils/parsing/unstructured-api
 PARSING_VENV := venv
 
+
+STREAMLIT_PORT := 8501
+
+
 # Set OS-specific variables and commands
 ifeq ($(DETECTED_OS),Windows)
     PYTHON := python
@@ -410,17 +414,20 @@ endif
 .PHONY: docker-build
 docker-build:
     @echo "Building Docker image..."
-    docker build -t ai-starter-kit .
+    DOCKER_BUILDKIT=1 docker build \
+        --build-arg BUILDKIT_INLINE_CACHE=1 \
+        --cache-from ai-starter-kit \
+        -t ai-starter-kit .
 
 .PHONY: docker-run
 docker-run: docker-build
     @echo "Running Docker container..."
-    docker run -it --rm -p 8005:8005 -p 8501:8501 ai-starter-kit
+    docker run -it --rm -p 8005:8005 -p $(STREAMLIT_PORT):8501 ai-starter-kit
 
 .PHONY: docker-shell
 docker-shell: docker-build
     @echo "Opening a shell in the Docker container..."
-    docker run -it --rm -p 8005:8005 -p 8501:8501 ai-starter-kit /bin/bash
+    docker run -it --rm -p 8005:8005 -p $(STREAMLIT_PORT):8501 ai-starter-kit /bin/bash
 
 .PHONY: docker-run-kit
 docker-run-kit: docker-build
@@ -430,11 +437,18 @@
         exit 1; \
     fi
     @if [ -z "$(COMMAND)" ]; then \
-        docker run -it --rm -p 8005:8005 -p 8501:8501 ai-starter-kit /bin/bash -c "cd $(KIT) && streamlit run streamlit/app.py --browser.gatherUsageStats false"; \
+        docker run -it --rm -p 8005:8005 -p $(STREAMLIT_PORT):8501 \
+            -v $(PWD)/$(KIT):/app/$(KIT) \
+            ai-starter-kit /bin/bash -c \
+            "cd $(KIT) && if [ -f requirements.txt ]; then pip install -r requirements.txt; fi && streamlit run streamlit/app.py --server.port 8501 --server.address 0.0.0.0 --browser.gatherUsageStats false"; \
     else \
-        docker run -it --rm -p 8005:8005 -p 8501:8501 ai-starter-kit /bin/bash -c "cd $(KIT) && $(COMMAND)"; \
+        docker run -it --rm -p 8005:8005 -p $(STREAMLIT_PORT):8501 \
+            -v $(PWD)/$(KIT):/app/$(KIT) \
+            ai-starter-kit /bin/bash -c \
+            "cd $(KIT) && if [ -f requirements.txt ]; then pip install -r requirements.txt; fi && $(COMMAND)"; \
     fi
 
+
 # Set up test suite
 .PHONY: setup-test-suite
 setup-test-suite: ensure-pyenv

7 changes: 7 additions & 0 deletions README.md
@@ -168,6 +168,13 @@ The table belows lists the available kits, which are grouped into four categorie
     <td width="20%"> Advanced AI Capabilities </td>
 </tr>
 
+<tr>
+    <td width="20%"><a href="sambanova_scribe/README.md"> SambaNova Scribe</a></td>
+    <td width="40%">Example implementation of a transcription and summarization workflow.</td>
+    <td width="20%"> SambaNova Cloud</td>
+    <td width="20%"> Advanced AI Capabilities </td>
+</tr>
+
 </tbody>
 </table>

25 changes: 16 additions & 9 deletions benchmarking/notebooks/analyze-results.ipynb
@@ -34,7 +34,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 51,
+"execution_count": 18,
 "id": "17f7abe9-ed9e-466c-b034-577489aaf98b",
 "metadata": {
 "tags": []
@@ -48,7 +48,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 52,
+"execution_count": 19,
 "id": "bdb61de7",
 "metadata": {},
 "outputs": [],
@@ -69,7 +69,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 53,
+"execution_count": 20,
 "id": "2707495e",
 "metadata": {},
 "outputs": [],
@@ -287,16 +287,23 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "525f5776",
+"id": "1506628a",
 "metadata": {},
 "outputs": [],
 "source": [
 "\n",
 "import numpy as np\n",
-"total_wait_time_ttft = (df_summary['Counts']/df_summary['batch_size_used']*df_summary['server_ttft_s']).sum()\n",
 "\n",
-"df_summary['num_executed'] = np.ceil(df_summary['Counts'] / df_summary['batch_size_used'])\n",
-"df_summary['output_tokens'] = df_summary['server_number_output_tokens']/df_summary['Counts']\n",
-"total_generation_time = (df_summary['num_executed']*df_summary['output_tokens']/df_summary['server_output_token_per_s_per_request']).sum()\n",
+"# calculate number of batches executed in each batch size\n",
+"df_summary['num_batches_executed'] = np.ceil(df_summary['Total number of requests'] / df_summary.index.get_level_values('Batch size'))\n",
+"\n",
+"# calculate average time taken per request in each batch size\n",
+"df_summary['output_tokens_per_request'] = df_summary['Total output tokens']/df_summary['Total number of requests']\n",
+"df_summary['time_taken_per_request'] = df_summary['output_tokens_per_request']/df_summary['Avg. server tokens per sec per request']\n",
+"\n",
+"# calculate total ttft and generation times across all batch sizes\n",
+"total_wait_time_ttft = (df_summary['num_batches_executed']*df_summary['Avg. server TTFT (s)']).sum()\n",
+"total_generation_time = (df_summary['num_batches_executed']*df_summary['time_taken_per_request']).sum()\n",
 "\n",
 "print(f'Total wait time due to ttft (mins) = {total_wait_time_ttft/60:,.4f}')\n",
 "print(f'Total generation time due (mins) = {total_generation_time/60:,.4f}')\n",
@@ -315,7 +322,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 10,
+"execution_count": 27,
 "id": "79a2adde",
 "metadata": {},
 "outputs": [],

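For context, the rewritten notebook cell above estimates total benchmark wall-clock time from the per-batch-size summary table: the requests at each batch size are treated as ceil(requests / batch size) sequential batches, each paying one TTFT wait plus one per-request generation time. Below is a minimal, self-contained sketch of the same arithmetic on a toy df_summary; the column names and the 'Batch size' index level mirror the new cell, but the numbers are invented for illustration.

```python
import numpy as np
import pandas as pd

# Toy stand-in for the notebook's df_summary: one row per batch size.
# All values here are made up purely to exercise the formulas.
df_summary = pd.DataFrame(
    {
        'Total number of requests': [32, 64],
        'Total output tokens': [8_000, 16_000],
        'Avg. server tokens per sec per request': [40.0, 25.0],
        'Avg. server TTFT (s)': [0.5, 0.8],
    },
    index=pd.Index([8, 16], name='Batch size'),
)

# Number of batches executed at each batch size (requests / batch size, rounded up).
df_summary['num_batches_executed'] = np.ceil(
    df_summary['Total number of requests'] / df_summary.index.get_level_values('Batch size')
)

# Average generation time per request: output tokens per request divided by
# the per-request token throughput.
df_summary['output_tokens_per_request'] = (
    df_summary['Total output tokens'] / df_summary['Total number of requests']
)
df_summary['time_taken_per_request'] = (
    df_summary['output_tokens_per_request'] / df_summary['Avg. server tokens per sec per request']
)

# Batches run sequentially, so each batch contributes one TTFT wait plus one
# per-request generation time; sum the per-batch-size totals.
total_wait_time_ttft = (df_summary['num_batches_executed'] * df_summary['Avg. server TTFT (s)']).sum()
total_generation_time = (df_summary['num_batches_executed'] * df_summary['time_taken_per_request']).sum()

print(f'Total wait time due to ttft (mins) = {total_wait_time_ttft / 60:,.4f}')
print(f'Total generation time (mins) = {total_generation_time / 60:,.4f}')
```

With these toy numbers each batch size runs 4 batches, giving 5.2 s of TTFT wait (0.0867 mins) and 65 s of generation time (1.0833 mins).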
[Diffs for the remaining 48 changed files are not shown.]
