Commit
…ag-on-edge-LLM-32core, rag-on-edge-test, rag-on-edge-web, rag-on-edge-vectorDB modules
@@ -0,0 +1,3 @@
config/
.env
/tests/__pycache__/*.pyc
@@ -0,0 +1,28 @@
{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "FilterModule Remote Debug (Python)",
            "type": "python",
            "request": "attach",
            "port": 5678,
            "host": "localhost",
            "logToFile": true,
            "redirectOutput": true,
            "pathMappings": [
                {
                    "localRoot": "${workspaceFolder}/modules/FilterModule",
                    "remoteRoot": "/app"
                }
            ],
            "windows": {
                "pathMappings": [
                    {
                        "localRoot": "${workspaceFolder}\\modules\\FilterModule",
                        "remoteRoot": "/app"
                    }
                ]
            }
        }
    ]
}
@@ -0,0 +1,12 @@
# Overview

This is the LLM component of the RAG-on-Edge project.

Before building the container and deploying:

1. By default, the variable `N_THREADS` is set to the number of logical CPUs available on the system. You can override it by setting the environment variable `N_THREADS` in the Kubernetes manifest `./deploy/yaml/rag-llm-dapr-workload.yaml`, where it is commented out by default (a hypothetical manifest excerpt is sketched below).

2. Before deploying the LLM component, make sure to put the model files into the `./modules/LLMModule/models` folder.
   For the quantized Llama 2 model, download the model files from [huggingface Llama-2-7B](https://huggingface.co/TheBloke/Llama-2-7B-GGUF). Download the `llama-2-7b.Q4_K_M.gguf` version and move the file into the `./modules/LLMModule/models` folder.

   For the Phi-2 small language model, download the model files from [huggingface Phi2](https://huggingface.co/TheBloke/phi-2-GGUF/tree/main). Download the `Phi-2.Q4_K_M.gguf` version.
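For illustration, here is a minimal sketch of what the `N_THREADS` override could look like in the workload manifest. The container name and surrounding structure are assumptions for the sketch, not the actual contents of `rag-llm-dapr-workload.yaml`:

```yaml
# Hypothetical excerpt of ./deploy/yaml/rag-llm-dapr-workload.yaml.
# Container name and structure are illustrative assumptions.
spec:
  containers:
    - name: rag-llm-module                   # assumed container name
      image: <ACR_ADDRESS>/llmmodule:0.0.1   # repository pattern taken from module.json
      env:
        - name: N_THREADS                    # uncomment/set to override the CPU-count default
          value: "16"
```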
@@ -0,0 +1,104 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
@@ -0,0 +1,2 @@
[pycodestyle]
max_line_length = 150
@@ -0,0 +1,24 @@
FROM python:3.8-bullseye

# Number of threads for LLM inference; uncomment to override the CPU-count default
# ENV N_THREADS=32

# Set the working directory
WORKDIR /app

RUN apt-get update && \
    apt-get install -y --no-install-recommends build-essential libcurl4-openssl-dev libboost-python-dev libpython3-dev python3 python3-pip cmake curl git && \
    rm -rf /var/lib/apt/lists/*
RUN pip3 install --upgrade pip
RUN pip3 install setuptools
RUN pip3 install ptvsd==4.1.3
COPY requirements.txt ./
RUN pip3 install -r requirements.txt
RUN pip3 install python-dotenv==0.21.0

# Expose the app port that the Dapr sidecar delivers pub/sub events to
EXPOSE 8601

COPY . .

ENTRYPOINT [ "python3", "-u", "./main.py" ]
@@ -0,0 +1,95 @@
from flask import Flask, request, jsonify
from cloudevents.http import from_http
from dapr.clients import DaprClient
import json
import os
import logging
# from langchain.llms import LlamaCpp
from langchain_community.llms import LlamaCpp
import time

logging.basicConfig(level=logging.DEBUG)

# Number of threads to use for LLM inference: pass as an env var to override
N_THREADS = int(os.getenv('N_THREADS', os.cpu_count()))
logging.info('Number of threads for LLM inference detected or passed in: ' + str(N_THREADS))

# Pub/sub subscriber using Dapr
app = Flask(__name__)
app_port = os.getenv('LLM_PORT', '8601')

# Load the quantized Llama 2 model from the local models folder
llmmodel = LlamaCpp(model_path="./models/llama-2-7b.Q4_K_M.gguf", verbose=True, n_threads=N_THREADS)

# Prompt template: placeholders are filled with the user query and the vector DB content
llm_prompt = '''Use the Content to answer the Search Query.
Search Query:
SEARCH_QUERY_HERE
Content:
SEARCH_CONTENT_HERE
Answer:
'''

# Output template: combines the retrieved content with the model's answer
llm_output = '''
Search Content:
SEARCH_CONTENT_HERE
Answer:
LLM_CONTENT_HERE
'''

# Register Dapr pub/sub subscriptions
@app.route('/dapr/subscribe', methods=['GET'])
def subscribe():
    subscriptions = [{
        'pubsubname': 'edgeragpubsub',
        'topic': 'llm_input_topic',
        'route': 'llm_input_topic_handler'
    }]
    logging.info('Dapr pub/sub is subscribed to: ' + json.dumps(subscriptions))
    return jsonify(subscriptions)

# Dapr subscription in /dapr/subscribe sets up this route
@app.route('/llm_input_topic_handler', methods=['POST'])
def orders_subscriber():
    # Parse the incoming CloudEvent and extract the query, vector DB result, and request id
    event = from_http(request.headers, request.get_data())
    user_query = str(event.data['user_query'])
    vdb_result = str(event.data['vdb_result'])
    request_id = event.data['request_id']

    # Fill the prompt template with the query and the retrieved content
    llm_prompt_prepped = llm_prompt.replace('SEARCH_QUERY_HERE', user_query).replace('SEARCH_CONTENT_HERE', vdb_result)

    # Perform LLM inference
    inference_result = llm_inference(llm_prompt_prepped)
    # Publish the LLM inference result
    output_result_prepped = llm_output.replace('SEARCH_CONTENT_HERE', vdb_result).replace('LLM_CONTENT_HERE', inference_result)
    # logging.info(output_result_prepped)
    output_message = {"inference_result": output_result_prepped, "request_id": request_id}
    with DaprClient() as client:
        result = client.publish_event(
            pubsub_name='edgeragpubsub',
            topic_name='llm_output_topic',
            data=json.dumps(output_message),
            data_content_type='application/json',
        )
        logging.info('Published data: ' + json.dumps(output_message))
        time.sleep(1)

    return json.dumps({'success': True}), 200, {'ContentType': 'application/json'}

def llm_inference(data):
    # logging.info('llm input :' + data)
    # llm_response = llmmodel(data)  # deprecated __call__ style
    llm_response = llmmodel.invoke(data)
    llm_response_str = str(llm_response)
    # logging.info('llm response :' + llm_response_str)
    return llm_response_str

if __name__ == '__main__':
    app.run(port=app_port)
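For local testing, here is a hypothetical publisher sketch that feeds this handler through the same Dapr pub/sub component. The component, topic, and field names come from the code above; the payload values and the script itself are made up and assume a running Dapr sidecar:

```python
# Hypothetical test publisher for the llm_input_topic handler above.
# Assumes a Dapr sidecar is running alongside this script.
import json
from dapr.clients import DaprClient

message = {
    "user_query": "What is RAG on the edge?",                    # made-up query
    "vdb_result": "RAG pairs a retriever with a generator ...",  # made-up vector DB content
    "request_id": "test-001",                                    # made-up correlation id
}

with DaprClient() as client:
    client.publish_event(
        pubsub_name='edgeragpubsub',   # component name used by main.py
        topic_name='llm_input_topic',  # topic the module subscribes to
        data=json.dumps(message),
        data_content_type='application/json',
    )
```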
@@ -0,0 +1,16 @@
{
    "$schema-version": "0.0.1",
    "description": "",
    "image": {
        "repository": "${ACR_ADDRESS}/llmmodule",
        "tag": {
            "version": "0.0.${BUILD_BUILDID}",
            "platforms": {
                "amd64": "./Dockerfile.amd64",
                "amd64.debug": "./Dockerfile.amd64.debug"
            }
        },
        "buildOptions": []
    },
    "language": "python"
}
@@ -0,0 +1,31 @@
# packages for both llm and slm
azure-cli==2.53.1
openpyxl==3.1.2
tiktoken==0.5.1
pathlib==1.0.1
Flask==3.0.0
dapr==1.11.0
cloudevents==1.10.1
typing_extensions==4.8.0
dapr-ext-grpc==1.11.0
ruamel-yaml==0.17.16

# packages for the Llama 2 language model
langchain==0.1.11
llama_cpp_python==0.2.13

# packages for Phi 2
# pillow~=10.2.0
# torch~=2.1.1
# numpy~=1.24.4
# sentencepiece~=0.1.98
# transformers>=4.35.2
# gguf>=0.1.0
# protobuf>=4.21.0
# langchain==0.1.7
# langchain-community==0.0.20
# langchain-core==0.1.23
# langsmith==0.0.87
# llama_cpp_python==0.2.43
# pydantic==1.10.13
@@ -0,0 +1,3 @@
config/
.env
/tests/__pycache__/*.pyc
@@ -0,0 +1,10 @@
# Overview

This is the LLM component of the RAG-on-Edge project.

Before building the container and deploying:

1. By default, the variable `N_THREADS` is set to the number of logical CPUs available on the system. You can override it by setting the environment variable `N_THREADS` in the Kubernetes manifest `./deploy/yaml/rag-llm-dapr-workload.yaml`, where it is commented out by default.

2. Before deploying the LLM component, make sure to put the model files into the `./modules/LLMModule/models` folder.
   For the Phi-2 small language model, download the model files from [huggingface Phi2](https://huggingface.co/TheBloke/phi-2-GGUF/tree/main). Download the `Phi-2.Q4_K_M.gguf` version (a loading sketch follows below).
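Once the Phi-2 file is in place, loading it should mirror how `main.py` loads the Llama 2 weights. A minimal sketch, assuming the downloaded filename and that the same `LlamaCpp` wrapper is used; in `main.py`, the `model_path` would need to point at this file instead of the Llama 2 model:

```python
# Minimal sketch: load the quantized Phi-2 model with the same LlamaCpp
# wrapper main.py uses. The model_path filename is an assumption based on
# the download step above.
import os
from langchain_community.llms import LlamaCpp

model = LlamaCpp(
    model_path="./models/Phi-2.Q4_K_M.gguf",                # assumed local filename
    verbose=True,
    n_threads=int(os.getenv("N_THREADS", os.cpu_count())),  # same override pattern as main.py
)
print(model.invoke("What does the RAG-on-Edge LLM module do?"))
```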