Commit
…ag-on-edge-LLM-32core, rag-on-edge-test, rag-on-edge-web, rag-on-edge-vectorDB modules
@@ -0,0 +1,3 @@
config/
.env
/tests/__pycache__/*.pyc
@@ -0,0 +1,28 @@
{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "FilterModule Remote Debug (Python)",
            "type": "python",
            "request": "attach",
            "port": 5678,
            "host": "localhost",
            "logToFile": true,
            "redirectOutput": true,
            "pathMappings": [
                {
                    "localRoot": "${workspaceFolder}/modules/FilterModule",
                    "remoteRoot": "/app"
                }
            ],
            "windows": {
                "pathMappings": [
                    {
                        "localRoot": "${workspaceFolder}\\modules\\FilterModule",
                        "remoteRoot": "/app"
                    }
                ]
            }
        }
    ]
}
@@ -0,0 +1,12 @@
# Overview

This is the LLM component of the RAG-on-Edge project.

Before building the container and deploying:

1. By default, the variable `N_THREADS` is set to the number of logical CPUs available on the system. You can override it by setting the environment variable `N_THREADS` in the Kubernetes manifest `./deploy/yaml/rag-llm-dapr-workload.yaml`, where it is commented out by default (a hypothetical manifest excerpt is sketched below).

2. Before deploying the LLM component, make sure to put the model files into the `./modules/LLMModule/models` folder.
   For the quantized Llama 2 model, download the model files from [huggingface Llama-2-7B](https://huggingface.co/TheBloke/Llama-2-7B-GGUF). Download the `llama-2-7b.Q4_K_M.gguf` version and move the file into the `./modules/LLMModule/models` folder.

   For the Phi-2 small language model, download the model files from [huggingface Phi2](https://huggingface.co/TheBloke/phi-2-GGUF/tree/main). Download the `Phi-2.Q4_K_M.gguf` version.
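For illustration, here is a minimal sketch of what the `N_THREADS` override could look like in the workload manifest. The container name and surrounding structure are assumptions for the sketch, not the actual contents of `rag-llm-dapr-workload.yaml`:

```yaml
# Hypothetical excerpt of ./deploy/yaml/rag-llm-dapr-workload.yaml.
# Container name and structure are illustrative assumptions.
spec:
  containers:
    - name: rag-llm-module                   # assumed container name
      image: <ACR_ADDRESS>/llmmodule:0.0.1   # repository pattern taken from module.json
      env:
        - name: N_THREADS                    # uncomment/set to override the CPU-count default
          value: "16"
```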
@@ -0,0 +1,104 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
@@ -0,0 +1,2 @@
[pycodestyle]
max_line_length = 150
@@ -0,0 +1,24 @@
FROM python:3.8-bullseye

# Number of threads for LLM inference; uncomment to override the CPU-count default
# ENV N_THREADS=32

# Set the working directory
WORKDIR /app

RUN apt-get update && \
    apt-get install -y --no-install-recommends build-essential libcurl4-openssl-dev libboost-python-dev libpython3-dev python3 python3-pip cmake curl git && \
    rm -rf /var/lib/apt/lists/*
RUN pip3 install --upgrade pip
RUN pip3 install setuptools
RUN pip3 install ptvsd==4.1.3
COPY requirements.txt ./
RUN pip3 install -r requirements.txt
RUN pip3 install python-dotenv==0.21.0

# Expose the app port that the Dapr sidecar delivers pub/sub events to
EXPOSE 8601

COPY . .

ENTRYPOINT [ "python3", "-u", "./main.py" ]
@@ -0,0 +1,95 @@
from flask import Flask, request, jsonify
from cloudevents.http import from_http
from dapr.clients import DaprClient
import json
import os
import logging
# from langchain.llms import LlamaCpp
from langchain_community.llms import LlamaCpp
import time

logging.basicConfig(level=logging.DEBUG)

# Number of threads to use for LLM inference: pass as an env var to override
N_THREADS = int(os.getenv('N_THREADS', os.cpu_count()))
logging.info('Number of threads for LLM inference detected or passed in: ' + str(N_THREADS))

# Pub/sub subscriber using Dapr
app = Flask(__name__)
app_port = os.getenv('LLM_PORT', '8601')

# Load the quantized Llama 2 model from the local models folder
llmmodel = LlamaCpp(model_path="./models/llama-2-7b.Q4_K_M.gguf", verbose=True, n_threads=N_THREADS)

# Prompt template: placeholders are filled with the user query and the vector DB content
llm_prompt = '''Use the Content to answer the Search Query.
Search Query:
SEARCH_QUERY_HERE
Content:
SEARCH_CONTENT_HERE
Answer:
'''

# Output template: combines the retrieved content with the model's answer
llm_output = '''
Search Content:
SEARCH_CONTENT_HERE
Answer:
LLM_CONTENT_HERE
'''

# Register Dapr pub/sub subscriptions
@app.route('/dapr/subscribe', methods=['GET'])
def subscribe():
    subscriptions = [{
        'pubsubname': 'edgeragpubsub',
        'topic': 'llm_input_topic',
        'route': 'llm_input_topic_handler'
    }]
    logging.info('Dapr pub/sub is subscribed to: ' + json.dumps(subscriptions))
    return jsonify(subscriptions)

# Dapr subscription in /dapr/subscribe sets up this route
@app.route('/llm_input_topic_handler', methods=['POST'])
def orders_subscriber():
    # Parse the incoming CloudEvent and extract the query, vector DB result, and request id
    event = from_http(request.headers, request.get_data())
    user_query = str(event.data['user_query'])
    vdb_result = str(event.data['vdb_result'])
    request_id = event.data['request_id']

    # Fill the prompt template with the query and the retrieved content
    llm_prompt_prepped = llm_prompt.replace('SEARCH_QUERY_HERE', user_query).replace('SEARCH_CONTENT_HERE', vdb_result)

    # Perform LLM inference
    inference_result = llm_inference(llm_prompt_prepped)
    # Publish the LLM inference result
    output_result_prepped = llm_output.replace('SEARCH_CONTENT_HERE', vdb_result).replace('LLM_CONTENT_HERE', inference_result)
    # logging.info(output_result_prepped)
    output_message = {"inference_result": output_result_prepped, "request_id": request_id}
    with DaprClient() as client:
        result = client.publish_event(
            pubsub_name='edgeragpubsub',
            topic_name='llm_output_topic',
            data=json.dumps(output_message),
            data_content_type='application/json',
        )
        logging.info('Published data: ' + json.dumps(output_message))
        time.sleep(1)

    return json.dumps({'success': True}), 200, {'ContentType': 'application/json'}

def llm_inference(data):
    # logging.info('llm input :' + data)
    # llm_response = llmmodel(data)  # deprecated __call__ style
    llm_response = llmmodel.invoke(data)
    llm_response_str = str(llm_response)
    # logging.info('llm response :' + llm_response_str)
    return llm_response_str

if __name__ == '__main__':
    app.run(port=app_port)
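For local testing, here is a hypothetical publisher sketch that feeds this handler through the same Dapr pub/sub component. The component, topic, and field names come from the code above; the payload values and the script itself are made up and assume a running Dapr sidecar:

```python
# Hypothetical test publisher for the llm_input_topic handler above.
# Assumes a Dapr sidecar is running alongside this script.
import json
from dapr.clients import DaprClient

message = {
    "user_query": "What is RAG on the edge?",                    # made-up query
    "vdb_result": "RAG pairs a retriever with a generator ...",  # made-up vector DB content
    "request_id": "test-001",                                    # made-up correlation id
}

with DaprClient() as client:
    client.publish_event(
        pubsub_name='edgeragpubsub',   # component name used by main.py
        topic_name='llm_input_topic',  # topic the module subscribes to
        data=json.dumps(message),
        data_content_type='application/json',
    )
```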
@@ -0,0 +1,16 @@
{
    "$schema-version": "0.0.1",
    "description": "",
    "image": {
        "repository": "${ACR_ADDRESS}/llmmodule",
        "tag": {
            "version": "0.0.${BUILD_BUILDID}",
            "platforms": {
                "amd64": "./Dockerfile.amd64",
                "amd64.debug": "./Dockerfile.amd64.debug"
            }
        },
        "buildOptions": []
    },
    "language": "python"
}
@@ -0,0 +1,31 @@
# packages for both llm and slm
azure-cli==2.53.1
openpyxl==3.1.2
tiktoken==0.5.1
pathlib==1.0.1
Flask==3.0.0
dapr==1.11.0
cloudevents==1.10.1
typing_extensions==4.8.0
dapr-ext-grpc==1.11.0
ruamel-yaml==0.17.16

# packages for the Llama 2 language model
langchain==0.1.11
llama_cpp_python==0.2.13

# packages for Phi 2
# pillow~=10.2.0
# torch~=2.1.1
# numpy~=1.24.4
# sentencepiece~=0.1.98
# transformers>=4.35.2
# gguf>=0.1.0
# protobuf>=4.21.0
# langchain==0.1.7
# langchain-community==0.0.20
# langchain-core==0.1.23
# langsmith==0.0.87
# llama_cpp_python==0.2.43
# pydantic==1.10.13
@@ -0,0 +1,3 @@
config/
.env
/tests/__pycache__/*.pyc
@@ -0,0 +1,10 @@
# Overview

This is the LLM component of the RAG-on-Edge project.

Before building the container and deploying:

1. By default, the variable `N_THREADS` is set to the number of logical CPUs available on the system. You can override it by setting the environment variable `N_THREADS` in the Kubernetes manifest `./deploy/yaml/rag-llm-dapr-workload.yaml`, where it is commented out by default.

2. Before deploying the LLM component, make sure to put the model files into the `./modules/LLMModule/models` folder.
   For the Phi-2 small language model, download the model files from [huggingface Phi2](https://huggingface.co/TheBloke/phi-2-GGUF/tree/main). Download the `Phi-2.Q4_K_M.gguf` version (a loading sketch follows below).
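Once the Phi-2 file is in place, loading it should mirror how `main.py` loads the Llama 2 weights. A minimal sketch, assuming the downloaded filename and that the same `LlamaCpp` wrapper is used; in `main.py`, the `model_path` would need to point at this file instead of the Llama 2 model:

```python
# Minimal sketch: load the quantized Phi-2 model with the same LlamaCpp
# wrapper main.py uses. The model_path filename is an assumption based on
# the download step above.
import os
from langchain_community.llms import LlamaCpp

model = LlamaCpp(
    model_path="./models/Phi-2.Q4_K_M.gguf",                # assumed local filename
    verbose=True,
    n_threads=int(os.getenv("N_THREADS", os.cpu_count())),  # same override pattern as main.py
)
print(model.invoke("What does the RAG-on-Edge LLM module do?"))
```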