-
Notifications
You must be signed in to change notification settings - Fork 8
/
llm_app.py
49 lines (39 loc) · 1.54 KB
/
llm_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import ollama
import streamlit as st
import torch
st.title("Ollama Python Chatbot")

# The chat history is created only when the key is missing, so an existing
# conversation in session state is left untouched.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Build the list of selectable model identifiers.
# NOTE(review): older ollama-python releases return each entry with a "name"
# key, while newer ones expose the identifier as "model" and raise KeyError
# on entry["name"]. Trying "name" first keeps the old behaviour where it
# worked and falls back to "model" otherwise - confirm against the installed
# ollama-python version.
models = [
    entry.get("name") or entry.get("model")
    for entry in ollama.list()["models"]
]

# The selectbox result is stored unconditionally, so no placeholder value
# needs to be written to st.session_state["model"] beforehand.
st.session_state["model"] = st.selectbox("Choose your model", models)
def model_res_generator():
    """Yield the assistant's reply piece by piece as Ollama streams it.

    Sends the conversation held in ``st.session_state["messages"]`` to the
    model named in ``st.session_state["model"]`` with ``stream=True``.

    Yields:
        The ``["message"]["content"]`` value of each streamed chunk, in the
        order the chunks arrive.
    """
    # No torch device is chosen here: the request below never passed one
    # along, so the former CUDA/CPU selection had no effect on the call.
    stream = ollama.chat(
        model=st.session_state["model"],
        messages=st.session_state["messages"],
        stream=True,
    )
    for chunk in stream:
        yield chunk["message"]["content"]
# Redraw every stored turn of the conversation on each run of the script.
history = st.session_state["messages"]
for past_turn in history:
    role, content = past_turn["role"], past_turn["content"]
    with st.chat_message(role):
        st.markdown(content)

prompt = st.chat_input("Enter prompt here..")
if prompt:
    # Store the user's turn before generating, because the generator reads
    # the history from session state when it builds the request.
    user_turn = {"role": "user", "content": prompt}
    history.append(user_turn)
    with st.chat_message("user"):
        st.markdown(prompt)

    # Stream the reply into the assistant bubble, then keep whatever
    # st.write_stream hands back as the assistant's turn in the history.
    with st.chat_message("assistant"):
        reply = st.write_stream(model_res_generator())
    history.append({"role": "assistant", "content": reply})