From 6caa04c041605558b40b3897d2644a44fa7eaf0b Mon Sep 17 00:00:00 2001 From: Jorge Bejar Date: Mon, 22 Jul 2024 18:06:57 -0300 Subject: [PATCH 01/11] Complete removal of unused settings from loading model options --- moxin-backend/src/backend_impls/chat_ui.rs | 13 +++---------- moxin-backend/src/backend_impls/mod.rs | 4 ---- moxin-protocol/src/protocol.rs | 2 -- src/data/chats/mod.rs | 2 -- 4 files changed, 3 insertions(+), 18 deletions(-) diff --git a/moxin-backend/src/backend_impls/chat_ui.rs b/moxin-backend/src/backend_impls/chat_ui.rs index 1cac73cb..411ca05d 100644 --- a/moxin-backend/src/backend_impls/chat_ui.rs +++ b/moxin-backend/src/backend_impls/chat_ui.rs @@ -316,22 +316,15 @@ fn create_wasi( file: &DownloadedFile, load_model: &LoadModelOptions, ) -> wasmedge_sdk::WasmEdgeResult { - let ctx_size = if load_model.n_ctx > 0 { - Some(load_model.n_ctx.to_string()) - } else { - None - }; + let ctx_size = Some(format!("{}", file.context_size)); let n_gpu_layers = match load_model.gpu_layers { moxin_protocol::protocol::GPULayers::Specific(n) => Some(n.to_string()), moxin_protocol::protocol::GPULayers::Max => None, }; - let batch_size = if load_model.n_batch > 0 { - Some(load_model.n_batch.to_string()) - } else { - None - }; + // Set n_batch to a fixed value of 128. + let batch_size = Some(format!("128")); let mut prompt_template = load_model.prompt_template.clone(); if prompt_template.is_none() && !file.prompt_template.is_empty() { diff --git a/moxin-backend/src/backend_impls/mod.rs b/moxin-backend/src/backend_impls/mod.rs index 0dd9d8c3..d91f0653 100644 --- a/moxin-backend/src/backend_impls/mod.rs +++ b/moxin-backend/src/backend_impls/mod.rs @@ -136,8 +136,6 @@ fn test_chat() { prompt_template: None, gpu_layers: moxin_protocol::protocol::GPULayers::Max, use_mlock: false, - n_batch: 512, - n_ctx: 512, rope_freq_scale: 0.0, rope_freq_base: 0.0, context_overflow_policy: moxin_protocol::protocol::ContextOverflowPolicy::StopAtLimit, @@ -211,8 +209,6 @@ fn test_chat_stop() { prompt_template: None, gpu_layers: moxin_protocol::protocol::GPULayers::Max, use_mlock: false, - n_batch: 512, - n_ctx: 512, rope_freq_scale: 0.0, rope_freq_base: 0.0, context_overflow_policy: moxin_protocol::protocol::ContextOverflowPolicy::StopAtLimit, diff --git a/moxin-protocol/src/protocol.rs b/moxin-protocol/src/protocol.rs index 9b250a58..694ef96c 100644 --- a/moxin-protocol/src/protocol.rs +++ b/moxin-protocol/src/protocol.rs @@ -28,8 +28,6 @@ pub struct LoadModelOptions { pub prompt_template: Option, pub gpu_layers: GPULayers, pub use_mlock: bool, - pub n_batch: u32, - pub n_ctx: u32, pub rope_freq_scale: f32, pub rope_freq_base: f32, diff --git a/src/data/chats/mod.rs b/src/data/chats/mod.rs index cb642202..13b27c22 100644 --- a/src/data/chats/mod.rs +++ b/src/data/chats/mod.rs @@ -59,8 +59,6 @@ impl Chats { prompt_template: None, gpu_layers: moxin_protocol::protocol::GPULayers::Max, use_mlock: false, - n_batch: 512, - n_ctx: 512, rope_freq_scale: 0.0, rope_freq_base: 0.0, context_overflow_policy: From 9e4252b774d8a653cf985033da7d852c9591a46e Mon Sep 17 00:00:00 2001 From: Jorge Bejar Date: Tue, 23 Jul 2024 12:01:10 -0300 Subject: [PATCH 02/11] Visual indication when loading models --- src/chat/mod.rs | 2 + src/chat/model_selector.rs | 72 +++++++++++++++- src/chat/model_selector_loading.rs | 133 +++++++++++++++++++++++++++++ src/data/chats/mod.rs | 67 +++++++-------- src/data/chats/model_loader.rs | 88 +++++++++++++++++++ src/data/store.rs | 9 +- 6 files changed, 331 insertions(+), 40 deletions(-) create mode 100644 src/chat/model_selector_loading.rs create mode 100644 src/data/chats/model_loader.rs diff --git a/src/chat/mod.rs b/src/chat/mod.rs index b9f63d15..6223dbd8 100644 --- a/src/chat/mod.rs +++ b/src/chat/mod.rs @@ -10,6 +10,7 @@ pub mod delete_chat_modal; pub mod model_info; pub mod model_selector; pub mod model_selector_list; +pub mod model_selector_loading; pub mod shared; use makepad_widgets::Cx; @@ -25,6 +26,7 @@ pub fn live_design(cx: &mut Cx) { model_info::live_design(cx); model_selector_list::live_design(cx); model_selector::live_design(cx); + model_selector_loading::live_design(cx); shared::live_design(cx); delete_chat_modal::live_design(cx); chat_history_card_options::live_design(cx); diff --git a/src/chat/model_selector.rs b/src/chat/model_selector.rs index 1083a0d7..d11eccd4 100644 --- a/src/chat/model_selector.rs +++ b/src/chat/model_selector.rs @@ -4,7 +4,7 @@ use crate::{ }; use makepad_widgets::*; -use super::model_selector_list::{ModelSelectorAction, ModelSelectorListWidgetExt}; +use super::{model_selector_list::{ModelSelectorAction, ModelSelectorListWidgetExt}, model_selector_loading::ModelSelectorLoadingWidgetExt}; live_design! { import makepad_widgets::base::*; @@ -14,13 +14,15 @@ live_design! { import crate::chat::model_info::ModelInfo; import crate::chat::model_selector_list::ModelSelectorList; + import crate::chat::model_selector_loading::ModelSelectorLoading; ModelSelectorButton = { width: Fill, height: 54, + flow: Overlay, align: {x: 0.0, y: 0.5}, - padding: 16, + padding: 0, draw_bg: { instance radius: 3.0, @@ -29,11 +31,18 @@ live_design! { cursor: Hand, + loading = { + width: Fill, + height: Fill, + visible: false, + } + choose = { width: Fill, height: Fit, align: {x: 0.5, y: 0.5}, + padding: 16, label =