4 changes: 2 additions & 2 deletions android/app/build.gradle.kts
@@ -100,8 +100,8 @@ dependencies {
debugImplementation(libs.androidx.ui.tooling)
debugImplementation(libs.androidx.ui.test.manifest)

implementation("dev.deliteai:nimblenet_ktx:0.0.1-dev-1751902318")
implementation("dev.deliteai:nimblenet_core:0.0.1-dev-1751904491")
implementation("dev.deliteai:nimblenet_ktx:0.0.1-dev-1751904494")
implementation("dev.deliteai:nimblenet_core:0.0.1-dev-1751904494")

implementation("com.halilibo.compose-richtext:richtext-ui-material3:1.0.0+")
implementation("com.halilibo.compose-richtext:richtext-commonmark:1.0.0+")
40 changes: 40 additions & 0 deletions android/app/src/main/assets/config.json
@@ -0,0 +1,40 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 151643,
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 6144,
"max_position_embeddings": 40960,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000,
"sliding_window": null,
"tie_word_embeddings": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.52.0.dev0",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 151936,
"transformers.js_config": {
"kv_cache_dtype": {
"q4f16": "float16",
"fp16": "float16"
},
"use_external_data_format": {
"model.onnx": true,
"model_fp16.onnx": true
}
}
}
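The new assets/config.json is the Hugging Face-style configuration for the Qwen3 ONNX export (fp16/q4f16 KV-cache hints, external data format for the ONNX weights). A minimal sketch, assuming the file ships under assets/ as added here, of reading a few fields at runtime with org.json; the function names are illustrative and not part of this change:

import android.content.Context
import org.json.JSONObject

// Reads the bundled model config from assets and returns it as a JSONObject.
fun readModelConfig(context: Context): JSONObject {
    val raw = context.assets.open("config.json").bufferedReader().use { it.readText() }
    return JSONObject(raw)
}

// Example: log the basic model shape described by the config above.
fun logModelShape(context: Context) {
    val config = readModelConfig(context)
    val hidden = config.getInt("hidden_size")        // 2048
    val layers = config.getInt("num_hidden_layers")  // 28
    val vocab = config.getInt("vocab_size")          // 151936
    android.util.Log.d("ModelConfig", "Qwen3: hidden=$hidden, layers=$layers, vocab=$vocab")
}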
1 change: 1 addition & 0 deletions android/app/src/main/assets/tokenizer.json

Large diffs are not rendered by default.

@@ -6,28 +6,41 @@

package dev.deliteai.assistant.domain.features.llm

import android.util.Log
import dev.deliteai.NimbleNet
import dev.deliteai.assistant.utils.TAG

import dev.deliteai.datamodels.NimbleNetTensor
import dev.deliteai.impl.common.DATATYPE

typealias NimbleNetTensorMap = HashMap<String, NimbleNetTensor>
typealias DelitePyForeignFunction = (NimbleNetTensorMap?) -> NimbleNetTensorMap?

object LLMManager {
suspend fun feedInput(input: String, isVoiceInitiated: Boolean) {

private fun createNimbleNetTensorFromForeignFunction(fn: (String?) -> Unit) : NimbleNetTensor {
val callbackDelitePy : DelitePyForeignFunction = fun(input: NimbleNetTensorMap?): NimbleNetTensorMap? {
val outputStream = input?.get("token_stream")?.data as String?
fn(outputStream)
return hashMapOf("result" to NimbleNetTensor(data = true, datatype = DATATYPE.BOOL, shape = intArrayOf()))
}
return NimbleNetTensor(data = callbackDelitePy, datatype = DATATYPE.FUNCTION, shape = intArrayOf())
}

suspend fun feedInput(input: String, isVoiceInitiated: Boolean, callback: (String?)->Unit) : String? {
val res = NimbleNet.runMethod(
"prompt_llm",
"prompt_for_tool_calling",
inputs = hashMapOf(
"query" to NimbleNetTensor(input, DATATYPE.STRING, null),
"is_voice_initiated" to NimbleNetTensor(
if (isVoiceInitiated) 1 else 0,
DATATYPE.INT32,
null
)
"prompt" to NimbleNetTensor(input, DATATYPE.STRING, null),
"output_stream_callback" to createNimbleNetTensorFromForeignFunction(callback)
),
)
check(res.status) { "NimbleNet.runMethod('prompt_llm') failed with status: ${res.status}" }
assert(res.status) { "NimbleNet.runMethod('prompt_for_tool_calling') failed with status: ${res.status}" }
return res.payload?.get("results")?.data as String?
}

suspend fun getNextMap(): Map<String, NimbleNetTensor> {
val res2 = NimbleNet.runMethod("get_next_str", hashMapOf())
val res2 = NimbleNet.runMethod("get_token_stream", hashMapOf())
check(res2.status) { "NimbleNet.runMethod('get_next_str') failed with error: ${res2.error?.message}" }
return res2.payload
?: throw IllegalStateException("NimbleNet.runMethod('get_next_str') returned null payload")
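Note on the new streaming contract: "prompt_for_tool_calling" receives the prompt plus a FUNCTION-typed tensor wrapping the Kotlin callback, the DelitePy side invokes that callback with each "token_stream" chunk, and the "results" entry of the payload carries the final text. A hedged sketch of a caller, taking only feedInput's signature from this change (the surrounding names are illustrative):

// Illustrative caller: collect streamed chunks and fall back to them if the
// final payload is null. Not part of this change.
suspend fun promptAndCollect(prompt: String): String {
    val streamed = StringBuilder()
    val finalText = LLMManager.feedInput(prompt, isVoiceInitiated = false) { chunk ->
        chunk?.let { streamed.append(it) }   // invoked once per streamed chunk
    }
    return finalText ?: streamed.toString()
}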
@@ -9,7 +9,7 @@ package dev.deliteai.assistant.domain.features.llm
import dev.deliteai.datamodels.NimbleNetTensor

object LLMService {
suspend fun feedInput(input:String, isVoiceInitiated:Boolean) = LLMManager.feedInput(input, isVoiceInitiated)
suspend fun feedInput(input:String, isVoiceInitiated:Boolean, callbackFunction: (String?)-> Unit) = LLMManager.feedInput(input, isVoiceInitiated, callbackFunction)
suspend fun getNextMap(): Map<String, NimbleNetTensor> = LLMManager.getNextMap()
suspend fun stopLLM() = LLMManager.stopLLM()
suspend fun getLLMName() = LLMManager.getLLMName()
@@ -19,6 +19,7 @@ import dev.deliteai.assistant.utils.TAG
import dev.deliteai.assistant.utils.chunkSentence
import dev.deliteai.assistant.utils.mergeChunks
import android.util.Log
import dev.deliteai.impl.common.DATATYPE
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Deferred
import kotlinx.coroutines.Dispatchers
@@ -51,16 +52,20 @@ class ChatRepository {

suspend fun getModelName() = LLMService.getLLMName()

fun getLLMText(textInput: String) = flow {
LLMService.feedInput(textInput, false)
do {
val outputMap = LLMService.getNextMap()
val currentOutputString = (outputMap["str"] as NimbleNetTensor).data.toString()
emit(GenerateResponseJobStatus.NextItem(currentOutputString))
} while (!outputMap.containsKey("finished"))

emit(GenerateResponseJobStatus.Finished())
Log.d(TAG, "startFeedbackLoop: LLM finished output")
fun getLLMText(textInput: String) = channelFlow {
val callback : (String?) -> Unit = { input ->
repositoryScope.launch(Dispatchers.IO) {
input?.let {
send(GenerateResponseJobStatus.NextItem(input))
}
}
}
val response = LLMService.feedInput(textInput, false, callback)
response?.let {
send(GenerateResponseJobStatus.NextItem(response))
}
send(GenerateResponseJobStatus.Finished())
Log.d(TAG, "startFeedbackLoop: LLM finished output")
}.flowOn(Dispatchers.Default)
.catch { throwable ->
ExceptionLogger.log("getLLMText", throwable)
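A note on the callback-to-flow bridge above: getLLMText forwards each chunk by launching send on repositoryScope, a scope that is not tied to the collecting channel, so a late chunk could arrive after the channel has closed. A hedged alternative sketch, not part of this change, that keeps emission inside the producer's own scope via callbackFlow and trySend (assumes the callback may fire on any thread; names other than LLMService.feedInput are illustrative):

import dev.deliteai.assistant.domain.features.llm.LLMService
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.channels.awaitClose
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.callbackFlow
import kotlinx.coroutines.flow.flowOn

// Streams LLM chunks as they arrive, then the final result, then completes.
fun getLLMTextViaCallbackFlow(textInput: String): Flow<String> = callbackFlow {
    val callback: (String?) -> Unit = { chunk ->
        chunk?.let { trySend(it) }        // non-suspending; silently dropped once closed
    }
    val finalText = LLMService.feedInput(textInput, false, callback)
    finalText?.let { trySend(it) }
    close()                               // no more elements after the call returns
    awaitClose { /* nothing to unregister */ }
}.flowOn(Dispatchers.IO)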
@@ -71,7 +76,7 @@ class ChatRepository {
var ttsQueue = ""
val promptText =
if (textInput == Constants.errorASRResponse) Constants.errorLLMInput else textInput
LLMService.feedInput(promptText, true)
LLMService.feedInput(promptText, true){_-> Log.d(TAG, "unsupported")}
val isFirstJobDone = MutableStateFlow(false)
val maxJobs = Semaphore(3)
fillerAudioPlayJob = async(Dispatchers.IO) {
@@ -6,16 +6,9 @@

package dev.deliteai.assistant.presentation.components

import dev.deliteai.assistant.domain.models.ChatMessage
import dev.deliteai.assistant.presentation.ui.theme.accentHigh1
import dev.deliteai.assistant.presentation.ui.theme.accentLow1
import dev.deliteai.assistant.presentation.ui.theme.backgroundSecondary
import dev.deliteai.assistant.presentation.viewmodels.ChatViewModel
import dev.deliteai.assistant.utils.formatTimeUsingSimpleDateFormat
import androidx.compose.foundation.background
import androidx.compose.foundation.clickable
import androidx.compose.foundation.gestures.detectTapGestures
import androidx.compose.foundation.indication
import androidx.compose.foundation.interaction.MutableInteractionSource
import androidx.compose.foundation.interaction.PressInteraction
import androidx.compose.foundation.layout.Arrangement
@@ -33,8 +26,8 @@ import androidx.compose.foundation.shape.RoundedCornerShape
import androidx.compose.material3.CircularProgressIndicator
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.ProvideTextStyle
import androidx.compose.material3.ripple
import androidx.compose.material3.Text
import androidx.compose.material3.ripple
import androidx.compose.runtime.Composable
import androidx.compose.runtime.LaunchedEffect
import androidx.compose.runtime.getValue
@@ -68,6 +61,12 @@ import com.halilibo.richtext.ui.RichTextStyle
import com.halilibo.richtext.ui.TableStyle
import com.halilibo.richtext.ui.material3.RichText
import com.halilibo.richtext.ui.string.RichTextStringStyle
import dev.deliteai.assistant.domain.models.ChatMessage
import dev.deliteai.assistant.presentation.ui.theme.accentHigh1
import dev.deliteai.assistant.presentation.ui.theme.accentLow1
import dev.deliteai.assistant.presentation.ui.theme.backgroundSecondary
import dev.deliteai.assistant.presentation.viewmodels.ChatViewModel
import dev.deliteai.assistant.utils.formatTimeUsingSimpleDateFormat
import kotlinx.coroutines.delay


@@ -126,7 +125,8 @@ fun ColumnScope.MessageBox(
)
)
.background(if (message.isUserMessage) backgroundSecondary else accentLow1)
.padding(12.dp)
.padding(if (message.message.isEmpty() && !(chatViewModel.currentMessageLoading
.value && isInProgress)) 0.dp else 12.dp)

) {
if (chatViewModel.currentMessageLoading.value && isInProgress) {
@@ -42,24 +42,10 @@ fun ScrollableTextSuggestions(
) {
val allSuggestions = remember {
listOf(
"Design workout routine",
"Recommend wine pairings",
"Write a short poem",
"Draft party menu",
"Create smoothie blends",
"Generate gift ideas",
"Craft cocktail ideas",
"Mix mocktail recipes",
"Suggest hiking essentials",
"Plan a game night",
"Prep for camping",
"Plan a movie marathon",
"Invent signature cocktail",
"Craft lunchbox ideas",
"Who are you?",
"Plan a solo trip",
"Curate weekend playlist",
"Plan a beach day"
"How is the weather here?",
"Where am I located currently?",
"Multiply 23 and 35",
"What is time right now?"
)
}

@@ -6,8 +6,14 @@

package dev.deliteai.assistant.presentation.viewmodels

import android.app.Application
import android.util.Log
import android.widget.Toast
import androidx.compose.runtime.mutableFloatStateOf
import androidx.compose.runtime.mutableStateOf
import androidx.lifecycle.AndroidViewModel
import androidx.lifecycle.viewModelScope
import dev.deliteai.NimbleNet
import dev.deliteai.datamodels.NimbleNetTensor
import dev.deliteai.assistant.domain.features.asr.ASRService
import dev.deliteai.assistant.domain.models.Chat
import dev.deliteai.assistant.domain.models.ChatMessage
@@ -19,13 +25,7 @@ import dev.deliteai.assistant.utils.ExceptionLogger
import dev.deliteai.assistant.utils.LoaderTextProvider
import dev.deliteai.assistant.utils.TAG
import dev.deliteai.assistant.utils.copyTextToClipboard
import android.app.Application
import android.util.Log
import android.widget.Toast
import androidx.compose.runtime.mutableFloatStateOf
import androidx.compose.runtime.mutableStateOf
import androidx.lifecycle.AndroidViewModel
import androidx.lifecycle.viewModelScope
import dev.deliteai.datamodels.NimbleNetTensor
import dev.deliteai.impl.common.DATATYPE
import kotlinx.coroutines.CancellationException
import kotlinx.coroutines.CoroutineExceptionHandler
@@ -47,6 +47,7 @@ import org.json.JSONArray
import org.json.JSONObject
import java.util.Date
import java.util.UUID
import kotlin.math.min

class ChatViewModel(private val application: Application) : AndroidViewModel(application) {
private val chatRepository = ChatRepository()
@@ -82,6 +83,9 @@ class ChatViewModel(private val application: Application) : AndroidViewModel(app
private var waitMessageRefreshJob: Job? = null
var topBarTitle = mutableStateOf<String?>(null)

var thinkingStream = mutableStateOf<String>("")
var masterOutputHolder = ""

init {
viewModelScope.launch(Dispatchers.IO) {
chatRepository.getAudioPlaybackSignal().collect { isAudioPlaying ->
@@ -244,6 +248,8 @@ class ChatViewModel(private val application: Application) : AndroidViewModel(app
}

fun getLLMTextFromTextInput(textInput: String) {
thinkingStream.value = ""

cancelLLMAndClearAudioQueue()
if (!isFirstMessageSent.value) {
isFirstMessageSent.value = true
@@ -303,6 +309,36 @@ class ChatViewModel(private val application: Application) : AndroidViewModel(app
saveChatToRepository()
}

private val THINK_SENTINEL = "/think"

// Fast non-overlapping counter
private fun String.countOccurrences(needle: String): Int {
if (needle.isEmpty()) return 0
var count = 0
var from = 0
while (true) {
val i = indexOf(needle, startIndex = from)
if (i == -1) break
count++
from = i + needle.length
}
return count
}

private fun cleanForUi(s: String): String {
return s.replace("<think>", "")
.replace("</think>", "")
.replace("<tool_call>", "-> EXECUTING TOOL CALL")
.replace("</tool_call>", "")
.replace("<|im_end|>", "")
// optional: hide the sentinel from the UI as well
.replace(THINK_SENTINEL, "")
}

private var thinkSeen = 0
private var cutAfterThirdThink = -1


private fun handleLLMResult(
result: ChatRepository.GenerateResponseJobStatus,
isAudioExpected: Boolean
@@ -314,20 +350,43 @@ class ChatViewModel(private val application: Application) : AndroidViewModel(app
}

is ChatRepository.GenerateResponseJobStatus.Finished -> {
val finalOutput = outputStream.value.toString()
addNewMessageToChatHistory(finalOutput, false)
outputStream.value = null
isInterruptButtonVisible.value = false
// val finalOutput = outputStream.value.toString()
// addNewMessageToChatHistory(finalOutput, false)
// outputStream.value = null
// isInterruptButtonVisible.value = false
}

is ChatRepository.GenerateResponseJobStatus.NextItem -> {
outputStream.value = (outputStream.value ?: "") + result.outputText
if (!isAudioExpected && result.outputText.contains(Regex("[A-Za-z0-9]"))) {
currentMessageLoading.value = false
if (waitMessageRefreshJob != null) {
waitMessageRefreshJob!!.cancel()
val newChunk = result.outputText

masterOutputHolder += newChunk

val totalThinks = masterOutputHolder.countOccurrences(THINK_SENTINEL)

if (totalThinks > thinkSeen) {
if (thinkSeen < 3 && totalThinks >= 3 && cutAfterThirdThink < 0) {
val thirdStart = masterOutputHolder.lastIndexOf(THINK_SENTINEL)
if (thirdStart != -1) {
cutAfterThirdThink = thirdStart + THINK_SENTINEL.length
}
}
thinkSeen = totalThinks
thinkingStream.value = "" // reset the "thinking" buffer at sentinel boundaries
}

if (cutAfterThirdThink >= 0) {
val visible = masterOutputHolder.substring(cutAfterThirdThink)
val cleaned = cleanForUi(visible)
outputStream.value = cleaned.substring(0,min(cleaned.length, 147))
} else {
thinkingStream.value = cleanForUi(thinkingStream.value + newChunk)
}

if (!isAudioExpected && newChunk.contains(Regex("[A-Za-z0-9]"))) {
currentMessageLoading.value = false
waitMessageRefreshJob?.cancel()
}

}
}
}
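To make the "/think" sentinel handling above concrete, a small standalone walk-through with made-up chunks: text seen before the third sentinel feeds the thinking buffer, and everything after it becomes the visible reply once the usual tags are stripped. The chunk contents and the regex-based counter are illustrative only and assume exactly three "/think" markers arrive, as the cut-after-third logic above expects.

// Standalone illustration of the sentinel logic; not part of the app code.
fun main() {
    val sentinel = "/think"
    val chunks = listOf(
        "<think>plan the tool call/think",
        "<tool_call>{\"name\":\"get_weather\"}</tool_call>/think",
        "inspect tool output/think",
        "The weather here is ",
        "sunny today.<|im_end|>"
    )
    var master = ""
    var cutAfterThird = -1
    for (chunk in chunks) {
        master += chunk
        if (cutAfterThird < 0) {
            // count non-overlapping sentinels seen so far
            val count = Regex(Regex.escape(sentinel)).findAll(master).count()
            if (count >= 3) cutAfterThird = master.lastIndexOf(sentinel) + sentinel.length
        }
    }
    val visible = if (cutAfterThird >= 0) master.substring(cutAfterThird) else ""
    println(visible.replace("<|im_end|>", ""))   // -> The weather here is sunny today.
}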
@@ -160,6 +160,7 @@ class MainViewModel(private val application: Application) : AndroidViewModel(app

private fun getCT(): String? {
val scriptVersion = "v1.0.1"
return "QWEN_ONNX"

val deviceTier = when (remoteConfigRepository.getDeviceTier()) {
DeviceTier.ONE -> "CHATAPP_TIER_1"
Expand Down