diff --git a/src/lib/Chat.svelte b/src/lib/Chat.svelte index 2c90ed86..db711425 100644 --- a/src/lib/Chat.svelte +++ b/src/lib/Chat.svelte @@ -304,8 +304,7 @@ chat, autoAddMessages: false, streaming: false, - summaryRequest: true, - maxTokens: 30 + summaryRequest: true }) try { diff --git a/src/lib/ChatRequest.svelte b/src/lib/ChatRequest.svelte index 888b5ca1..14bb64e9 100644 --- a/src/lib/ChatRequest.svelte +++ b/src/lib/ChatRequest.svelte @@ -191,7 +191,7 @@ export class ChatRequest { if (typeof setting.apiTransform === 'function') { value = setting.apiTransform(chatId, setting, value) } - if (key === 'max_tokens') { + if (key === 'max_completion_tokens') { if (opts.maxTokens) value = opts.maxTokens // only as large as requested if (value > maxAllowed || value < 1) value = null // if over max model, do not define max if (value) value = Math.floor(value) diff --git a/src/lib/ChatSettingsModal.svelte b/src/lib/ChatSettingsModal.svelte index 8e822bbe..327c4e61 100644 --- a/src/lib/ChatSettingsModal.svelte +++ b/src/lib/ChatSettingsModal.svelte @@ -186,7 +186,7 @@ const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect profileSelect.options = await getProfileSelect() chatDefaults.profile = await getDefaultProfileKey() - chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model) + chatDefaults.max_completion_tokens = getModelMaxTokens(chatSettings.model) // const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value defaultProfile = await getDefaultProfileKey() isDefault = defaultProfile === chatSettings.profile diff --git a/src/lib/Profiles.svelte b/src/lib/Profiles.svelte index 91c07a8e..b660b6c8 100644 --- a/src/lib/Profiles.svelte +++ b/src/lib/Profiles.svelte @@ -236,7 +236,7 @@ The user has walked in on [[CHARACTER_NAME]]. They are on the bridge of the Hear profileName: 'Jen - An uninhibited virtual friend [LLaMA]', temperature: 0.8, top_p: 0.5, - max_tokens: 4096, + max_completion_tokens: 4096, profileDescription: "*** NOT for OpenAI / ChatGPT models ***\n\nA profile attempting a coherent, unrestricted, playful and helpful chat with Stable Beluga 2 and other LLaMA instruct (non-chat) models running on Petals. The goal is to have an assistant that can talk to you about any topic and answer any question without lecturing you and continuously reminding it can't do things because it's only an AI language model. If you have a better a profile, I'm sure @Niek wouldn't mind a pull request or issue opened.\n\nNote that chat with Llama 2 models under Petals can fall apart quickly, devolving into repetitive responses and catch-phrases. The repetitionPentalty settings helps with that, but then also can keep it from repeating layout patterns you're prompting for, so it can be a delicate balance.\n\nThis profile uses:\n- A system prompt designed for character simulation\n- Modified delimiters, etc., to try to keep chat cohearent\n- A summary prompt", continuousChat: 'summary', summaryThreshold: 0.8, diff --git a/src/lib/Settings.svelte b/src/lib/Settings.svelte index f46992d7..0e5b3ccc 100644 --- a/src/lib/Settings.svelte +++ b/src/lib/Settings.svelte @@ -81,7 +81,7 @@ const gptDefaults = { n: 1, stream: true, stop: null, - max_tokens: 512, + max_completion_tokens: 512, presence_penalty: 0, frequency_penalty: 0, logit_bias: null, @@ -496,11 +496,11 @@ const chatSettingsList: ChatSetting[] = [ hide: hideModelSetting }, { - key: 'max_tokens', + key: 'max_completion_tokens', name: 'Max Tokens', title: 'The maximum number of tokens to generate in the completion.\n' + '\n' + - 'The token count of your prompt plus max_tokens cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).\n', + 'The token count of your prompt plus max_completion_tokens cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).\n', min: 1, max: 32768, step: 1, diff --git a/src/lib/Types.svelte b/src/lib/Types.svelte index 0454bd15..16561684 100644 --- a/src/lib/Types.svelte +++ b/src/lib/Types.svelte @@ -52,7 +52,7 @@ export type Request = { n?: number; stream?: boolean; stop?: string | null; - max_tokens?: number; + max_completion_tokens?: number; presence_penalty?: number; frequency_penalty?: number; logit_bias?: Record | null; diff --git a/src/lib/providers/openai/models.svelte b/src/lib/providers/openai/models.svelte index 8affc3ae..66fb6cdc 100644 --- a/src/lib/providers/openai/models.svelte +++ b/src/lib/providers/openai/models.svelte @@ -87,6 +87,18 @@ const gpt4omini = { completion: 0.00000060, // $0.00060 per 1000 tokens completion max: 131072 // 128k max token buffer } +const o1preview = { + ...chatModelBase, + prompt: 0.000015, // $0.015 per 1000 tokens prompt + completion: 0.00006, // $0.06 per 1000 tokens completion + max: 131072 // 128k max token buffer +} +const o1mini = { + ...chatModelBase, + prompt: 0.000003, // $0.003 per 1000 tokens prompt + completion: 0.000012, // $0.012 per 1000 tokens completion + max: 131072 // 128k max token buffer +} const gpt432k = { ...chatModelBase, prompt: 0.00006, // $0.06 per 1000 tokens prompt @@ -129,7 +141,9 @@ export const chatModels : Record = { 'gpt-4-0125-preview': { ...gpt4128kpreview }, 'gpt-4-32k': { ...gpt432k }, 'gpt-4-32k-0314': { ...gpt432k }, - 'gpt-4-32k-0613': { ...gpt432k } + 'gpt-4-32k-0613': { ...gpt432k }, + 'o1-preview': { ...o1preview }, + 'o1-mini': { ...o1mini } } export const fetchRemoteModels = async () => { diff --git a/src/lib/providers/petals/request.svelte b/src/lib/providers/petals/request.svelte index 8d5b8e50..40dfdc67 100644 --- a/src/lib/providers/petals/request.svelte +++ b/src/lib/providers/petals/request.svelte @@ -165,7 +165,7 @@ export const chatRequest = async ( !chatSettings.holdSocket && ws.close() }) - let maxLen = Math.min(opts.maxTokens || chatSettings.max_tokens || maxTokens, maxTokens) + let maxLen = Math.min(opts.maxTokens || chatSettings.max_completion_tokens || maxTokens, maxTokens) let midDel = '' for (let i = 0, l = delimiter.length; i < l; i++) {