Add o1 Models #504

Merged · 4 commits · Oct 17, 2024
src/lib/Chat.svelte — 3 changes: 1 addition & 2 deletions

@@ -304,8 +304,7 @@
       chat,
       autoAddMessages: false,
       streaming: false,
-      summaryRequest: true,
-      maxTokens: 30
+      summaryRequest: true
     })

     try {
src/lib/ChatRequest.svelte — 2 changes: 1 addition & 1 deletion

@@ -191,7 +191,7 @@ export class ChatRequest {
       if (typeof setting.apiTransform === 'function') {
         value = setting.apiTransform(chatId, setting, value)
       }
-      if (key === 'max_tokens') {
+      if (key === 'max_completion_tokens') {
         if (opts.maxTokens) value = opts.maxTokens // only as large as requested
         if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
         if (value) value = Math.floor(value)
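Note (not part of the diff): OpenAI's o1 models reject the legacy max_tokens parameter, which is why the request key is renamed to max_completion_tokens here. A minimal sketch of the clamping behavior above, with hypothetical values:

// Illustrative only — mirrors the clamp performed in ChatRequest.svelte above.
// maxAllowed would be the model's max token buffer (e.g. 131072 for o1-preview).
const clampMaxCompletionTokens = (value: number, maxAllowed: number, requested?: number): number | null => {
  if (requested) value = requested                 // only as large as requested
  if (value > maxAllowed || value < 1) return null // over the model max: omit the field entirely
  return Math.floor(value)
}

clampMaxCompletionTokens(512, 131072)    // 512  — sent as max_completion_tokens
clampMaxCompletionTokens(500000, 131072) // null — field left out of the request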
src/lib/ChatSettingsModal.svelte — 2 changes: 1 addition & 1 deletion

@@ -186,7 +186,7 @@
   const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect
   profileSelect.options = await getProfileSelect()
   chatDefaults.profile = await getDefaultProfileKey()
-  chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model)
+  chatDefaults.max_completion_tokens = getModelMaxTokens(chatSettings.model)
   // const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value
   defaultProfile = await getDefaultProfileKey()
   isDefault = defaultProfile === chatSettings.profile
src/lib/Profiles.svelte — 2 changes: 1 addition & 1 deletion

@@ -236,7 +236,7 @@ The user has walked in on [[CHARACTER_NAME]]. They are on the bridge of the Hear
     profileName: 'Jen - An uninhibited virtual friend [LLaMA]',
     temperature: 0.8,
     top_p: 0.5,
-    max_tokens: 4096,
+    max_completion_tokens: 4096,
     profileDescription: "*** NOT for OpenAI / ChatGPT models ***\n\nA profile attempting a coherent, unrestricted, playful and helpful chat with Stable Beluga 2 and other LLaMA instruct (non-chat) models running on Petals. The goal is to have an assistant that can talk to you about any topic and answer any question without lecturing you and continuously reminding it can't do things because it's only an AI language model. If you have a better a profile, I'm sure @Niek wouldn't mind a pull request or issue opened.\n\nNote that chat with Llama 2 models under Petals can fall apart quickly, devolving into repetitive responses and catch-phrases. The repetitionPentalty settings helps with that, but then also can keep it from repeating layout patterns you're prompting for, so it can be a delicate balance.\n\nThis profile uses:\n- A system prompt designed for character simulation\n- Modified delimiters, etc., to try to keep chat cohearent\n- A summary prompt",
     continuousChat: 'summary',
     summaryThreshold: 0.8,
src/lib/Settings.svelte — 6 changes: 3 additions & 3 deletions

@@ -81,7 +81,7 @@ const gptDefaults = {
   n: 1,
   stream: true,
   stop: null,
-  max_tokens: 512,
+  max_completion_tokens: 512,
   presence_penalty: 0,
   frequency_penalty: 0,
   logit_bias: null,
@@ -496,11 +496,11 @@ const chatSettingsList: ChatSetting[] = [
     hide: hideModelSetting
   },
   {
-    key: 'max_tokens',
+    key: 'max_completion_tokens',
     name: 'Max Tokens',
     title: 'The maximum number of tokens to generate in the completion.\n' +
       '\n' +
-      'The token count of your prompt plus max_tokens cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).\n',
+      'The token count of your prompt plus max_completion_tokens cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).\n',
     min: 1,
     max: 32768,
     step: 1,
src/lib/Types.svelte — 2 changes: 1 addition & 1 deletion

@@ -52,7 +52,7 @@ export type Request = {
   n?: number;
   stream?: boolean;
   stop?: string | null;
-  max_tokens?: number;
+  max_completion_tokens?: number;
   presence_penalty?: number;
   frequency_penalty?: number;
   logit_bias?: Record<string, number> | null;
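For illustration (not part of the diff), a request built against the updated type — model and messages are assumed members of Request not shown in this hunk:

const req: Request = {
  model: 'o1-preview',
  messages: [{ role: 'user', content: 'Summarize this chat.' }],
  max_completion_tokens: 512, // replaces the old max_tokens field
  stream: false
}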
src/lib/providers/openai/models.svelte — 16 changes: 15 additions & 1 deletion

@@ -87,6 +87,18 @@ const gpt4omini = {
   completion: 0.00000060, // $0.00060 per 1000 tokens completion
   max: 131072 // 128k max token buffer
 }
+const o1preview = {
+  ...chatModelBase,
+  prompt: 0.000015, // $0.015 per 1000 tokens prompt
+  completion: 0.00006, // $0.06 per 1000 tokens completion
+  max: 131072 // 128k max token buffer
+}
+const o1mini = {
+  ...chatModelBase,
+  prompt: 0.000003, // $0.003 per 1000 tokens prompt
+  completion: 0.000012, // $0.012 per 1000 tokens completion
+  max: 131072 // 128k max token buffer
+}
 const gpt432k = {
   ...chatModelBase,
   prompt: 0.00006, // $0.06 per 1000 tokens prompt
@@ -129,7 +141,9 @@ export const chatModels : Record<string, ModelDetail> = {
   'gpt-4-0125-preview': { ...gpt4128kpreview },
   'gpt-4-32k': { ...gpt432k },
   'gpt-4-32k-0314': { ...gpt432k },
-  'gpt-4-32k-0613': { ...gpt432k }
+  'gpt-4-32k-0613': { ...gpt432k },
+  'o1-preview': { ...o1preview },
+  'o1-mini': { ...o1mini }
 }

 export const fetchRemoteModels = async () => {
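For context (not part of the diff), the prompt and completion rates above are per single token, so a rough cost estimate for one exchange multiplies them out — the helper and token counts below are hypothetical:

// Hypothetical helper: estimate USD cost from the per-token rates defined above.
const estimateCost = (model: { prompt: number, completion: number }, promptTokens: number, completionTokens: number) =>
  promptTokens * model.prompt + completionTokens * model.completion

estimateCost(o1preview, 1000, 500) // 1000 * $0.000015 + 500 * $0.00006 = $0.045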
src/lib/providers/petals/request.svelte — 2 changes: 1 addition & 1 deletion

@@ -165,7 +165,7 @@ export const chatRequest = async (
     !chatSettings.holdSocket && ws.close()
   })

-  let maxLen = Math.min(opts.maxTokens || chatSettings.max_tokens || maxTokens, maxTokens)
+  let maxLen = Math.min(opts.maxTokens || chatSettings.max_completion_tokens || maxTokens, maxTokens)

   let midDel = ''
   for (let i = 0, l = delimiter.length; i < l; i++) {
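For illustration (not part of the diff), the fallback chain above takes the per-request limit first, then the renamed chat setting, and finally clamps to the model's hard cap — the sample values are hypothetical:

const maxTokens = 4096 // model's hard cap for this sketch
Math.min(30 || 1024 || maxTokens, maxTokens) // 30   — opts.maxTokens wins
Math.min(0 || 1024 || maxTokens, maxTokens)  // 1024 — falls back to chatSettings.max_completion_tokens
Math.min(0 || 8192 || maxTokens, maxTokens)  // 4096 — clamped to the model maximum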