Add o1 Models #504

Merged · 4 commits · Oct 17, 2024
src/lib/Chat.svelte — 3 changes: 1 addition & 2 deletions

@@ -304,8 +304,7 @@
       chat,
       autoAddMessages: false,
       streaming: false,
-      summaryRequest: true,
-      maxTokens: 30
+      summaryRequest: true
     })

     try {
src/lib/ChatRequest.svelte — 2 changes: 1 addition & 1 deletion

@@ -191,7 +191,7 @@ export class ChatRequest {
       if (typeof setting.apiTransform === 'function') {
         value = setting.apiTransform(chatId, setting, value)
       }
-      if (key === 'max_tokens') {
+      if (key === 'max_completion_tokens') {
         if (opts.maxTokens) value = opts.maxTokens // only as large as requested
         if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
         if (value) value = Math.floor(value)
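Note (not part of the diff): OpenAI's o1 models reject the legacy max_tokens parameter, which is why the request key is renamed to max_completion_tokens here. A minimal sketch of the clamping behavior above, with hypothetical values:

// Illustrative only — mirrors the clamp performed in ChatRequest.svelte above.
// maxAllowed would be the model's max token buffer (e.g. 131072 for o1-preview).
const clampMaxCompletionTokens = (value: number, maxAllowed: number, requested?: number): number | null => {
  if (requested) value = requested                 // only as large as requested
  if (value > maxAllowed || value < 1) return null // over the model max: omit the field entirely
  return Math.floor(value)
}

clampMaxCompletionTokens(512, 131072)    // 512  — sent as max_completion_tokens
clampMaxCompletionTokens(500000, 131072) // null — field left out of the request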
src/lib/ChatSettingsModal.svelte — 2 changes: 1 addition & 1 deletion

@@ -186,7 +186,7 @@
   const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect
   profileSelect.options = await getProfileSelect()
   chatDefaults.profile = await getDefaultProfileKey()
-  chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model)
+  chatDefaults.max_completion_tokens = getModelMaxTokens(chatSettings.model)
   // const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value
   defaultProfile = await getDefaultProfileKey()
   isDefault = defaultProfile === chatSettings.profile
src/lib/Profiles.svelte — 2 changes: 1 addition & 1 deletion

@@ -236,7 +236,7 @@ The user has walked in on [[CHARACTER_NAME]]. They are on the bridge of the Hear
     profileName: 'Jen - An uninhibited virtual friend [LLaMA]',
     temperature: 0.8,
     top_p: 0.5,
-    max_tokens: 4096,
+    max_completion_tokens: 4096,
     profileDescription: "*** NOT for OpenAI / ChatGPT models ***\n\nA profile attempting a coherent, unrestricted, playful and helpful chat with Stable Beluga 2 and other LLaMA instruct (non-chat) models running on Petals. The goal is to have an assistant that can talk to you about any topic and answer any question without lecturing you and continuously reminding it can't do things because it's only an AI language model. If you have a better a profile, I'm sure @Niek wouldn't mind a pull request or issue opened.\n\nNote that chat with Llama 2 models under Petals can fall apart quickly, devolving into repetitive responses and catch-phrases. The repetitionPentalty settings helps with that, but then also can keep it from repeating layout patterns you're prompting for, so it can be a delicate balance.\n\nThis profile uses:\n- A system prompt designed for character simulation\n- Modified delimiters, etc., to try to keep chat cohearent\n- A summary prompt",
     continuousChat: 'summary',
     summaryThreshold: 0.8,
src/lib/Settings.svelte — 6 changes: 3 additions & 3 deletions

@@ -81,7 +81,7 @@ const gptDefaults = {
   n: 1,
   stream: true,
   stop: null,
-  max_tokens: 512,
+  max_completion_tokens: 512,
   presence_penalty: 0,
   frequency_penalty: 0,
   logit_bias: null,
@@ -496,11 +496,11 @@ const chatSettingsList: ChatSetting[] = [
     hide: hideModelSetting
   },
   {
-    key: 'max_tokens',
+    key: 'max_completion_tokens',
     name: 'Max Tokens',
     title: 'The maximum number of tokens to generate in the completion.\n' +
       '\n' +
-      'The token count of your prompt plus max_tokens cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).\n',
+      'The token count of your prompt plus max_completion_tokens cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).\n',
     min: 1,
     max: 32768,
     step: 1,
src/lib/Types.svelte — 2 changes: 1 addition & 1 deletion

@@ -52,7 +52,7 @@ export type Request = {
   n?: number;
   stream?: boolean;
   stop?: string | null;
-  max_tokens?: number;
+  max_completion_tokens?: number;
   presence_penalty?: number;
   frequency_penalty?: number;
   logit_bias?: Record<string, number> | null;
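For illustration (not part of the diff), a request built against the updated type — model and messages are assumed members of Request not shown in this hunk:

const req: Request = {
  model: 'o1-preview',
  messages: [{ role: 'user', content: 'Summarize this chat.' }],
  max_completion_tokens: 512, // replaces the old max_tokens field
  stream: false
}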
src/lib/providers/openai/models.svelte — 16 changes: 15 additions & 1 deletion

@@ -87,6 +87,18 @@ const gpt4omini = {
   completion: 0.00000060, // $0.00060 per 1000 tokens completion
   max: 131072 // 128k max token buffer
 }
+const o1preview = {
+  ...chatModelBase,
+  prompt: 0.000015, // $0.015 per 1000 tokens prompt
+  completion: 0.00006, // $0.06 per 1000 tokens completion
+  max: 131072 // 128k max token buffer
+}
+const o1mini = {
+  ...chatModelBase,
+  prompt: 0.000003, // $0.003 per 1000 tokens prompt
+  completion: 0.000012, // $0.012 per 1000 tokens completion
+  max: 131072 // 128k max token buffer
+}
 const gpt432k = {
   ...chatModelBase,
   prompt: 0.00006, // $0.06 per 1000 tokens prompt
@@ -129,7 +141,9 @@ export const chatModels : Record<string, ModelDetail> = {
   'gpt-4-0125-preview': { ...gpt4128kpreview },
   'gpt-4-32k': { ...gpt432k },
   'gpt-4-32k-0314': { ...gpt432k },
-  'gpt-4-32k-0613': { ...gpt432k }
+  'gpt-4-32k-0613': { ...gpt432k },
+  'o1-preview': { ...o1preview },
+  'o1-mini': { ...o1mini }
 }

 export const fetchRemoteModels = async () => {
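For context (not part of the diff), the prompt and completion rates above are per single token, so a rough cost estimate for one exchange multiplies them out — the helper and token counts below are hypothetical:

// Hypothetical helper: estimate USD cost from the per-token rates defined above.
const estimateCost = (model: { prompt: number, completion: number }, promptTokens: number, completionTokens: number) =>
  promptTokens * model.prompt + completionTokens * model.completion

estimateCost(o1preview, 1000, 500) // 1000 * $0.000015 + 500 * $0.00006 = $0.045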
src/lib/providers/petals/request.svelte — 2 changes: 1 addition & 1 deletion

@@ -165,7 +165,7 @@ export const chatRequest = async (
     !chatSettings.holdSocket && ws.close()
   })

-  let maxLen = Math.min(opts.maxTokens || chatSettings.max_tokens || maxTokens, maxTokens)
+  let maxLen = Math.min(opts.maxTokens || chatSettings.max_completion_tokens || maxTokens, maxTokens)

   let midDel = ''
   for (let i = 0, l = delimiter.length; i < l; i++) {
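For illustration (not part of the diff), the fallback chain above takes the per-request limit first, then the renamed chat setting, and finally clamps to the model's hard cap — the sample values are hypothetical:

const maxTokens = 4096 // model's hard cap for this sketch
Math.min(30 || 1024 || maxTokens, maxTokens) // 30   — opts.maxTokens wins
Math.min(0 || 1024 || maxTokens, maxTokens)  // 1024 — falls back to chatSettings.max_completion_tokens
Math.min(0 || 8192 || maxTokens, maxTokens)  // 4096 — clamped to the model maximum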