From eddd45a8756db270554279257bb84359ee5e8cb4 Mon Sep 17 00:00:00 2001
From: Aurelien Franky
Date: Thu, 23 Nov 2023 11:16:59 +0100
Subject: [PATCH 1/3] add gpt-4-turbo

---
 .../@pufflig/ps-models/src/models/openai.ts   | 65 +++++++++++++++++++
 .../adapters/document_check/document_check.ts | 12 ++--
 .../src/adapters/llm/llm_completion.ts        |  9 +--
 .../@pufflig/ps-nodes-config/src/constants.ts | 16 +++++
 4 files changed, 92 insertions(+), 10 deletions(-)

diff --git a/packages/@pufflig/ps-models/src/models/openai.ts b/packages/@pufflig/ps-models/src/models/openai.ts
index 06dd011..27a6b17 100644
--- a/packages/@pufflig/ps-models/src/models/openai.ts
+++ b/packages/@pufflig/ps-models/src/models/openai.ts
@@ -207,6 +207,71 @@ export const openai_completion: ModelDefinition = {
 };
 
 export const openai_chat: ModelDefinition = {
+  "gpt-4-1106-preview": {
+    modelId: "gpt-4-1106-preview",
+    description: "More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat.",
+    settings: openai_settings,
+    streaming: true,
+    contextLength: 4096,
+    parameters: [
+      {
+        id: "temperature",
+        type: "number",
+        name: "Temperature",
+        max: 2,
+        min: 0,
+        step: 0.1,
+        defaultValue: 0.4,
+        description:
+          "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.",
+      },
+      {
+        id: "max_tokens",
+        type: "number",
+        name: "Max Tokens",
+        // although the documentation says the model should support 8192 tokens, it actually supports 4096
+        max: 4096,
+        min: 1,
+        step: 20,
+        defaultValue: 1024,
+        description:
+          "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.",
+      },
+      {
+        id: "top_p",
+        type: "number",
+        name: "Top P",
+        max: 1,
+        min: 0,
+        step: 0.1,
+        defaultValue: 1,
+        description:
+          "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
+      },
+      {
+        id: "frequency_penalty",
+        type: "number",
+        name: "Frequency penalty",
+        max: 2,
+        min: -2,
+        step: 0.1,
+        defaultValue: 0,
+        description:
+          "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
+      },
+      {
+        id: "presence_penalty",
+        type: "number",
+        name: "Presence penalty",
+        max: 2,
+        min: -2,
+        step: 0.1,
+        defaultValue: 0,
+        description:
+          "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
+      },
+    ],
+  },
   "gpt-4": {
     modelId: "gpt-4",
     description: "More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat.",
diff --git a/packages/@pufflig/ps-nodes-config/src/adapters/document_check/document_check.ts b/packages/@pufflig/ps-nodes-config/src/adapters/document_check/document_check.ts
index 901c0f5..127db39 100644
--- a/packages/@pufflig/ps-nodes-config/src/adapters/document_check/document_check.ts
+++ b/packages/@pufflig/ps-nodes-config/src/adapters/document_check/document_check.ts
@@ -1,11 +1,11 @@
-import { chat_models, completion_models, default_completion_model } from "@pufflig/ps-models";
 import { NodeConfig } from "@pufflig/ps-types";
+import { default_model, models } from "../../constants";
 
 export const documentCheckNodeType = "modifier/document_check" as const;
 
 export const documentCheck: NodeConfig = {
-  name: "Document Check",
-  description: "Run a checklist or extract information from a document.",
+  name: "Document Checklist",
+  description: "Run a checklist on a document.",
   tags: ["adapter", "document", "text"],
   status: "stable",
   execution: {
@@ -33,12 +33,12 @@ export const documentCheck: NodeConfig = {
   inputs: [
     {
       id: "model",
-      name: "Model",
+      name: "AI Settings",
       description: "The model to use",
       type: "model",
-      definition: { ...completion_models, ...chat_models },
+      definition: models,
       defaultValue: {
-        modelId: default_completion_model,
+        modelId: default_model,
         parameters: {},
       },
     },
diff --git a/packages/@pufflig/ps-nodes-config/src/adapters/llm/llm_completion.ts b/packages/@pufflig/ps-nodes-config/src/adapters/llm/llm_completion.ts
index 75c1929..4f0660b 100644
--- a/packages/@pufflig/ps-nodes-config/src/adapters/llm/llm_completion.ts
+++ b/packages/@pufflig/ps-nodes-config/src/adapters/llm/llm_completion.ts
@@ -1,5 +1,6 @@
-import { default_completion_model, completion_models } from "@pufflig/ps-models";
+import { models } from "@pufflig/ps-models";
 import { NodeConfig } from "@pufflig/ps-types";
+import { default_model } from "../../constants";
 
 export const llmCompletionNodeType = "adapter/llm_completion" as const;
 
@@ -34,12 +35,12 @@ export const llmCompletionConfig: NodeConfig = {
   inputs: [
     {
       id: "model",
-      name: "Model",
+      name: "AI Settings",
      description: "The model to use",
       type: "model",
-      definition: completion_models,
+      definition: models,
       defaultValue: {
-        modelId: default_completion_model,
+        modelId: default_model,
         parameters: {},
       },
     },
diff --git a/packages/@pufflig/ps-nodes-config/src/constants.ts b/packages/@pufflig/ps-nodes-config/src/constants.ts
index a88c56c..76ec5fd 100644
--- a/packages/@pufflig/ps-nodes-config/src/constants.ts
+++ b/packages/@pufflig/ps-nodes-config/src/constants.ts
@@ -1 +1,17 @@
+import { chat_models, completion_models } from "@pufflig/ps-models";
+
 export const default_model = "gpt-3.5-turbo-instruct";
+
+export const available_models = [
+  "gpt-3.5-turbo-instruct",
+  "gpt-4-1106-preview",
+  "anthropic/claude-2",
+  "meta-llama/llama-2-13b-chat",
+];
+
+export const models = Object.values({ ...completion_models, ...chat_models })
+  .filter((model) => available_models.includes(model.modelId))
+  .reduce((acc, model) => {
+    acc[model.modelId] = model;
+    return acc;
+  }, {} as Record);

From d6ff40de92ddffe67e6b1ca094c100c44204e9c3 Mon Sep 17 00:00:00 2001
From: Aurelien Franky
Date: Thu, 23 Nov 2023 11:21:30 +0100
Subject: [PATCH 2/3] update
snapshots --- .../__snapshots__/document_check.test.ts.snap | 390 +----- .../__snapshots__/llm_completion.test.ts.snap | 1225 +++++++++++++++-- 2 files changed, 1103 insertions(+), 512 deletions(-) diff --git a/packages/@pufflig/ps-nodes/src/adapters/document_check/__snapshots__/document_check.test.ts.snap b/packages/@pufflig/ps-nodes/src/adapters/document_check/__snapshots__/document_check.test.ts.snap index ffa4060..29f261b 100644 --- a/packages/@pufflig/ps-nodes/src/adapters/document_check/__snapshots__/document_check.test.ts.snap +++ b/packages/@pufflig/ps-nodes/src/adapters/document_check/__snapshots__/document_check.test.ts.snap @@ -52,266 +52,6 @@ exports[`documentCheck should extract variables correctly 1`] = ` }, ], }, - "babbage-002": { - "contextLength": 16384, - "description": "Replacement for the GPT-3 ada and babbage base models.", - "modelId": "babbage-002", - "parameters": [ - { - "defaultValue": 0.4, - "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", - "id": "temperature", - "max": 2, - "min": 0, - "name": "Temperature", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 1024, - "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", - "id": "max_tokens", - "max": 16384, - "min": 1, - "name": "Max Tokens", - "step": 20, - "type": "number", - }, - { - "defaultValue": 1, - "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", - "id": "top_p", - "max": 1, - "min": 0, - "name": "Top P", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", - "id": "frequency_penalty", - "max": 2, - "min": -2, - "name": "Frequency penalty", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", - "id": "presence_penalty", - "max": 2, - "min": -2, - "name": "Presence penalty", - "step": 0.1, - "type": "number", - }, - ], - "settings": [ - { - "description": "Your OpenAI API key", - "id": "openai/api_key", - "name": "API Key", - }, - ], - "streaming": true, - }, - "davinci-002": { - "contextLength": 16384, - "description": "Replacement for the GPT-3 curie and davinci base models.", - "modelId": "davinci-002", - "parameters": [ - { - "defaultValue": 0.4, - "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", - "id": "temperature", - "max": 2, - "min": 0, - "name": "Temperature", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 1024, - "description": "The maximum number of tokens to generate in the completion. 
The total length of input tokens and generated tokens is limited by the model's context length.", - "id": "max_tokens", - "max": 16384, - "min": 1, - "name": "Max Tokens", - "step": 20, - "type": "number", - }, - { - "defaultValue": 1, - "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", - "id": "top_p", - "max": 1, - "min": 0, - "name": "Top P", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", - "id": "frequency_penalty", - "max": 2, - "min": -2, - "name": "Frequency penalty", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", - "id": "presence_penalty", - "max": 2, - "min": -2, - "name": "Presence penalty", - "step": 0.1, - "type": "number", - }, - ], - "settings": [ - { - "description": "Your OpenAI API key", - "id": "openai/api_key", - "name": "API Key", - }, - ], - "streaming": true, - }, - "gpt-3.5-turbo": { - "contextLength": 4096, - "description": "Most capable GPT-3.5 model and optimized for chat at 1/10th the cost of text-davinci-003.", - "modelId": "gpt-3.5-turbo", - "parameters": [ - { - "defaultValue": 0.4, - "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", - "id": "temperature", - "max": 2, - "min": 0, - "name": "Temperature", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 1024, - "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", - "id": "max_tokens", - "max": 4096, - "min": 1, - "name": "Max Tokens", - "step": 20, - "type": "number", - }, - { - "defaultValue": 1, - "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", - "id": "top_p", - "max": 1, - "min": 0, - "name": "Top P", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", - "id": "frequency_penalty", - "max": 2, - "min": -2, - "name": "Frequency penalty", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 0, - "description": "Number between -2.0 and 2.0. 
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", - "id": "presence_penalty", - "max": 2, - "min": -2, - "name": "Presence penalty", - "step": 0.1, - "type": "number", - }, - ], - "settings": [ - { - "description": "Your OpenAI API key", - "id": "openai/api_key", - "name": "API Key", - }, - ], - "streaming": true, - }, - "gpt-3.5-turbo-16k": { - "contextLength": 16385, - "description": "Same capabilities as the standard gpt-3.5-turbo model but with 4 times the context.", - "modelId": "gpt-3.5-turbo-16k", - "parameters": [ - { - "defaultValue": 0.4, - "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", - "id": "temperature", - "max": 2, - "min": 0, - "name": "Temperature", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 1024, - "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", - "id": "max_tokens", - "max": 16385, - "min": 1, - "name": "Max Tokens", - "step": 20, - "type": "number", - }, - { - "defaultValue": 1, - "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", - "id": "top_p", - "max": 1, - "min": 0, - "name": "Top P", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", - "id": "frequency_penalty", - "max": 2, - "min": -2, - "name": "Frequency penalty", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", - "id": "presence_penalty", - "max": 2, - "min": -2, - "name": "Presence penalty", - "step": 0.1, - "type": "number", - }, - ], - "settings": [ - { - "description": "Your OpenAI API key", - "id": "openai/api_key", - "name": "API Key", - }, - ], - "streaming": true, - }, "gpt-3.5-turbo-instruct": { "contextLength": 4096, "description": "Similar capabilities as text-davinci-003 but compatible with legacy Completions endpoint and not Chat Completions.", @@ -377,10 +117,10 @@ exports[`documentCheck should extract variables correctly 1`] = ` ], "streaming": true, }, - "gpt-4": { + "gpt-4-1106-preview": { "contextLength": 4096, "description": "More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat.", - "modelId": "gpt-4", + "modelId": "gpt-4-1106-preview", "parameters": [ { "defaultValue": 0.4, @@ -442,130 +182,6 @@ exports[`documentCheck should extract variables correctly 1`] = ` ], "streaming": true, }, - "gpt-4-32k": { - "contextLength": 32768, - "description": "Same capabilities as the standard gpt-4 mode but with 4x the context length. ", - "modelId": "gpt-4-32k", - "parameters": [ - { - "defaultValue": 0.4, - "description": "What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", - "id": "temperature", - "max": 2, - "min": 0, - "name": "Temperature", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 1024, - "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", - "id": "max_tokens", - "max": 32768, - "min": 1, - "name": "Max Tokens", - "step": 20, - "type": "number", - }, - { - "defaultValue": 1, - "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", - "id": "top_p", - "max": 1, - "min": 0, - "name": "Top P", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", - "id": "frequency_penalty", - "max": 2, - "min": -2, - "name": "Frequency penalty", - "step": 0.1, - "type": "number", - }, - { - "defaultValue": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", - "id": "presence_penalty", - "max": 2, - "min": -2, - "name": "Presence penalty", - "step": 0.1, - "type": "number", - }, - ], - "settings": [ - { - "description": "Your OpenAI API key", - "id": "openai/api_key", - "name": "API Key", - }, - ], - "streaming": true, - }, - "gpt2": { - "contextLength": 512, - "description": "", - "modelId": "gpt2", - "parameters": [ - { - "defaultValue": 10, - "description": "The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.", - "id": "temperature", - "max": 100, - "min": 0, - "name": "Temperature", - "step": 1, - "type": "number", - }, - { - "defaultValue": 10, - "description": "The amount of new tokens to be generated, this does not include the input length it is a estimate of the size of generated text you want. 
Each new tokens slows down the request, so look for balance between response times and length of text generated.", - "id": "max_new_tokens", - "max": 250, - "min": 0, - "name": "Max New Tokens", - "step": 1, - "type": "number", - }, - ], - "settings": [ - { - "description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub.", - "id": "hf/access_token", - "name": "User Access Tokens", - }, - ], - "streaming": false, - }, - "gryphe/mythomax-l2-13b": { - "contextLength": 4096, - "description": "An improved, potentially even perfected variant of MythoMix.", - "modelId": "gryphe/mythomax-l2-13b", - "parameters": [ - { - "defaultValue": 0.4, - "description": "", - "id": "temperature", - "max": 2, - "min": 0, - "name": "Temperature", - "step": 0.1, - "type": "number", - }, - ], - "settings": [ - { - "description": "Your Open Router API key", - "id": "open_router/api_key", - "name": "Open Router API Key", - }, - ], - }, "meta-llama/llama-2-13b-chat": { "contextLength": 4096, "description": "Meta: Llama v2 13B Chat (beta)", @@ -593,7 +209,7 @@ exports[`documentCheck should extract variables correctly 1`] = ` }, "description": "The model to use", "id": "model", - "name": "Model", + "name": "AI Settings", "type": "model", }, { diff --git a/packages/@pufflig/ps-nodes/src/adapters/llm/__snapshots__/llm_completion.test.ts.snap b/packages/@pufflig/ps-nodes/src/adapters/llm/__snapshots__/llm_completion.test.ts.snap index b8fa3d5..3ebe775 100644 --- a/packages/@pufflig/ps-nodes/src/adapters/llm/__snapshots__/llm_completion.test.ts.snap +++ b/packages/@pufflig/ps-nodes/src/adapters/llm/__snapshots__/llm_completion.test.ts.snap @@ -182,10 +182,948 @@ exports[`getInputDefinition - if you pass a template and a variable, take value ], "streaming": true, }, + "gpt-3.5-turbo": { + "contextLength": 4096, + "description": "Most capable GPT-3.5 model and optimized for chat at 1/10th the cost of text-davinci-003.", + "modelId": "gpt-3.5-turbo", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 4096, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. 
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "gpt-3.5-turbo-16k": { + "contextLength": 16385, + "description": "Same capabilities as the standard gpt-3.5-turbo model but with 4 times the context.", + "modelId": "gpt-3.5-turbo-16k", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 16385, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "gpt-3.5-turbo-instruct": { + "contextLength": 4096, + "description": "Similar capabilities as text-davinci-003 but compatible with legacy Completions endpoint and not Chat Completions.", + "modelId": "gpt-3.5-turbo-instruct", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. 
The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 4096, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "gpt-4": { + "contextLength": 4096, + "description": "More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat.", + "modelId": "gpt-4", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 4096, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. 
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "gpt-4-1106-preview": { + "contextLength": 4096, + "description": "More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat.", + "modelId": "gpt-4-1106-preview", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 4096, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "gpt-4-32k": { + "contextLength": 32768, + "description": "Same capabilities as the standard gpt-4 mode but with 4x the context length. ", + "modelId": "gpt-4-32k", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 32768, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "gpt2": { + "contextLength": 512, + "description": "", + "modelId": "gpt2", + "parameters": [ + { + "defaultValue": 10, + "description": "The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.", + "id": "temperature", + "max": 100, + "min": 0, + "name": "Temperature", + "step": 1, + "type": "number", + }, + { + "defaultValue": 10, + "description": "The amount of new tokens to be generated, this does not include the input length it is a estimate of the size of generated text you want. Each new tokens slows down the request, so look for balance between response times and length of text generated.", + "id": "max_new_tokens", + "max": 250, + "min": 0, + "name": "Max New Tokens", + "step": 1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub.", + "id": "hf/access_token", + "name": "User Access Tokens", + }, + ], + "streaming": false, + }, + "gryphe/mythomax-l2-13b": { + "contextLength": 4096, + "description": "An improved, potentially even perfected variant of MythoMix.", + "modelId": "gryphe/mythomax-l2-13b", + "parameters": [ + { + "defaultValue": 0.4, + "description": "", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your Open Router API key", + "id": "open_router/api_key", + "name": "Open Router API Key", + }, + ], + }, + "meta-llama/llama-2-13b-chat": { + "contextLength": 4096, + "description": "Meta: Llama v2 13B Chat (beta)", + "modelId": "meta-llama/llama-2-13b-chat", + "parameters": [ + { + "defaultValue": 0.4, + "description": "", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your Open Router API key", + "id": "open_router/api_key", + "name": "Open Router API Key", + }, + ], + }, + }, + "description": "The model to use", + "id": "model", + "name": "AI Settings", + "type": "model", + }, + { + "defaultValue": "summarize {{longText}}", + "description": "The prompt to send to the LLM", + "id": "prompt", + "name": "Prompt", + "type": "text", + }, + { + "defaultValue": "some long text", + "description": "", + "id": "longText", + "name": "longText", + "type": "text", + }, +] +`; + +exports[`getInputDefinition - ignores non existing variables 1`] = ` +[ + { + 
"defaultValue": { + "modelId": "test", + "parameters": {}, + }, + "definition": { + "anthropic/claude-2": { + "contextLength": 8192, + "description": "Claude: superior performance on tasks that require complex reasoning", + "modelId": "anthropic/claude-2", + "parameters": [ + { + "defaultValue": 1, + "description": "Amount of randomness injected into the response.", + "id": "temperature", + "max": 1, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0.7, + "description": "In nucleus sampling, we compute the cumulative distribution over all the options for each subsequent token in decreasing probability order and cut it off once it reaches a particular probability specified by top_p. You should either alter temperature or top_p, but not both.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 5, + "description": "Only sample from the top K options for each subsequent token.", + "id": "top_k", + "max": 100, + "min": 0, + "name": "Top K", + "step": 1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your Open Router API key", + "id": "open_router/api_key", + "name": "Open Router API Key", + }, + ], + }, + "babbage-002": { + "contextLength": 16384, + "description": "Replacement for the GPT-3 ada and babbage base models.", + "modelId": "babbage-002", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 16384, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. 
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "davinci-002": { + "contextLength": 16384, + "description": "Replacement for the GPT-3 curie and davinci base models.", + "modelId": "davinci-002", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 16384, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "gpt-3.5-turbo": { + "contextLength": 4096, + "description": "Most capable GPT-3.5 model and optimized for chat at 1/10th the cost of text-davinci-003.", + "modelId": "gpt-3.5-turbo", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 4096, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "gpt-3.5-turbo-16k": { + "contextLength": 16385, + "description": "Same capabilities as the standard gpt-3.5-turbo model but with 4 times the context.", + "modelId": "gpt-3.5-turbo-16k", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 16385, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. 
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, "gpt-3.5-turbo-instruct": { "contextLength": 4096, - "description": "Similar capabilities as text-davinci-003 but compatible with legacy Completions endpoint and not Chat Completions.", - "modelId": "gpt-3.5-turbo-instruct", + "description": "Similar capabilities as text-davinci-003 but compatible with legacy Completions endpoint and not Chat Completions.", + "modelId": "gpt-3.5-turbo-instruct", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 4096, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "gpt-4": { + "contextLength": 4096, + "description": "More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat.", + "modelId": "gpt-4", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. 
The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 4096, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, + "gpt-4-1106-preview": { + "contextLength": 4096, + "description": "More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat.", + "modelId": "gpt-4-1106-preview", "parameters": [ { "defaultValue": 0.4, @@ -247,6 +1185,71 @@ exports[`getInputDefinition - if you pass a template and a variable, take value ], "streaming": true, }, + "gpt-4-32k": { + "contextLength": 32768, + "description": "Same capabilities as the standard gpt-4 mode but with 4x the context length. ", + "modelId": "gpt-4-32k", + "parameters": [ + { + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", + "id": "temperature", + "max": 2, + "min": 0, + "name": "Temperature", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 32768, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, + "min": 0, + "name": "Top P", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", + }, + { + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. 
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, + "type": "number", + }, + ], + "settings": [ + { + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", + }, + ], + "streaming": true, + }, "gpt2": { "contextLength": 512, "description": "", @@ -333,7 +1336,7 @@ exports[`getInputDefinition - if you pass a template and a variable, take value }, "description": "The model to use", "id": "model", - "name": "Model", + "name": "AI Settings", "type": "model", }, { @@ -344,7 +1347,7 @@ exports[`getInputDefinition - if you pass a template and a variable, take value "type": "text", }, { - "defaultValue": "some long text", + "defaultValue": "", "description": "", "id": "longText", "name": "longText", @@ -353,7 +1356,7 @@ exports[`getInputDefinition - if you pass a template and a variable, take value ] `; -exports[`getInputDefinition - ignores non existing variables 1`] = ` +exports[`getInputDefinition - no variables 1`] = ` [ { "defaultValue": { @@ -535,10 +1538,10 @@ exports[`getInputDefinition - ignores non existing variables 1`] = ` ], "streaming": true, }, - "gpt-3.5-turbo-instruct": { + "gpt-3.5-turbo": { "contextLength": 4096, - "description": "Similar capabilities as text-davinci-003 but compatible with legacy Completions endpoint and not Chat Completions.", - "modelId": "gpt-3.5-turbo-instruct", + "description": "Most capable GPT-3.5 model and optimized for chat at 1/10th the cost of text-davinci-003.", + "modelId": "gpt-3.5-turbo", "parameters": [ { "defaultValue": 0.4, @@ -600,73 +1603,79 @@ exports[`getInputDefinition - ignores non existing variables 1`] = ` ], "streaming": true, }, - "gpt2": { - "contextLength": 512, - "description": "", - "modelId": "gpt2", + "gpt-3.5-turbo-16k": { + "contextLength": 16385, + "description": "Same capabilities as the standard gpt-3.5-turbo model but with 4 times the context.", + "modelId": "gpt-3.5-turbo-16k", "parameters": [ { - "defaultValue": 10, - "description": "The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.", + "defaultValue": 0.4, + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", "id": "temperature", - "max": 100, + "max": 2, "min": 0, "name": "Temperature", - "step": 1, + "step": 0.1, "type": "number", }, { - "defaultValue": 10, - "description": "The amount of new tokens to be generated, this does not include the input length it is a estimate of the size of generated text you want. Each new tokens slows down the request, so look for balance between response times and length of text generated.", - "id": "max_new_tokens", - "max": 250, + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 16385, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", + }, + { + "defaultValue": 1, + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", + "max": 1, "min": 0, - "name": "Max New Tokens", - "step": 1, + "name": "Top P", + "step": 0.1, "type": "number", }, - ], - "settings": [ { - "description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub.", - "id": "hf/access_token", - "name": "User Access Tokens", + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", + "step": 0.1, + "type": "number", }, - ], - "streaming": false, - }, - "gryphe/mythomax-l2-13b": { - "contextLength": 4096, - "description": "An improved, potentially even perfected variant of MythoMix.", - "modelId": "gryphe/mythomax-l2-13b", - "parameters": [ { - "defaultValue": 0.4, - "description": "", - "id": "temperature", + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", "max": 2, - "min": 0, - "name": "Temperature", + "min": -2, + "name": "Presence penalty", "step": 0.1, "type": "number", }, ], "settings": [ { - "description": "Your Open Router API key", - "id": "open_router/api_key", - "name": "Open Router API Key", + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", }, ], + "streaming": true, }, - "meta-llama/llama-2-13b-chat": { + "gpt-3.5-turbo-instruct": { "contextLength": 4096, - "description": "Meta: Llama v2 13B Chat (beta)", - "modelId": "meta-llama/llama-2-13b-chat", + "description": "Similar capabilities as text-davinci-003 but compatible with legacy Completions endpoint and not Chat Completions.", + "modelId": "gpt-3.5-turbo-instruct", "parameters": [ { "defaultValue": 0.4, - "description": "", + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.", "id": "temperature", "max": 2, "min": 0, @@ -674,94 +1683,60 @@ exports[`getInputDefinition - ignores non existing variables 1`] = ` "step": 0.1, "type": "number", }, - ], - "settings": [ { - "description": "Your Open Router API key", - "id": "open_router/api_key", - "name": "Open Router API Key", + "defaultValue": 1024, + "description": "The maximum number of tokens to generate in the completion. 
The total length of input tokens and generated tokens is limited by the model's context length.", + "id": "max_tokens", + "max": 4096, + "min": 1, + "name": "Max Tokens", + "step": 20, + "type": "number", }, - ], - }, - }, - "description": "The model to use", - "id": "model", - "name": "Model", - "type": "model", - }, - { - "defaultValue": "summarize {{longText}}", - "description": "The prompt to send to the LLM", - "id": "prompt", - "name": "Prompt", - "type": "text", - }, - { - "defaultValue": "", - "description": "", - "id": "longText", - "name": "longText", - "type": "text", - }, -] -`; - -exports[`getInputDefinition - no variables 1`] = ` -[ - { - "defaultValue": { - "modelId": "test", - "parameters": {}, - }, - "definition": { - "anthropic/claude-2": { - "contextLength": 8192, - "description": "Claude: superior performance on tasks that require complex reasoning", - "modelId": "anthropic/claude-2", - "parameters": [ { "defaultValue": 1, - "description": "Amount of randomness injected into the response.", - "id": "temperature", + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "id": "top_p", "max": 1, "min": 0, - "name": "Temperature", + "name": "Top P", "step": 0.1, "type": "number", }, { - "defaultValue": 0.7, - "description": "In nucleus sampling, we compute the cumulative distribution over all the options for each subsequent token in decreasing probability order and cut it off once it reaches a particular probability specified by top_p. You should either alter temperature or top_p, but not both.", - "id": "top_p", - "max": 1, - "min": 0, - "name": "Top P", + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "id": "frequency_penalty", + "max": 2, + "min": -2, + "name": "Frequency penalty", "step": 0.1, "type": "number", }, { - "defaultValue": 5, - "description": "Only sample from the top K options for each subsequent token.", - "id": "top_k", - "max": 100, - "min": 0, - "name": "Top K", - "step": 1, + "defaultValue": 0, + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "id": "presence_penalty", + "max": 2, + "min": -2, + "name": "Presence penalty", + "step": 0.1, "type": "number", }, ], "settings": [ { - "description": "Your Open Router API key", - "id": "open_router/api_key", - "name": "Open Router API Key", + "description": "Your OpenAI API key", + "id": "openai/api_key", + "name": "API Key", }, ], + "streaming": true, }, - "babbage-002": { - "contextLength": 16384, - "description": "Replacement for the GPT-3 ada and babbage base models.", - "modelId": "babbage-002", + "gpt-4": { + "contextLength": 4096, + "description": "More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat.", + "modelId": "gpt-4", "parameters": [ { "defaultValue": 0.4, @@ -777,7 +1752,7 @@ exports[`getInputDefinition - no variables 1`] = ` "defaultValue": 1024, "description": "The maximum number of tokens to generate in the completion. 
The total length of input tokens and generated tokens is limited by the model's context length.", "id": "max_tokens", - "max": 16384, + "max": 4096, "min": 1, "name": "Max Tokens", "step": 20, @@ -823,10 +1798,10 @@ exports[`getInputDefinition - no variables 1`] = ` ], "streaming": true, }, - "davinci-002": { - "contextLength": 16384, - "description": "Replacement for the GPT-3 curie and davinci base models.", - "modelId": "davinci-002", + "gpt-4-1106-preview": { + "contextLength": 4096, + "description": "More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat.", + "modelId": "gpt-4-1106-preview", "parameters": [ { "defaultValue": 0.4, @@ -842,7 +1817,7 @@ exports[`getInputDefinition - no variables 1`] = ` "defaultValue": 1024, "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", "id": "max_tokens", - "max": 16384, + "max": 4096, "min": 1, "name": "Max Tokens", "step": 20, @@ -888,10 +1863,10 @@ exports[`getInputDefinition - no variables 1`] = ` ], "streaming": true, }, - "gpt-3.5-turbo-instruct": { - "contextLength": 4096, - "description": "Similar capabilities as text-davinci-003 but compatible with legacy Completions endpoint and not Chat Completions.", - "modelId": "gpt-3.5-turbo-instruct", + "gpt-4-32k": { + "contextLength": 32768, + "description": "Same capabilities as the standard gpt-4 mode but with 4x the context length. ", + "modelId": "gpt-4-32k", "parameters": [ { "defaultValue": 0.4, @@ -907,7 +1882,7 @@ exports[`getInputDefinition - no variables 1`] = ` "defaultValue": 1024, "description": "The maximum number of tokens to generate in the completion. The total length of input tokens and generated tokens is limited by the model's context length.", "id": "max_tokens", - "max": 4096, + "max": 32768, "min": 1, "name": "Max Tokens", "step": 20, @@ -1039,7 +2014,7 @@ exports[`getInputDefinition - no variables 1`] = ` }, "description": "The model to use", "id": "model", - "name": "Model", + "name": "AI Settings", "type": "model", }, { From 6d5dc11207772867eee8a7f0f4037e7f10384e4c Mon Sep 17 00:00:00 2001 From: Aurelien Franky Date: Thu, 23 Nov 2023 11:21:47 +0100 Subject: [PATCH 3/3] v0.23.0 --- lerna.json | 2 +- package-lock.json | 30 +++++++++---------- packages/@pufflig/ps-chains/package.json | 4 +-- packages/@pufflig/ps-models/package.json | 4 +-- .../@pufflig/ps-nodes-config/package.json | 6 ++-- packages/@pufflig/ps-nodes/package.json | 10 +++---- packages/@pufflig/ps-sdk/package.json | 4 +-- packages/@pufflig/ps-types/package.json | 2 +- 8 files changed, 31 insertions(+), 31 deletions(-) diff --git a/lerna.json b/lerna.json index d550843..dacf34f 100644 --- a/lerna.json +++ b/lerna.json @@ -1,4 +1,4 @@ { "$schema": "node_modules/lerna/schemas/lerna-schema.json", - "version": "0.22.1" + "version": "0.23.0" } diff --git a/package-lock.json b/package-lock.json index 1ede9b8..44bbf7b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -21240,14 +21240,14 @@ } }, "packages/@pufflig/ps-chains": { - "version": "0.22.0", + "version": "0.23.0", "license": "MIT", "dependencies": { "mustache": "^4.2.0", "pino": "^8.15.0" }, "devDependencies": { - "@pufflig/ps-types": "^0.22.0", + "@pufflig/ps-types": "^0.23.0", "@types/react-dom": "^18.2.7", "immer": "^10.0.2", "prop-types": "^15.8.1", @@ -21291,23 +21291,23 @@ } }, "packages/@pufflig/ps-models": { - "version": "0.22.0", + "version": "0.23.0", "license": "MIT", 
"devDependencies": { - "@pufflig/ps-types": "^0.22.0", + "@pufflig/ps-types": "^0.23.0", "typescript": "^5.2.2", "vite": "^4.3.9", "vite-plugin-dts": "^2.3.0" } }, "packages/@pufflig/ps-nodes": { - "version": "0.22.1", + "version": "0.23.0", "license": "MIT", "dependencies": { "@dqbd/tiktoken": "^1.0.7", - "@pufflig/ps-models": "^0.22.0", - "@pufflig/ps-nodes-config": "^0.22.0", - "@pufflig/ps-sdk": "^0.22.0", + "@pufflig/ps-models": "^0.23.0", + "@pufflig/ps-nodes-config": "^0.23.0", + "@pufflig/ps-sdk": "^0.23.0", "axios": "^1.6.2", "langchain": "^0.0.193", "lodash": "^4.17.21", @@ -21316,7 +21316,7 @@ "uuid": "^9.0.1" }, "devDependencies": { - "@pufflig/ps-types": "^0.22.0", + "@pufflig/ps-types": "^0.23.0", "@types/jest": "^29.5.8", "@types/lodash": "^4.14.196", "@types/mustache": "^4.2.2", @@ -21329,13 +21329,13 @@ } }, "packages/@pufflig/ps-nodes-config": { - "version": "0.22.0", + "version": "0.23.0", "license": "MIT", "dependencies": { - "@pufflig/ps-models": "^0.22.0" + "@pufflig/ps-models": "^0.23.0" }, "devDependencies": { - "@pufflig/ps-types": "^0.22.0", + "@pufflig/ps-types": "^0.23.0", "@types/jest": "^29.5.8", "jest": "^29.7.0", "ts-jest": "^29.1.0", @@ -21866,10 +21866,10 @@ } }, "packages/@pufflig/ps-sdk": { - "version": "0.22.0", + "version": "0.23.0", "license": "MIT", "dependencies": { - "@pufflig/ps-models": "^0.22.0", + "@pufflig/ps-models": "^0.23.0", "axios": "^1.5.0" }, "devDependencies": { @@ -21904,7 +21904,7 @@ } }, "packages/@pufflig/ps-types": { - "version": "0.22.0", + "version": "0.23.0", "license": "MIT" }, "websites/blog": { diff --git a/packages/@pufflig/ps-chains/package.json b/packages/@pufflig/ps-chains/package.json index 44072b0..f571c11 100644 --- a/packages/@pufflig/ps-chains/package.json +++ b/packages/@pufflig/ps-chains/package.json @@ -1,6 +1,6 @@ { "name": "@pufflig/ps-chains", - "version": "0.22.0", + "version": "0.23.0", "license": "MIT", "main": "./dist/ps-chains.umd.js", "module": "./dist/ps-chains.es.js", @@ -16,7 +16,7 @@ "test": "jest" }, "devDependencies": { - "@pufflig/ps-types": "^0.22.0", + "@pufflig/ps-types": "^0.23.0", "@types/react-dom": "^18.2.7", "immer": "^10.0.2", "prop-types": "^15.8.1", diff --git a/packages/@pufflig/ps-models/package.json b/packages/@pufflig/ps-models/package.json index 82df9ae..07626e3 100644 --- a/packages/@pufflig/ps-models/package.json +++ b/packages/@pufflig/ps-models/package.json @@ -1,7 +1,7 @@ { "name": "@pufflig/ps-models", "private": false, - "version": "0.22.0", + "version": "0.23.0", "description": "Configuration of models used in Prompt Studio", "files": [ "dist" @@ -16,7 +16,7 @@ "author": "Pufflig AB", "license": "MIT", "devDependencies": { - "@pufflig/ps-types": "^0.22.0", + "@pufflig/ps-types": "^0.23.0", "typescript": "^5.2.2", "vite": "^4.3.9", "vite-plugin-dts": "^2.3.0" diff --git a/packages/@pufflig/ps-nodes-config/package.json b/packages/@pufflig/ps-nodes-config/package.json index 75d7cba..5f45b72 100644 --- a/packages/@pufflig/ps-nodes-config/package.json +++ b/packages/@pufflig/ps-nodes-config/package.json @@ -1,7 +1,7 @@ { "name": "@pufflig/ps-nodes-config", "private": false, - "version": "0.22.0", + "version": "0.23.0", "description": "Configuration files for nodes used in prompt studio.", "files": [ "dist" @@ -16,10 +16,10 @@ "author": "Pufflig AB", "license": "MIT", "dependencies": { - "@pufflig/ps-models": "^0.22.0" + "@pufflig/ps-models": "^0.23.0" }, "devDependencies": { - "@pufflig/ps-types": "^0.22.0", + "@pufflig/ps-types": "^0.23.0", "@types/jest": "^29.5.8", "jest": 
"^29.7.0", "ts-jest": "^29.1.0", diff --git a/packages/@pufflig/ps-nodes/package.json b/packages/@pufflig/ps-nodes/package.json index 12d9293..5efc282 100644 --- a/packages/@pufflig/ps-nodes/package.json +++ b/packages/@pufflig/ps-nodes/package.json @@ -1,7 +1,7 @@ { "name": "@pufflig/ps-nodes", "private": false, - "version": "0.22.1", + "version": "0.23.0", "description": "Collection of nodes used in Prompt Studio", "files": [ "dist" @@ -17,7 +17,7 @@ "author": "Pufflig AB", "license": "MIT", "devDependencies": { - "@pufflig/ps-types": "^0.22.0", + "@pufflig/ps-types": "^0.23.0", "@types/jest": "^29.5.8", "@types/lodash": "^4.14.196", "@types/mustache": "^4.2.2", @@ -33,9 +33,9 @@ }, "dependencies": { "@dqbd/tiktoken": "^1.0.7", - "@pufflig/ps-models": "^0.22.0", - "@pufflig/ps-nodes-config": "^0.22.0", - "@pufflig/ps-sdk": "^0.22.0", + "@pufflig/ps-models": "^0.23.0", + "@pufflig/ps-nodes-config": "^0.23.0", + "@pufflig/ps-sdk": "^0.23.0", "axios": "^1.6.2", "langchain": "^0.0.193", "lodash": "^4.17.21", diff --git a/packages/@pufflig/ps-sdk/package.json b/packages/@pufflig/ps-sdk/package.json index 19c1e20..7585daf 100644 --- a/packages/@pufflig/ps-sdk/package.json +++ b/packages/@pufflig/ps-sdk/package.json @@ -1,6 +1,6 @@ { "name": "@pufflig/ps-sdk", - "version": "0.22.0", + "version": "0.23.0", "license": "MIT", "main": "./dist/ps-sdk.umd.js", "module": "./dist/ps-sdk.es.js", @@ -21,7 +21,7 @@ "vite-plugin-dts": "^3.5.3" }, "dependencies": { - "@pufflig/ps-models": "^0.22.0", + "@pufflig/ps-models": "^0.23.0", "axios": "^1.5.0" }, "gitHead": "534ff07d186327e38133c0c14a5badd04ced0a9c" diff --git a/packages/@pufflig/ps-types/package.json b/packages/@pufflig/ps-types/package.json index d406ee1..0daae99 100644 --- a/packages/@pufflig/ps-types/package.json +++ b/packages/@pufflig/ps-types/package.json @@ -1,6 +1,6 @@ { "name": "@pufflig/ps-types", - "version": "0.22.0", + "version": "0.23.0", "description": "Typescript definitions for Prompt Studio", "main": "src/index.ts", "author": "Pufflig AB",