From 0cd327f0aa4147d7fda7a5bcd106722135023135 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 22 May 2024 12:50:08 +0200 Subject: [PATCH 1/4] Generate sample text-generation code from chat template --- packages/tasks/package.json | 3 + packages/tasks/pnpm-lock.yaml | 5 ++ packages/tasks/src/snippets/inputs.ts | 7 +- .../src/snippets/tasks/text-generation.ts | 64 +++++++++++++++++++ packages/tasks/src/snippets/types.ts | 7 +- pnpm-workspace.yaml | 2 +- 6 files changed, 82 insertions(+), 6 deletions(-) create mode 100644 packages/tasks/src/snippets/tasks/text-generation.ts diff --git a/packages/tasks/package.json b/packages/tasks/package.json index 094302b97..822d30366 100644 --- a/packages/tasks/package.json +++ b/packages/tasks/package.json @@ -43,6 +43,9 @@ ], "author": "Hugging Face", "license": "MIT", + "dependencies": { + "@huggingface/jinja": "workspace:^" + }, "devDependencies": { "@types/node": "^20.11.5", "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz", diff --git a/packages/tasks/pnpm-lock.yaml b/packages/tasks/pnpm-lock.yaml index 741ee9a42..20a158909 100644 --- a/packages/tasks/pnpm-lock.yaml +++ b/packages/tasks/pnpm-lock.yaml @@ -4,6 +4,11 @@ settings: autoInstallPeers: true excludeLinksFromLockfile: false +dependencies: + '@huggingface/jinja': + specifier: workspace:^ + version: link:../jinja + devDependencies: '@types/node': specifier: ^20.11.5 diff --git a/packages/tasks/src/snippets/inputs.ts b/packages/tasks/src/snippets/inputs.ts index 746e21f14..9bb485d99 100644 --- a/packages/tasks/src/snippets/inputs.ts +++ b/packages/tasks/src/snippets/inputs.ts @@ -1,5 +1,6 @@ import type { PipelineType } from "../pipelines"; -import type { ModelDataMinimal } from "./types"; +import type { InputGenerator, ModelDataMinimal } from "./types"; +import inputsTextGeneration from "./tasks/text-generation"; const inputsZeroShotClassification = () => `"Hi, I recently bought a 
device from your company but it is not working as advertised and I would like to get reimbursed!"`; @@ -40,8 +41,6 @@ const inputsTextClassification = () => `"I like you. I love you"`; const inputsTokenClassification = () => `"My name is Sarah Jessica Parker but you can call me Jessica"`; -const inputsTextGeneration = () => `"Can you please let us know more details about your "`; - const inputsText2TextGeneration = () => `"The answer to the universe is"`; const inputsFillMask = (model: ModelDataMinimal) => `"The answer to the universe is ${model.mask_token}."`; @@ -84,7 +83,7 @@ const inputsTabularPrediction = () => const inputsZeroShotImageClassification = () => `"cats.jpg"`; const modelInputSnippets: { - [key in PipelineType]?: (model: ModelDataMinimal) => string; + [key in PipelineType]?: InputGenerator; } = { "audio-to-audio": inputsAudioToAudio, "audio-classification": inputsAudioClassification, diff --git a/packages/tasks/src/snippets/tasks/text-generation.ts b/packages/tasks/src/snippets/tasks/text-generation.ts new file mode 100644 index 000000000..1205f9fca --- /dev/null +++ b/packages/tasks/src/snippets/tasks/text-generation.ts @@ -0,0 +1,64 @@ +import type { ModelDataMinimal } from "../types"; +import type { TokenizerConfig } from "../../tokenizer-data"; + +import { Template } from "@huggingface/jinja"; +import { SPECIAL_TOKENS_ATTRIBUTES } from "../../tokenizer-data"; + +// Define default text generation input +const DEFAULT_TEXT_GENERATION_INPUT = `"Can you please let us know more details about your "`; + +// Define defaults for chat models +const DEFAULT_SYSTEM_MESSAGE = { role: "system", content: "You are a helpful assistant." }; +const DEFAULT_USER_MESSAGE = { role: "user", content: "Tell me a joke." 
}; +const DEFAULT_MESSAGES = [DEFAULT_SYSTEM_MESSAGE, DEFAULT_USER_MESSAGE]; + +type SpecialTokensMap = Partial<Record<(typeof SPECIAL_TOKENS_ATTRIBUTES)[number], string>>; +const getSpecialTokensMap = (tokenizerConfig: TokenizerConfig): SpecialTokensMap => { + const specialTokensMap: SpecialTokensMap = {}; + for (const token of SPECIAL_TOKENS_ATTRIBUTES) { + const item = tokenizerConfig[token]; + if (typeof item === "string") { + specialTokensMap[token] = item; + } else if (item?.content) { + specialTokensMap[token] = item.content; + } + } + return specialTokensMap; +}; + +export default (model: ModelDataMinimal): string => { + const tokenizerConfig = model.config?.tokenizer_config; + if (!tokenizerConfig) { + return DEFAULT_TEXT_GENERATION_INPUT; + } + + let chat_template = tokenizerConfig.chat_template; + if (Array.isArray(chat_template)) { + // Find the default template + chat_template = chat_template.find((template) => template?.name === "default")?.template; + + // TODO: If no default template is found, use the first one + // However, many of these (e.g., https://huggingface.co/CohereForAI/c4ai-command-r-v01/blob/main/tokenizer_config.json) + // have non-default templates that require additional information (e.g., tools or documents) + } + + if (!chat_template) { + // Default text generation input + return DEFAULT_TEXT_GENERATION_INPUT; + } + + try { + const template = new Template(chat_template); + const rendered = template.render({ + messages: DEFAULT_MESSAGES, + // TODO: add default tools or documents + + // Add special tokens + ...getSpecialTokensMap(tokenizerConfig), + }); + return rendered; + } catch (e) { + // Some error occurred, so we just return default + return DEFAULT_TEXT_GENERATION_INPUT; + } +}; diff --git a/packages/tasks/src/snippets/types.ts b/packages/tasks/src/snippets/types.ts index 51966858c..b1eeb4a9c 100644 --- a/packages/tasks/src/snippets/types.ts +++ b/packages/tasks/src/snippets/types.ts @@ -5,4 +5,9 @@ import type { ModelData } from "../model-data"; * * Add more fields as needed.
*/ -export type ModelDataMinimal = Pick<ModelData, "id" | "pipeline_tag" | "mask_token" | "library_name">; +export type ModelDataMinimal = Pick<ModelData, "id" | "pipeline_tag" | "mask_token" | "library_name" | "config">; + +/** + * Input generator function. + */ +export type InputGenerator = (model: ModelDataMinimal) => string; diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 45131d274..11f6fa6ac 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,10 +1,10 @@ packages: - "packages/hub" - "packages/inference" + - "packages/jinja" - "packages/doc-internal" - "packages/agents" - "packages/languages" - "packages/tasks" - "packages/gguf" - - "packages/jinja" - "packages/widgets" From d8373b4107055fd88e0e6bcde05607e7e4788025 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 22 May 2024 13:16:56 +0200 Subject: [PATCH 2/4] Move input generators to separate folders Align with new text-generation generator and make codebase more future-proof (when we improve inputs across other tasks) --- packages/tasks/src/snippets/inputs.ts | 105 +++++------------- .../snippets/tasks/audio-classification.ts | 1 + .../src/snippets/tasks/audio-to-audio.ts | 1 + .../tasks/automatic-speech-recognition.ts | 1 + .../src/snippets/tasks/feature-extraction.ts | 1 + .../tasks/src/snippets/tasks/fill-mask.ts | 3 + .../snippets/tasks/image-classification.ts | 1 + .../src/snippets/tasks/image-segmentation.ts | 1 + .../tasks/src/snippets/tasks/image-to-text.ts | 1 + .../src/snippets/tasks/object-detection.ts | 1 + .../src/snippets/tasks/question-answering.ts | 5 + .../src/snippets/tasks/sentence-similarity.ts | 9 ++ .../tasks/src/snippets/tasks/summarization.ts | 2 + .../tasks/table-question-answering.ts | 14 +++ .../src/snippets/tasks/tabular-prediction.ts | 2 + .../src/snippets/tasks/text-classification.ts | 1 + .../tasks/src/snippets/tasks/text-to-audio.ts | 1 + .../tasks/src/snippets/tasks/text-to-image.ts | 1 + .../src/snippets/tasks/text-to-speech.ts | 1 + .../snippets/tasks/text2text-generation.ts | 1 + .../snippets/tasks/token-classification.ts | 1 + .../tasks/src/snippets/tasks/translation.ts | 1 +
.../tasks/visual-question-answering.ts | 5 + .../tasks/zero-shot-classification.ts | 2 + .../tasks/zero-shot-image-classification.ts | 1 + 25 files changed, 83 insertions(+), 80 deletions(-) create mode 100644 packages/tasks/src/snippets/tasks/audio-classification.ts create mode 100644 packages/tasks/src/snippets/tasks/audio-to-audio.ts create mode 100644 packages/tasks/src/snippets/tasks/automatic-speech-recognition.ts create mode 100644 packages/tasks/src/snippets/tasks/feature-extraction.ts create mode 100644 packages/tasks/src/snippets/tasks/fill-mask.ts create mode 100644 packages/tasks/src/snippets/tasks/image-classification.ts create mode 100644 packages/tasks/src/snippets/tasks/image-segmentation.ts create mode 100644 packages/tasks/src/snippets/tasks/image-to-text.ts create mode 100644 packages/tasks/src/snippets/tasks/object-detection.ts create mode 100644 packages/tasks/src/snippets/tasks/question-answering.ts create mode 100644 packages/tasks/src/snippets/tasks/sentence-similarity.ts create mode 100644 packages/tasks/src/snippets/tasks/summarization.ts create mode 100644 packages/tasks/src/snippets/tasks/table-question-answering.ts create mode 100644 packages/tasks/src/snippets/tasks/tabular-prediction.ts create mode 100644 packages/tasks/src/snippets/tasks/text-classification.ts create mode 100644 packages/tasks/src/snippets/tasks/text-to-audio.ts create mode 100644 packages/tasks/src/snippets/tasks/text-to-image.ts create mode 100644 packages/tasks/src/snippets/tasks/text-to-speech.ts create mode 100644 packages/tasks/src/snippets/tasks/text2text-generation.ts create mode 100644 packages/tasks/src/snippets/tasks/token-classification.ts create mode 100644 packages/tasks/src/snippets/tasks/translation.ts create mode 100644 packages/tasks/src/snippets/tasks/visual-question-answering.ts create mode 100644 packages/tasks/src/snippets/tasks/zero-shot-classification.ts create mode 100644 packages/tasks/src/snippets/tasks/zero-shot-image-classification.ts 
diff --git a/packages/tasks/src/snippets/inputs.ts b/packages/tasks/src/snippets/inputs.ts index 9bb485d99..775b53226 100644 --- a/packages/tasks/src/snippets/inputs.ts +++ b/packages/tasks/src/snippets/inputs.ts @@ -1,86 +1,31 @@ import type { PipelineType } from "../pipelines"; import type { InputGenerator, ModelDataMinimal } from "./types"; -import inputsTextGeneration from "./tasks/text-generation"; - -const inputsZeroShotClassification = () => - `"Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!"`; - -const inputsTranslation = () => `"Меня зовут Вольфганг и я живу в Берлине"`; - -const inputsSummarization = () => - `"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."`; - -const inputsTableQuestionAnswering = () => - `{ - "query": "How many stars does the transformers repository have?", - "table": { - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": [ - "Python", - "Python", - "Rust, Python and NodeJS" - ] - } - }`; - -const inputsVisualQuestionAnswering = () => - `{ - "image": "cat.png", - "question": "What is in this image?" 
- }`; - -const inputsQuestionAnswering = () => - `{ - "question": "What is my name?", - "context": "My name is Clara and I live in Berkeley." - }`; - -const inputsTextClassification = () => `"I like you. I love you"`; - -const inputsTokenClassification = () => `"My name is Sarah Jessica Parker but you can call me Jessica"`; - -const inputsText2TextGeneration = () => `"The answer to the universe is"`; - -const inputsFillMask = (model: ModelDataMinimal) => `"The answer to the universe is ${model.mask_token}."`; - -const inputsSentenceSimilarity = () => - `{ - "source_sentence": "That is a happy person", - "sentences": [ - "That is a happy dog", - "That is a very happy person", - "Today is a sunny day" - ] - }`; -const inputsFeatureExtraction = () => `"Today is a sunny day and I will get some ice cream."`; - -const inputsImageClassification = () => `"cats.jpg"`; - -const inputsImageToText = () => `"cats.jpg"`; - -const inputsImageSegmentation = () => `"cats.jpg"`; - -const inputsObjectDetection = () => `"cats.jpg"`; - -const inputsAudioToAudio = () => `"sample1.flac"`; - -const inputsAudioClassification = () => `"sample1.flac"`; - -const inputsTextToImage = () => `"Astronaut riding a horse"`; - -const inputsTextToSpeech = () => `"The answer to the universe is 42"`; - -const inputsTextToAudio = () => `"liquid drum and bass, atmospheric synths, airy sounds"`; - -const inputsAutomaticSpeechRecognition = () => `"sample1.flac"`; - -const inputsTabularPrediction = () => - `'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`; - -const inputsZeroShotImageClassification = () => `"cats.jpg"`; +import inputsTextGeneration from "./tasks/text-generation"; +import inputsZeroShotClassification from "./tasks/zero-shot-classification"; +import inputsTranslation from "./tasks/translation"; +import inputsSummarization from "./tasks/summarization"; +import inputsTableQuestionAnswering from "./tasks/table-question-answering"; +import 
inputsVisualQuestionAnswering from "./tasks/visual-question-answering"; +import inputsQuestionAnswering from "./tasks/question-answering"; +import inputsTextClassification from "./tasks/text-classification"; +import inputsTokenClassification from "./tasks/token-classification"; +import inputsText2TextGeneration from "./tasks/text2text-generation"; +import inputsFillMask from "./tasks/fill-mask"; +import inputsSentenceSimilarity from "./tasks/sentence-similarity"; +import inputsFeatureExtraction from "./tasks/feature-extraction"; +import inputsImageClassification from "./tasks/image-classification"; +import inputsImageToText from "./tasks/image-to-text"; +import inputsImageSegmentation from "./tasks/image-segmentation"; +import inputsObjectDetection from "./tasks/object-detection"; +import inputsAudioToAudio from "./tasks/audio-to-audio"; +import inputsAudioClassification from "./tasks/audio-classification"; +import inputsTextToImage from "./tasks/text-to-image"; +import inputsTextToSpeech from "./tasks/text-to-speech"; +import inputsTextToAudio from "./tasks/text-to-audio"; +import inputsAutomaticSpeechRecognition from "./tasks/automatic-speech-recognition"; +import inputsTabularPrediction from "./tasks/tabular-prediction"; +import inputsZeroShotImageClassification from "./tasks/zero-shot-image-classification"; const modelInputSnippets: { [key in PipelineType]?: InputGenerator; diff --git a/packages/tasks/src/snippets/tasks/audio-classification.ts b/packages/tasks/src/snippets/tasks/audio-classification.ts new file mode 100644 index 000000000..20b9e5a8f --- /dev/null +++ b/packages/tasks/src/snippets/tasks/audio-classification.ts @@ -0,0 +1 @@ +export default (): string => `"sample1.flac"`; diff --git a/packages/tasks/src/snippets/tasks/audio-to-audio.ts b/packages/tasks/src/snippets/tasks/audio-to-audio.ts new file mode 100644 index 000000000..20b9e5a8f --- /dev/null +++ b/packages/tasks/src/snippets/tasks/audio-to-audio.ts @@ -0,0 +1 @@ +export default (): string 
=> `"sample1.flac"`; diff --git a/packages/tasks/src/snippets/tasks/automatic-speech-recognition.ts b/packages/tasks/src/snippets/tasks/automatic-speech-recognition.ts new file mode 100644 index 000000000..20b9e5a8f --- /dev/null +++ b/packages/tasks/src/snippets/tasks/automatic-speech-recognition.ts @@ -0,0 +1 @@ +export default (): string => `"sample1.flac"`; diff --git a/packages/tasks/src/snippets/tasks/feature-extraction.ts b/packages/tasks/src/snippets/tasks/feature-extraction.ts new file mode 100644 index 000000000..6c42855e1 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/feature-extraction.ts @@ -0,0 +1 @@ +export default (): string => `"Today is a sunny day and I will get some ice cream."`; diff --git a/packages/tasks/src/snippets/tasks/fill-mask.ts b/packages/tasks/src/snippets/tasks/fill-mask.ts new file mode 100644 index 000000000..47d0aa89d --- /dev/null +++ b/packages/tasks/src/snippets/tasks/fill-mask.ts @@ -0,0 +1,3 @@ +import type { ModelDataMinimal } from "../types"; + +export default (model: ModelDataMinimal): string => `"The answer to the universe is ${model.mask_token}."`; diff --git a/packages/tasks/src/snippets/tasks/image-classification.ts b/packages/tasks/src/snippets/tasks/image-classification.ts new file mode 100644 index 000000000..773344776 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/image-classification.ts @@ -0,0 +1 @@ +export default (): string => `"cats.jpg"`; diff --git a/packages/tasks/src/snippets/tasks/image-segmentation.ts b/packages/tasks/src/snippets/tasks/image-segmentation.ts new file mode 100644 index 000000000..773344776 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/image-segmentation.ts @@ -0,0 +1 @@ +export default (): string => `"cats.jpg"`; diff --git a/packages/tasks/src/snippets/tasks/image-to-text.ts b/packages/tasks/src/snippets/tasks/image-to-text.ts new file mode 100644 index 000000000..773344776 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/image-to-text.ts @@ -0,0 +1 @@ +export 
default (): string => `"cats.jpg"`; diff --git a/packages/tasks/src/snippets/tasks/object-detection.ts b/packages/tasks/src/snippets/tasks/object-detection.ts new file mode 100644 index 000000000..773344776 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/object-detection.ts @@ -0,0 +1 @@ +export default (): string => `"cats.jpg"`; diff --git a/packages/tasks/src/snippets/tasks/question-answering.ts b/packages/tasks/src/snippets/tasks/question-answering.ts new file mode 100644 index 000000000..668a7f39b --- /dev/null +++ b/packages/tasks/src/snippets/tasks/question-answering.ts @@ -0,0 +1,5 @@ +export default (): string => + `{ + "question": "What is my name?", + "context": "My name is Clara and I live in Berkeley." + }`; diff --git a/packages/tasks/src/snippets/tasks/sentence-similarity.ts b/packages/tasks/src/snippets/tasks/sentence-similarity.ts new file mode 100644 index 000000000..329ca5f1f --- /dev/null +++ b/packages/tasks/src/snippets/tasks/sentence-similarity.ts @@ -0,0 +1,9 @@ +export default (): string => + `{ + "source_sentence": "That is a happy person", + "sentences": [ + "That is a happy dog", + "That is a very happy person", + "Today is a sunny day" + ] + }`; diff --git a/packages/tasks/src/snippets/tasks/summarization.ts b/packages/tasks/src/snippets/tasks/summarization.ts new file mode 100644 index 000000000..e842eff6d --- /dev/null +++ b/packages/tasks/src/snippets/tasks/summarization.ts @@ -0,0 +1,2 @@ +export default (): string => + `"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. 
Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."`; diff --git a/packages/tasks/src/snippets/tasks/table-question-answering.ts b/packages/tasks/src/snippets/tasks/table-question-answering.ts new file mode 100644 index 000000000..4c7e65479 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/table-question-answering.ts @@ -0,0 +1,14 @@ +export default (): string => + `{ + "query": "How many stars does the transformers repository have?", + "table": { + "Repository": ["Transformers", "Datasets", "Tokenizers"], + "Stars": ["36542", "4512", "3934"], + "Contributors": ["651", "77", "34"], + "Programming language": [ + "Python", + "Python", + "Rust, Python and NodeJS" + ] + } + }`; diff --git a/packages/tasks/src/snippets/tasks/tabular-prediction.ts b/packages/tasks/src/snippets/tasks/tabular-prediction.ts new file mode 100644 index 000000000..21ff8860e --- /dev/null +++ b/packages/tasks/src/snippets/tasks/tabular-prediction.ts @@ -0,0 +1,2 @@ +export default (): string => + `'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`; diff --git a/packages/tasks/src/snippets/tasks/text-classification.ts b/packages/tasks/src/snippets/tasks/text-classification.ts new file mode 100644 index 000000000..018ad9747 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/text-classification.ts @@ -0,0 +1 @@ +export default (): string => `"I like you. 
I love you"`; diff --git a/packages/tasks/src/snippets/tasks/text-to-audio.ts b/packages/tasks/src/snippets/tasks/text-to-audio.ts new file mode 100644 index 000000000..8c24933ce --- /dev/null +++ b/packages/tasks/src/snippets/tasks/text-to-audio.ts @@ -0,0 +1 @@ +export default (): string => `"liquid drum and bass, atmospheric synths, airy sounds"`; diff --git a/packages/tasks/src/snippets/tasks/text-to-image.ts b/packages/tasks/src/snippets/tasks/text-to-image.ts new file mode 100644 index 000000000..3f478848f --- /dev/null +++ b/packages/tasks/src/snippets/tasks/text-to-image.ts @@ -0,0 +1 @@ +export default (): string => `"Astronaut riding a horse"`; diff --git a/packages/tasks/src/snippets/tasks/text-to-speech.ts b/packages/tasks/src/snippets/tasks/text-to-speech.ts new file mode 100644 index 000000000..515001303 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/text-to-speech.ts @@ -0,0 +1 @@ +export default (): string => `"The answer to the universe is 42"`; diff --git a/packages/tasks/src/snippets/tasks/text2text-generation.ts b/packages/tasks/src/snippets/tasks/text2text-generation.ts new file mode 100644 index 000000000..48b8d18bd --- /dev/null +++ b/packages/tasks/src/snippets/tasks/text2text-generation.ts @@ -0,0 +1 @@ +export default (): string => `"The answer to the universe is"`; diff --git a/packages/tasks/src/snippets/tasks/token-classification.ts b/packages/tasks/src/snippets/tasks/token-classification.ts new file mode 100644 index 000000000..d039fa0c4 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/token-classification.ts @@ -0,0 +1 @@ +export default (): string => `"My name is Sarah Jessica Parker but you can call me Jessica"`; diff --git a/packages/tasks/src/snippets/tasks/translation.ts b/packages/tasks/src/snippets/tasks/translation.ts new file mode 100644 index 000000000..0073a861d --- /dev/null +++ b/packages/tasks/src/snippets/tasks/translation.ts @@ -0,0 +1 @@ +export default (): string => `"Меня зовут Вольфганг и я живу в 
Берлине"`; diff --git a/packages/tasks/src/snippets/tasks/visual-question-answering.ts b/packages/tasks/src/snippets/tasks/visual-question-answering.ts new file mode 100644 index 000000000..9b7a60b91 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/visual-question-answering.ts @@ -0,0 +1,5 @@ +export default (): string => + `{ + "image": "cat.png", + "question": "What is in this image?" + }`; diff --git a/packages/tasks/src/snippets/tasks/zero-shot-classification.ts b/packages/tasks/src/snippets/tasks/zero-shot-classification.ts new file mode 100644 index 000000000..325e19fb5 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/zero-shot-classification.ts @@ -0,0 +1,2 @@ +export default (): string => + `"Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!"`; diff --git a/packages/tasks/src/snippets/tasks/zero-shot-image-classification.ts b/packages/tasks/src/snippets/tasks/zero-shot-image-classification.ts new file mode 100644 index 000000000..773344776 --- /dev/null +++ b/packages/tasks/src/snippets/tasks/zero-shot-image-classification.ts @@ -0,0 +1 @@ +export default (): string => `"cats.jpg"`; From 22622079a848be21a5e6d7e2de42e1f844f72914 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 13 Jun 2024 19:31:09 +0200 Subject: [PATCH 3/4] revert + use messages API --- packages/tasks/package.json | 3 - packages/tasks/pnpm-lock.yaml | 5 - packages/tasks/src/snippets/inputs.ts | 112 +++++++++++++----- packages/tasks/src/snippets/js.ts | 58 ++++++++- packages/tasks/src/snippets/python.ts | 40 ++++++- .../snippets/tasks/audio-classification.ts | 1 - .../src/snippets/tasks/audio-to-audio.ts | 1 - .../tasks/automatic-speech-recognition.ts | 1 - .../src/snippets/tasks/feature-extraction.ts | 1 - .../tasks/src/snippets/tasks/fill-mask.ts | 3 - .../snippets/tasks/image-classification.ts | 1 - .../src/snippets/tasks/image-segmentation.ts | 1 - .../tasks/src/snippets/tasks/image-to-text.ts | 1 - 
.../src/snippets/tasks/object-detection.ts | 1 - .../src/snippets/tasks/question-answering.ts | 5 - .../src/snippets/tasks/sentence-similarity.ts | 9 -- .../tasks/src/snippets/tasks/summarization.ts | 2 - .../tasks/table-question-answering.ts | 14 --- .../src/snippets/tasks/tabular-prediction.ts | 2 - .../src/snippets/tasks/text-classification.ts | 1 - .../src/snippets/tasks/text-generation.ts | 64 ---------- .../tasks/src/snippets/tasks/text-to-audio.ts | 1 - .../tasks/src/snippets/tasks/text-to-image.ts | 1 - .../src/snippets/tasks/text-to-speech.ts | 1 - .../snippets/tasks/text2text-generation.ts | 1 - .../snippets/tasks/token-classification.ts | 1 - .../tasks/src/snippets/tasks/translation.ts | 1 - .../tasks/visual-question-answering.ts | 5 - .../tasks/zero-shot-classification.ts | 2 - .../tasks/zero-shot-image-classification.ts | 1 - packages/tasks/src/snippets/types.ts | 5 - pnpm-workspace.yaml | 2 +- 32 files changed, 174 insertions(+), 173 deletions(-) delete mode 100644 packages/tasks/src/snippets/tasks/audio-classification.ts delete mode 100644 packages/tasks/src/snippets/tasks/audio-to-audio.ts delete mode 100644 packages/tasks/src/snippets/tasks/automatic-speech-recognition.ts delete mode 100644 packages/tasks/src/snippets/tasks/feature-extraction.ts delete mode 100644 packages/tasks/src/snippets/tasks/fill-mask.ts delete mode 100644 packages/tasks/src/snippets/tasks/image-classification.ts delete mode 100644 packages/tasks/src/snippets/tasks/image-segmentation.ts delete mode 100644 packages/tasks/src/snippets/tasks/image-to-text.ts delete mode 100644 packages/tasks/src/snippets/tasks/object-detection.ts delete mode 100644 packages/tasks/src/snippets/tasks/question-answering.ts delete mode 100644 packages/tasks/src/snippets/tasks/sentence-similarity.ts delete mode 100644 packages/tasks/src/snippets/tasks/summarization.ts delete mode 100644 packages/tasks/src/snippets/tasks/table-question-answering.ts delete mode 100644 
packages/tasks/src/snippets/tasks/tabular-prediction.ts delete mode 100644 packages/tasks/src/snippets/tasks/text-classification.ts delete mode 100644 packages/tasks/src/snippets/tasks/text-generation.ts delete mode 100644 packages/tasks/src/snippets/tasks/text-to-audio.ts delete mode 100644 packages/tasks/src/snippets/tasks/text-to-image.ts delete mode 100644 packages/tasks/src/snippets/tasks/text-to-speech.ts delete mode 100644 packages/tasks/src/snippets/tasks/text2text-generation.ts delete mode 100644 packages/tasks/src/snippets/tasks/token-classification.ts delete mode 100644 packages/tasks/src/snippets/tasks/translation.ts delete mode 100644 packages/tasks/src/snippets/tasks/visual-question-answering.ts delete mode 100644 packages/tasks/src/snippets/tasks/zero-shot-classification.ts delete mode 100644 packages/tasks/src/snippets/tasks/zero-shot-image-classification.ts diff --git a/packages/tasks/package.json b/packages/tasks/package.json index 822d30366..094302b97 100644 --- a/packages/tasks/package.json +++ b/packages/tasks/package.json @@ -43,9 +43,6 @@ ], "author": "Hugging Face", "license": "MIT", - "dependencies": { - "@huggingface/jinja": "workspace:^" - }, "devDependencies": { "@types/node": "^20.11.5", "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz", diff --git a/packages/tasks/pnpm-lock.yaml b/packages/tasks/pnpm-lock.yaml index 20a158909..741ee9a42 100644 --- a/packages/tasks/pnpm-lock.yaml +++ b/packages/tasks/pnpm-lock.yaml @@ -4,11 +4,6 @@ settings: autoInstallPeers: true excludeLinksFromLockfile: false -dependencies: - '@huggingface/jinja': - specifier: workspace:^ - version: link:../jinja - devDependencies: '@types/node': specifier: ^20.11.5 diff --git a/packages/tasks/src/snippets/inputs.ts b/packages/tasks/src/snippets/inputs.ts index 775b53226..f3c76d12c 100644 --- a/packages/tasks/src/snippets/inputs.ts +++ b/packages/tasks/src/snippets/inputs.ts @@ -1,34 +1,90 
@@ import type { PipelineType } from "../pipelines"; -import type { InputGenerator, ModelDataMinimal } from "./types"; - -import inputsTextGeneration from "./tasks/text-generation"; -import inputsZeroShotClassification from "./tasks/zero-shot-classification"; -import inputsTranslation from "./tasks/translation"; -import inputsSummarization from "./tasks/summarization"; -import inputsTableQuestionAnswering from "./tasks/table-question-answering"; -import inputsVisualQuestionAnswering from "./tasks/visual-question-answering"; -import inputsQuestionAnswering from "./tasks/question-answering"; -import inputsTextClassification from "./tasks/text-classification"; -import inputsTokenClassification from "./tasks/token-classification"; -import inputsText2TextGeneration from "./tasks/text2text-generation"; -import inputsFillMask from "./tasks/fill-mask"; -import inputsSentenceSimilarity from "./tasks/sentence-similarity"; -import inputsFeatureExtraction from "./tasks/feature-extraction"; -import inputsImageClassification from "./tasks/image-classification"; -import inputsImageToText from "./tasks/image-to-text"; -import inputsImageSegmentation from "./tasks/image-segmentation"; -import inputsObjectDetection from "./tasks/object-detection"; -import inputsAudioToAudio from "./tasks/audio-to-audio"; -import inputsAudioClassification from "./tasks/audio-classification"; -import inputsTextToImage from "./tasks/text-to-image"; -import inputsTextToSpeech from "./tasks/text-to-speech"; -import inputsTextToAudio from "./tasks/text-to-audio"; -import inputsAutomaticSpeechRecognition from "./tasks/automatic-speech-recognition"; -import inputsTabularPrediction from "./tasks/tabular-prediction"; -import inputsZeroShotImageClassification from "./tasks/zero-shot-image-classification"; +import type { ModelDataMinimal } from "./types"; + +const inputsZeroShotClassification = () => + `"Hi, I recently bought a device from your company but it is not working as advertised and I would like to get 
reimbursed!"`; + +const inputsTranslation = () => `"Меня зовут Вольфганг и я живу в Берлине"`; + +const inputsSummarization = () => + `"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."`; + +const inputsTableQuestionAnswering = () => + `{ + "query": "How many stars does the transformers repository have?", + "table": { + "Repository": ["Transformers", "Datasets", "Tokenizers"], + "Stars": ["36542", "4512", "3934"], + "Contributors": ["651", "77", "34"], + "Programming language": [ + "Python", + "Python", + "Rust, Python and NodeJS" + ] + } +}`; + +const inputsVisualQuestionAnswering = () => + `{ + "image": "cat.png", + "question": "What is in this image?" +}`; + +const inputsQuestionAnswering = () => + `{ + "question": "What is my name?", + "context": "My name is Clara and I live in Berkeley." +}`; + +const inputsTextClassification = () => `"I like you. 
I love you"`; + +const inputsTokenClassification = () => `"My name is Sarah Jessica Parker but you can call me Jessica"`; + +const inputsTextGeneration = () => `"Can you please let us know more details about your "`; + +const inputsText2TextGeneration = () => `"The answer to the universe is"`; + +const inputsFillMask = (model: ModelDataMinimal) => `"The answer to the universe is ${model.mask_token}."`; + +const inputsSentenceSimilarity = () => + `{ + "source_sentence": "That is a happy person", + "sentences": [ + "That is a happy dog", + "That is a very happy person", + "Today is a sunny day" + ] +}`; + +const inputsFeatureExtraction = () => `"Today is a sunny day and I will get some ice cream."`; + +const inputsImageClassification = () => `"cats.jpg"`; + +const inputsImageToText = () => `"cats.jpg"`; + +const inputsImageSegmentation = () => `"cats.jpg"`; + +const inputsObjectDetection = () => `"cats.jpg"`; + +const inputsAudioToAudio = () => `"sample1.flac"`; + +const inputsAudioClassification = () => `"sample1.flac"`; + +const inputsTextToImage = () => `"Astronaut riding a horse"`; + +const inputsTextToSpeech = () => `"The answer to the universe is 42"`; + +const inputsTextToAudio = () => `"liquid drum and bass, atmospheric synths, airy sounds"`; + +const inputsAutomaticSpeechRecognition = () => `"sample1.flac"`; + +const inputsTabularPrediction = () => + `'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`; + +const inputsZeroShotImageClassification = () => `"cats.jpg"`; const modelInputSnippets: { - [key in PipelineType]?: InputGenerator; + [key in PipelineType]?: (model: ModelDataMinimal) => string; } = { "audio-to-audio": inputsAudioToAudio, "audio-classification": inputsAudioClassification, diff --git a/packages/tasks/src/snippets/js.ts b/packages/tasks/src/snippets/js.ts index 4a6d8c331..5e5c32189 100644 --- a/packages/tasks/src/snippets/js.ts +++ b/packages/tasks/src/snippets/js.ts @@ -7,7 +7,10 @@ export 
const snippetBasic = (model: ModelDataMinimal, accessToken: string): stri const response = await fetch( "https://api-inference.huggingface.co/models/${model.id}", { - headers: { Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" }, + headers: { + Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" + "Content-Type": "application/json", + }, method: "POST", body: JSON.stringify(data), } @@ -20,12 +23,48 @@ query({"inputs": ${getModelInputSnippet(model)}}).then((response) => { console.log(JSON.stringify(response)); });`; +export const snippetTextGeneration = (model: ModelDataMinimal, accessToken: string): string => { + if (model.config?.tokenizer_config?.chat_template) { + // Conversational model detected, so we display a code snippet that features the OpenAI Messages API + // Code adapted from https://huggingface.co/blog/tgi-messages-api + return `// npm install openai +import OpenAI from "openai"; + +const openai = new OpenAI({ + baseURL: "https://api-inference.huggingface.co/models/${model.id}/v1/", + apiKey: "${accessToken || `{API_TOKEN}`}", +}); + +async function main() { + const stream = await openai.chat.completions.create({ + model: "tgi", + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "Tell me a funny joke." 
}, + ], + stream: true, + max_tokens: 500, + }); + for await (const chunk of stream) { + process.stdout.write(chunk.choices[0]?.delta?.content || ""); + } +} + +main(); +`; + } else { + return snippetBasic(model, accessToken);; + } +} export const snippetZeroShotClassification = (model: ModelDataMinimal, accessToken: string): string => `async function query(data) { const response = await fetch( "https://api-inference.huggingface.co/models/${model.id}", { - headers: { Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" }, + headers: { + Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" + "Content-Type": "application/json", + }, method: "POST", body: JSON.stringify(data), } @@ -45,7 +84,10 @@ export const snippetTextToImage = (model: ModelDataMinimal, accessToken: string) const response = await fetch( "https://api-inference.huggingface.co/models/${model.id}", { - headers: { Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" }, + headers: { + Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" + "Content-Type": "application/json", + }, method: "POST", body: JSON.stringify(data), } @@ -62,7 +104,10 @@ export const snippetTextToAudio = (model: ModelDataMinimal, accessToken: string) const response = await fetch( "https://api-inference.huggingface.co/models/${model.id}", { - headers: { Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" }, + headers: { + Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" + "Content-Type": "application/json", + }, method: "POST", body: JSON.stringify(data), } @@ -99,7 +144,10 @@ export const snippetFile = (model: ModelDataMinimal, accessToken: string): strin const response = await fetch( "https://api-inference.huggingface.co/models/${model.id}", { - headers: { Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" }, + headers: { + Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" + "Content-Type": "application/json", + }, method: "POST", body: data, } diff --git a/packages/tasks/src/snippets/python.ts 
b/packages/tasks/src/snippets/python.ts index faaec0ecd..59ac2edb7 100644 --- a/packages/tasks/src/snippets/python.ts +++ b/packages/tasks/src/snippets/python.ts @@ -2,6 +2,29 @@ import type { PipelineType } from "../pipelines.js"; import { getModelInputSnippet } from "./inputs.js"; import type { ModelDataMinimal } from "./types.js"; +export const snippetConversational = (model: ModelDataMinimal, accessToken: string): string => `# pip install openai +from openai import OpenAI + +# initialize the client and point it to TGI +client = OpenAI( + base_url="https://api-inference.huggingface.co/models/${model.id}/v1/", + api_key="${accessToken || '{API_TOKEN}'}", +) +chat_completion = client.chat.completions.create( + model="tgi", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Tell me a funny joke."}, + ], + stream=True, + max_tokens=500 +) + +# iterate and print stream +for message in chat_completion: + print(message.choices[0].delta.content, end="") +`; + export const snippetZeroShotClassification = (model: ModelDataMinimal): string => `def query(payload): response = requests.post(API_URL, headers=headers, json=payload) @@ -107,7 +130,7 @@ output = query({ "inputs": ${getModelInputSnippet(model)}, })`; -export const pythonSnippets: Partial string>> = { +export const pythonSnippets: Partial string>> = { // Same order as in tasks/src/pipelines.ts "text-classification": snippetBasic, "token-classification": snippetBasic, @@ -138,15 +161,24 @@ export const pythonSnippets: Partial `"sample1.flac"`; diff --git a/packages/tasks/src/snippets/tasks/audio-to-audio.ts b/packages/tasks/src/snippets/tasks/audio-to-audio.ts deleted file mode 100644 index 20b9e5a8f..000000000 --- a/packages/tasks/src/snippets/tasks/audio-to-audio.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"sample1.flac"`; diff --git a/packages/tasks/src/snippets/tasks/automatic-speech-recognition.ts 
b/packages/tasks/src/snippets/tasks/automatic-speech-recognition.ts deleted file mode 100644 index 20b9e5a8f..000000000 --- a/packages/tasks/src/snippets/tasks/automatic-speech-recognition.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"sample1.flac"`; diff --git a/packages/tasks/src/snippets/tasks/feature-extraction.ts b/packages/tasks/src/snippets/tasks/feature-extraction.ts deleted file mode 100644 index 6c42855e1..000000000 --- a/packages/tasks/src/snippets/tasks/feature-extraction.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"Today is a sunny day and I will get some ice cream."`; diff --git a/packages/tasks/src/snippets/tasks/fill-mask.ts b/packages/tasks/src/snippets/tasks/fill-mask.ts deleted file mode 100644 index 47d0aa89d..000000000 --- a/packages/tasks/src/snippets/tasks/fill-mask.ts +++ /dev/null @@ -1,3 +0,0 @@ -import type { ModelDataMinimal } from "../types"; - -export default (model: ModelDataMinimal): string => `"The answer to the universe is ${model.mask_token}."`; diff --git a/packages/tasks/src/snippets/tasks/image-classification.ts b/packages/tasks/src/snippets/tasks/image-classification.ts deleted file mode 100644 index 773344776..000000000 --- a/packages/tasks/src/snippets/tasks/image-classification.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"cats.jpg"`; diff --git a/packages/tasks/src/snippets/tasks/image-segmentation.ts b/packages/tasks/src/snippets/tasks/image-segmentation.ts deleted file mode 100644 index 773344776..000000000 --- a/packages/tasks/src/snippets/tasks/image-segmentation.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"cats.jpg"`; diff --git a/packages/tasks/src/snippets/tasks/image-to-text.ts b/packages/tasks/src/snippets/tasks/image-to-text.ts deleted file mode 100644 index 773344776..000000000 --- a/packages/tasks/src/snippets/tasks/image-to-text.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"cats.jpg"`; diff --git 
a/packages/tasks/src/snippets/tasks/object-detection.ts b/packages/tasks/src/snippets/tasks/object-detection.ts deleted file mode 100644 index 773344776..000000000 --- a/packages/tasks/src/snippets/tasks/object-detection.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"cats.jpg"`; diff --git a/packages/tasks/src/snippets/tasks/question-answering.ts b/packages/tasks/src/snippets/tasks/question-answering.ts deleted file mode 100644 index 668a7f39b..000000000 --- a/packages/tasks/src/snippets/tasks/question-answering.ts +++ /dev/null @@ -1,5 +0,0 @@ -export default (): string => - `{ - "question": "What is my name?", - "context": "My name is Clara and I live in Berkeley." - }`; diff --git a/packages/tasks/src/snippets/tasks/sentence-similarity.ts b/packages/tasks/src/snippets/tasks/sentence-similarity.ts deleted file mode 100644 index 329ca5f1f..000000000 --- a/packages/tasks/src/snippets/tasks/sentence-similarity.ts +++ /dev/null @@ -1,9 +0,0 @@ -export default (): string => - `{ - "source_sentence": "That is a happy person", - "sentences": [ - "That is a happy dog", - "That is a very happy person", - "Today is a sunny day" - ] - }`; diff --git a/packages/tasks/src/snippets/tasks/summarization.ts b/packages/tasks/src/snippets/tasks/summarization.ts deleted file mode 100644 index e842eff6d..000000000 --- a/packages/tasks/src/snippets/tasks/summarization.ts +++ /dev/null @@ -1,2 +0,0 @@ -export default (): string => - `"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. 
Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."`; diff --git a/packages/tasks/src/snippets/tasks/table-question-answering.ts b/packages/tasks/src/snippets/tasks/table-question-answering.ts deleted file mode 100644 index 4c7e65479..000000000 --- a/packages/tasks/src/snippets/tasks/table-question-answering.ts +++ /dev/null @@ -1,14 +0,0 @@ -export default (): string => - `{ - "query": "How many stars does the transformers repository have?", - "table": { - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": [ - "Python", - "Python", - "Rust, Python and NodeJS" - ] - } - }`; diff --git a/packages/tasks/src/snippets/tasks/tabular-prediction.ts b/packages/tasks/src/snippets/tasks/tabular-prediction.ts deleted file mode 100644 index 21ff8860e..000000000 --- a/packages/tasks/src/snippets/tasks/tabular-prediction.ts +++ /dev/null @@ -1,2 +0,0 @@ -export default (): string => - `'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`; diff --git a/packages/tasks/src/snippets/tasks/text-classification.ts b/packages/tasks/src/snippets/tasks/text-classification.ts deleted file mode 100644 index 018ad9747..000000000 --- a/packages/tasks/src/snippets/tasks/text-classification.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"I like you. 
I love you"`; diff --git a/packages/tasks/src/snippets/tasks/text-generation.ts b/packages/tasks/src/snippets/tasks/text-generation.ts deleted file mode 100644 index 1205f9fca..000000000 --- a/packages/tasks/src/snippets/tasks/text-generation.ts +++ /dev/null @@ -1,64 +0,0 @@ -import type { ModelDataMinimal } from "../types"; -import type { TokenizerConfig } from "../../tokenizer-data"; - -import { Template } from "@huggingface/jinja"; -import { SPECIAL_TOKENS_ATTRIBUTES } from "../../tokenizer-data"; - -// Define default text generation input -const DEFAULT_TEXT_GENERATION_INPUT = `"Can you please let us know more details about your "`; - -// Define defaults for chat models -const DEFAULT_SYSTEM_MESSAGE = { role: "system", content: "You are a helpful assistant." }; -const DEFAULT_USER_MESSAGE = { role: "user", content: "Tell me a joke." }; -const DEFAULT_MESSAGES = [DEFAULT_SYSTEM_MESSAGE, DEFAULT_USER_MESSAGE]; - -type SpecialTokensMap = Partial>; -const getSpecialTokensMap = (tokenizerConfig: TokenizerConfig): SpecialTokensMap => { - const specialTokensMap: SpecialTokensMap = {}; - for (const token of SPECIAL_TOKENS_ATTRIBUTES) { - const item = tokenizerConfig[token]; - if (typeof item === "string") { - specialTokensMap[token] = item; - } else if (item?.content) { - specialTokensMap[token] = item.content; - } - } - return specialTokensMap; -}; - -export default (model: ModelDataMinimal): string => { - const tokenizerConfig = model.config?.tokenizer_config; - if (!tokenizerConfig) { - return DEFAULT_TEXT_GENERATION_INPUT; - } - - let chat_template = tokenizerConfig.chat_template; - if (Array.isArray(chat_template)) { - // Find the default template - chat_template = chat_template.find((template) => template?.name === "default")?.template; - - // TODO: If no default template is found, use the first one - // However, many of these (e.g., https://huggingface.co/CohereForAI/c4ai-command-r-v01/blob/main/tokenizer_config.json) - // have non-default templates that 
require additional information (e.g., tools or documents) - } - - if (!chat_template) { - // Default text generation input - return DEFAULT_TEXT_GENERATION_INPUT; - } - - try { - const template = new Template(chat_template); - const rendered = template.render({ - messages: DEFAULT_MESSAGES, - // TODO: add default tools or documents - - // Add special tokens - ...getSpecialTokensMap(tokenizerConfig), - }); - return rendered; - } catch (e) { - // Some error occurred, so we just return default - return DEFAULT_TEXT_GENERATION_INPUT; - } -}; diff --git a/packages/tasks/src/snippets/tasks/text-to-audio.ts b/packages/tasks/src/snippets/tasks/text-to-audio.ts deleted file mode 100644 index 8c24933ce..000000000 --- a/packages/tasks/src/snippets/tasks/text-to-audio.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"liquid drum and bass, atmospheric synths, airy sounds"`; diff --git a/packages/tasks/src/snippets/tasks/text-to-image.ts b/packages/tasks/src/snippets/tasks/text-to-image.ts deleted file mode 100644 index 3f478848f..000000000 --- a/packages/tasks/src/snippets/tasks/text-to-image.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"Astronaut riding a horse"`; diff --git a/packages/tasks/src/snippets/tasks/text-to-speech.ts b/packages/tasks/src/snippets/tasks/text-to-speech.ts deleted file mode 100644 index 515001303..000000000 --- a/packages/tasks/src/snippets/tasks/text-to-speech.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"The answer to the universe is 42"`; diff --git a/packages/tasks/src/snippets/tasks/text2text-generation.ts b/packages/tasks/src/snippets/tasks/text2text-generation.ts deleted file mode 100644 index 48b8d18bd..000000000 --- a/packages/tasks/src/snippets/tasks/text2text-generation.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"The answer to the universe is"`; diff --git a/packages/tasks/src/snippets/tasks/token-classification.ts b/packages/tasks/src/snippets/tasks/token-classification.ts 
deleted file mode 100644 index d039fa0c4..000000000 --- a/packages/tasks/src/snippets/tasks/token-classification.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"My name is Sarah Jessica Parker but you can call me Jessica"`; diff --git a/packages/tasks/src/snippets/tasks/translation.ts b/packages/tasks/src/snippets/tasks/translation.ts deleted file mode 100644 index 0073a861d..000000000 --- a/packages/tasks/src/snippets/tasks/translation.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"Меня зовут Вольфганг и я живу в Берлине"`; diff --git a/packages/tasks/src/snippets/tasks/visual-question-answering.ts b/packages/tasks/src/snippets/tasks/visual-question-answering.ts deleted file mode 100644 index 9b7a60b91..000000000 --- a/packages/tasks/src/snippets/tasks/visual-question-answering.ts +++ /dev/null @@ -1,5 +0,0 @@ -export default (): string => - `{ - "image": "cat.png", - "question": "What is in this image?" - }`; diff --git a/packages/tasks/src/snippets/tasks/zero-shot-classification.ts b/packages/tasks/src/snippets/tasks/zero-shot-classification.ts deleted file mode 100644 index 325e19fb5..000000000 --- a/packages/tasks/src/snippets/tasks/zero-shot-classification.ts +++ /dev/null @@ -1,2 +0,0 @@ -export default (): string => - `"Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!"`; diff --git a/packages/tasks/src/snippets/tasks/zero-shot-image-classification.ts b/packages/tasks/src/snippets/tasks/zero-shot-image-classification.ts deleted file mode 100644 index 773344776..000000000 --- a/packages/tasks/src/snippets/tasks/zero-shot-image-classification.ts +++ /dev/null @@ -1 +0,0 @@ -export default (): string => `"cats.jpg"`; diff --git a/packages/tasks/src/snippets/types.ts b/packages/tasks/src/snippets/types.ts index b1eeb4a9c..658c3ebf8 100644 --- a/packages/tasks/src/snippets/types.ts +++ b/packages/tasks/src/snippets/types.ts @@ -6,8 +6,3 @@ import type { 
ModelData } from "../model-data"; * Add more fields as needed. */ export type ModelDataMinimal = Pick; - -/** - * Input generator function. - */ -export type InputGenerator = (model: ModelDataMinimal) => string; diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 11f6fa6ac..45131d274 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,10 +1,10 @@ packages: - "packages/hub" - "packages/inference" - - "packages/jinja" - "packages/doc-internal" - "packages/agents" - "packages/languages" - "packages/tasks" - "packages/gguf" + - "packages/jinja" - "packages/widgets" From 048cfb2c17d93aadbd445b08bd754a7fa355cf47 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sat, 15 Jun 2024 20:21:54 +0200 Subject: [PATCH 4/4] Update code snippets --- packages/tasks/src/snippets/curl.ts | 20 +++++++++++++- packages/tasks/src/snippets/js.ts | 38 +++++++++----------------- packages/tasks/src/snippets/python.ts | 39 +++++++++++---------------- 3 files changed, 46 insertions(+), 51 deletions(-) diff --git a/packages/tasks/src/snippets/curl.ts b/packages/tasks/src/snippets/curl.ts index 93231e98f..d3fbc0bcb 100644 --- a/packages/tasks/src/snippets/curl.ts +++ b/packages/tasks/src/snippets/curl.ts @@ -10,6 +10,24 @@ export const snippetBasic = (model: ModelDataMinimal, accessToken: string): stri -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}" `; +export const snippetTextGeneration = (model: ModelDataMinimal, accessToken: string): string => { + if (model.config?.tokenizer_config?.chat_template) { + // Conversational model detected, so we display a code snippet that features the Messages API + return `curl 'https://api-inference.huggingface.co/models/${model.id}/v1/chat/completions' \\ +-H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}" \\ +-H 'Content-Type: application/json' \\ +-d '{ + "model": "${model.id}", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "max_tokens": 500, + "stream": false +}' +`; + } else { + 
return snippetBasic(model, accessToken); + } +}; + export const snippetZeroShotClassification = (model: ModelDataMinimal, accessToken: string): string => `curl https://api-inference.huggingface.co/models/${model.id} \\ -X POST \\ @@ -35,7 +53,7 @@ export const curlSnippets: Partial { export const snippetTextGeneration = (model: ModelDataMinimal, accessToken: string): string => { if (model.config?.tokenizer_config?.chat_template) { - // Conversational model detected, so we display a code snippet that features the OpenAI Messages API - // Code adapted from https://huggingface.co/blog/tgi-messages-api - return `// npm install openai -import OpenAI from "openai"; + // Conversational model detected, so we display a code snippet that features the Messages API + return `import { HfInference } from "@huggingface/inference"; -const openai = new OpenAI({ - baseURL: "https://api-inference.huggingface.co/models/${model.id}/v1/", - apiKey: "${accessToken || `{API_TOKEN}`}", -}); +const inference = new HfInference("${accessToken || `{API_TOKEN}`}"); -async function main() { - const stream = await openai.chat.completions.create({ - model: "tgi", - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", content: "Tell me a funny joke." }, - ], - stream: true, - max_tokens: 500, - }); - for await (const chunk of stream) { - process.stdout.write(chunk.choices[0]?.delta?.content || ""); - } +for await (const chunk of inference.chatCompletionStream({ + model: "${model.id}", + messages: [{ role: "user", content: "What is the capital of France?" 
}], + max_tokens: 500, +})) { + process.stdout.write(chunk.choices[0]?.delta?.content || ""); } - -main(); `; } else { - return snippetBasic(model, accessToken);; + return snippetBasic(model, accessToken); } -} +}; export const snippetZeroShotClassification = (model: ModelDataMinimal, accessToken: string): string => `async function query(data) { const response = await fetch( @@ -170,7 +156,7 @@ export const jsSnippets: Partial `# pip install openai -from openai import OpenAI +export const snippetConversational = (model: ModelDataMinimal, accessToken: string): string => + `from huggingface_hub import InferenceClient -# initialize the client and point it to TGI -client = OpenAI( - base_url="https://api-inference.huggingface.co/models/${model.id}/v1/", - api_key="${accessToken || '{API_TOKEN}'}", -) -chat_completion = client.chat.completions.create( - model="tgi", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Tell me a funny joke."}, - ], - stream=True, - max_tokens=500 +client = InferenceClient( + "${model.id}", + token="${accessToken || "{API_TOKEN}"}", ) -# iterate and print stream -for message in chat_completion: +for message in client.chat_completion( + messages=[{"role": "user", "content": "What is the capital of France?"}], + max_tokens=500, + stream=True, +): print(message.choices[0].delta.content, end="") `; @@ -161,16 +154,14 @@ export const pythonSnippets: Partial