-
Notifications
You must be signed in to change notification settings - Fork 736
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add documentation for NVIDIA LLM API integration
Add NVIDIA LLM API - OpenAI Integration Test.
- Loading branch information
Showing
7 changed files
with
581 additions
and
1 deletion.
There are no files selected for viewing
328 changes: 328 additions & 0 deletions
328
...i/src/test/java/org/springframework/ai/openai/chat/proxy/NvidiaWithOpenAiChatModelIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,328 @@ | ||
/* | ||
* Copyright 2024 - 2024 the original author or authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.springframework.ai.openai.chat.proxy; | ||
|
||
import static org.assertj.core.api.Assertions.assertThat;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.messages.AssistantMessage;
import org.springframework.ai.chat.messages.Message;
import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.model.Generation;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.chat.prompt.PromptTemplate;
import org.springframework.ai.chat.prompt.SystemPromptTemplate;
import org.springframework.ai.converter.BeanOutputConverter;
import org.springframework.ai.converter.ListOutputConverter;
import org.springframework.ai.converter.MapOutputConverter;
import org.springframework.ai.model.function.FunctionCallbackWrapper;
import org.springframework.ai.openai.OpenAiChatModel;
import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.ai.openai.api.OpenAiApi;
import org.springframework.ai.openai.api.tool.MockWeatherService;
import org.springframework.ai.openai.chat.ActorsFilms;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringBootConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Bean;
import org.springframework.core.convert.support.DefaultConversionService;
import org.springframework.core.io.Resource;

import reactor.core.publisher.Flux;

||
/** | ||
* @author Christian Tzolov | ||
* @since 1.0.0 | ||
*/ | ||
@SpringBootTest(classes = NvidiaWithOpenAiChatModelIT.Config.class) | ||
@EnabledIfEnvironmentVariable(named = "NVIDIA_API_KEY", matches = ".+") | ||
class NvidiaWithOpenAiChatModelIT { | ||
|
||
private static final Logger logger = LoggerFactory.getLogger(NvidiaWithOpenAiChatModelIT.class); | ||
|
||
private static final String NVIDIA_BASE_URL = "https://integrate.api.nvidia.com"; | ||
|
||
private static final String DEFAULT_NVIDIA_MODEL = "meta/llama-3.1-70b-instruct"; | ||
|
||
@Value("classpath:/prompts/system-message.st") | ||
private Resource systemResource; | ||
|
||
@Autowired | ||
private OpenAiChatModel chatModel; | ||
|
||
@Test | ||
void roleTest() { | ||
UserMessage userMessage = new UserMessage( | ||
"Tell me about 3 famous pirates from the Golden Age of Piracy and what they did."); | ||
SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(systemResource); | ||
Message systemMessage = systemPromptTemplate.createMessage(Map.of("name", "Bob", "voice", "pirate")); | ||
Prompt prompt = new Prompt(List.of(userMessage, systemMessage)); | ||
ChatResponse response = chatModel.call(prompt); | ||
assertThat(response.getResults()).hasSize(1); | ||
assertThat(response.getResults().get(0).getOutput().getContent()).contains("Blackbeard"); | ||
} | ||
|
||
@Test | ||
void streamRoleTest() { | ||
UserMessage userMessage = new UserMessage( | ||
"Tell me about 3 famous pirates from the Golden Age of Piracy and what they did."); | ||
SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(systemResource); | ||
Message systemMessage = systemPromptTemplate.createMessage(Map.of("name", "Bob", "voice", "pirate")); | ||
Prompt prompt = new Prompt(List.of(userMessage, systemMessage)); | ||
Flux<ChatResponse> flux = chatModel.stream(prompt); | ||
|
||
List<ChatResponse> responses = flux.collectList().block(); | ||
assertThat(responses.size()).isGreaterThan(1); | ||
|
||
String stitchedResponseContent = responses.stream() | ||
.map(ChatResponse::getResults) | ||
.flatMap(List::stream) | ||
.map(Generation::getOutput) | ||
.map(AssistantMessage::getContent) | ||
.collect(Collectors.joining()); | ||
|
||
assertThat(stitchedResponseContent).contains("Blackbeard"); | ||
} | ||
|
||
@Test | ||
void streamingWithTokenUsage() { | ||
var promptOptions = OpenAiChatOptions.builder().withStreamUsage(true).withSeed(1).build(); | ||
|
||
var prompt = new Prompt("List two colors of the Polish flag. Be brief.", promptOptions); | ||
|
||
var streamingTokenUsage = this.chatModel.stream(prompt).blockLast().getMetadata().getUsage(); | ||
var referenceTokenUsage = this.chatModel.call(prompt).getMetadata().getUsage(); | ||
|
||
assertThat(streamingTokenUsage.getPromptTokens()).isGreaterThan(0); | ||
assertThat(streamingTokenUsage.getGenerationTokens()).isGreaterThan(0); | ||
assertThat(streamingTokenUsage.getTotalTokens()).isGreaterThan(0); | ||
|
||
assertThat(streamingTokenUsage.getPromptTokens()).isEqualTo(referenceTokenUsage.getPromptTokens()); | ||
assertThat(streamingTokenUsage.getGenerationTokens()).isEqualTo(referenceTokenUsage.getGenerationTokens()); | ||
assertThat(streamingTokenUsage.getTotalTokens()).isEqualTo(referenceTokenUsage.getTotalTokens()); | ||
|
||
} | ||
|
||
@Test | ||
void listOutputConverter() { | ||
DefaultConversionService conversionService = new DefaultConversionService(); | ||
ListOutputConverter outputConverter = new ListOutputConverter(conversionService); | ||
|
||
String format = outputConverter.getFormat(); | ||
String template = """ | ||
List five {subject} | ||
{format} | ||
"""; | ||
PromptTemplate promptTemplate = new PromptTemplate(template, | ||
Map.of("subject", "ice cream flavors", "format", format)); | ||
Prompt prompt = new Prompt(promptTemplate.createMessage()); | ||
Generation generation = this.chatModel.call(prompt).getResult(); | ||
|
||
List<String> list = outputConverter.convert(generation.getOutput().getContent()); | ||
assertThat(list).hasSize(5); | ||
|
||
} | ||
|
||
@Test | ||
void mapOutputConverter() { | ||
MapOutputConverter outputConverter = new MapOutputConverter(); | ||
|
||
String format = outputConverter.getFormat(); | ||
String template = """ | ||
Provide me a List of {subject} | ||
{format} | ||
"""; | ||
PromptTemplate promptTemplate = new PromptTemplate(template, | ||
Map.of("subject", "numbers from 1 to 9 under they key name 'numbers'", "format", format)); | ||
Prompt prompt = new Prompt(promptTemplate.createMessage()); | ||
Generation generation = chatModel.call(prompt).getResult(); | ||
|
||
Map<String, Object> result = outputConverter.convert(generation.getOutput().getContent()); | ||
assertThat(result.get("numbers")).isEqualTo(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9)); | ||
|
||
} | ||
|
||
@Test | ||
void beanOutputConverter() { | ||
|
||
BeanOutputConverter<ActorsFilms> outputConverter = new BeanOutputConverter<>(ActorsFilms.class); | ||
|
||
String format = outputConverter.getFormat(); | ||
String template = """ | ||
Generate the filmography for a random actor. | ||
{format} | ||
"""; | ||
PromptTemplate promptTemplate = new PromptTemplate(template, Map.of("format", format)); | ||
Prompt prompt = new Prompt(promptTemplate.createMessage()); | ||
Generation generation = chatModel.call(prompt).getResult(); | ||
|
||
ActorsFilms actorsFilms = outputConverter.convert(generation.getOutput().getContent()); | ||
assertThat(actorsFilms.getActor()).isNotEmpty(); | ||
} | ||
|
||
record ActorsFilmsRecord(String actor, List<String> movies) { | ||
} | ||
|
||
@Test | ||
void beanOutputConverterRecords() { | ||
|
||
BeanOutputConverter<ActorsFilmsRecord> outputConverter = new BeanOutputConverter<>(ActorsFilmsRecord.class); | ||
|
||
String format = outputConverter.getFormat(); | ||
String template = """ | ||
Generate the filmography of 5 movies for Tom Hanks. | ||
{format} | ||
"""; | ||
PromptTemplate promptTemplate = new PromptTemplate(template, Map.of("format", format)); | ||
Prompt prompt = new Prompt(promptTemplate.createMessage()); | ||
Generation generation = chatModel.call(prompt).getResult(); | ||
|
||
ActorsFilmsRecord actorsFilms = outputConverter.convert(generation.getOutput().getContent()); | ||
logger.info("" + actorsFilms); | ||
assertThat(actorsFilms.actor()).isEqualTo("Tom Hanks"); | ||
assertThat(actorsFilms.movies()).hasSize(5); | ||
} | ||
|
||
@Test | ||
void beanStreamOutputConverterRecords() { | ||
|
||
BeanOutputConverter<ActorsFilmsRecord> outputConverter = new BeanOutputConverter<>(ActorsFilmsRecord.class); | ||
|
||
String format = outputConverter.getFormat(); | ||
String template = """ | ||
Generate the filmography of 5 movies for Tom Hanks. | ||
{format} | ||
"""; | ||
PromptTemplate promptTemplate = new PromptTemplate(template, Map.of("format", format)); | ||
Prompt prompt = new Prompt(promptTemplate.createMessage()); | ||
|
||
String generationTextFromStream = chatModel.stream(prompt) | ||
.collectList() | ||
.block() | ||
.stream() | ||
.map(ChatResponse::getResults) | ||
.flatMap(List::stream) | ||
.map(Generation::getOutput) | ||
.map(AssistantMessage::getContent) | ||
.filter(c -> c != null) | ||
.collect(Collectors.joining()); | ||
|
||
ActorsFilmsRecord actorsFilms = outputConverter.convert(generationTextFromStream); | ||
logger.info("" + actorsFilms); | ||
assertThat(actorsFilms.actor()).isEqualTo("Tom Hanks"); | ||
assertThat(actorsFilms.movies()).hasSize(5); | ||
} | ||
|
||
@Test | ||
void functionCallTest() { | ||
|
||
UserMessage userMessage = new UserMessage("What's the weather like in San Francisco, Tokyo, and Paris?"); | ||
|
||
List<Message> messages = new ArrayList<>(List.of(userMessage)); | ||
|
||
var promptOptions = OpenAiChatOptions.builder() | ||
.withFunctionCallbacks(List.of(FunctionCallbackWrapper.builder(new MockWeatherService()) | ||
.withName("getCurrentWeather") | ||
.withDescription("Get the weather in location") | ||
.withResponseConverter((response) -> "" + response.temp() + response.unit()) | ||
.build())) | ||
.build(); | ||
|
||
ChatResponse response = chatModel.call(new Prompt(messages, promptOptions)); | ||
|
||
logger.info("Response: {}", response); | ||
|
||
assertThat(response.getResult().getOutput().getContent()).contains("30", "10", "15"); | ||
} | ||
|
||
@Test | ||
void streamFunctionCallTest() { | ||
|
||
UserMessage userMessage = new UserMessage( | ||
"What's the weather like in San Francisco, Tokyo, and Paris? Return the temperature in Celsius."); | ||
|
||
List<Message> messages = new ArrayList<>(List.of(userMessage)); | ||
|
||
var promptOptions = OpenAiChatOptions.builder() | ||
.withFunctionCallbacks(List.of(FunctionCallbackWrapper.builder(new MockWeatherService()) | ||
.withName("getCurrentWeather") | ||
.withDescription("Get the weather in location") | ||
.withResponseConverter((response) -> "" + response.temp() + response.unit()) | ||
.build())) | ||
.build(); | ||
|
||
Flux<ChatResponse> response = chatModel.stream(new Prompt(messages, promptOptions)); | ||
|
||
String content = response.collectList() | ||
.block() | ||
.stream() | ||
.map(ChatResponse::getResults) | ||
.flatMap(List::stream) | ||
.map(Generation::getOutput) | ||
.map(AssistantMessage::getContent) | ||
.collect(Collectors.joining()); | ||
logger.info("Response: {}", content); | ||
|
||
assertThat(content).contains("30", "10", "15"); | ||
} | ||
|
||
@Test | ||
void validateCallResponseMetadata() { | ||
// @formatter:off | ||
ChatResponse response = ChatClient.create(chatModel).prompt() | ||
.options(OpenAiChatOptions.builder().withModel(DEFAULT_NVIDIA_MODEL).build()) | ||
.user("Tell me about 3 famous pirates from the Golden Age of Piracy and what they did") | ||
.call() | ||
.chatResponse(); | ||
// @formatter:on | ||
|
||
logger.info(response.toString()); | ||
assertThat(response.getMetadata().getId()).isNotEmpty(); | ||
assertThat(response.getMetadata().getModel()).containsIgnoringCase(DEFAULT_NVIDIA_MODEL); | ||
assertThat(response.getMetadata().getUsage().getPromptTokens()).isPositive(); | ||
assertThat(response.getMetadata().getUsage().getGenerationTokens()).isPositive(); | ||
assertThat(response.getMetadata().getUsage().getTotalTokens()).isPositive(); | ||
} | ||
|
||
@SpringBootConfiguration | ||
static class Config { | ||
|
||
@Bean | ||
public OpenAiApi chatCompletionApi() { | ||
return new OpenAiApi(NVIDIA_BASE_URL, System.getenv("NVIDIA_API_KEY")); | ||
} | ||
|
||
@Bean | ||
public OpenAiChatModel openAiClient(OpenAiApi openAiApi) { | ||
return new OpenAiChatModel(openAiApi, | ||
OpenAiChatOptions.builder().withMaxTokens(2048).withModel(DEFAULT_NVIDIA_MODEL).build()); | ||
} | ||
|
||
} | ||
|
||
} |
Binary file added
BIN
+414 KB
...-docs/src/main/antora/modules/ROOT/images/spring-ai-nvidia-function-calling.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+875 KB
spring-ai-docs/src/main/antora/modules/ROOT/images/spring-ai-nvidia-llm-api.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+688 KB
...g-ai-docs/src/main/antora/modules/ROOT/images/spring-ai-nvidia-registration.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/groq-chat.adoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.