Skip to content

Commit

Permalink
Add documentation for NVIDIA LLM API integration
Browse files Browse the repository at this point in the history
   Add NVIDIA LLM API - OpenAI Integration Test.
  • Loading branch information
tzolov committed Aug 19, 2024
1 parent 58c292a commit 46893b0
Show file tree
Hide file tree
Showing 7 changed files with 581 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
/*
* Copyright 2024 - 2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.openai.chat.proxy;

import static org.assertj.core.api.Assertions.assertThat;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.messages.AssistantMessage;
import org.springframework.ai.chat.messages.Message;
import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.model.Generation;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.chat.prompt.PromptTemplate;
import org.springframework.ai.chat.prompt.SystemPromptTemplate;
import org.springframework.ai.converter.BeanOutputConverter;
import org.springframework.ai.converter.ListOutputConverter;
import org.springframework.ai.converter.MapOutputConverter;
import org.springframework.ai.model.function.FunctionCallbackWrapper;
import org.springframework.ai.openai.OpenAiChatModel;
import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.ai.openai.api.OpenAiApi;
import org.springframework.ai.openai.api.tool.MockWeatherService;
import org.springframework.ai.openai.chat.ActorsFilms;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringBootConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Bean;
import org.springframework.core.convert.support.DefaultConversionService;
import org.springframework.core.io.Resource;

import reactor.core.publisher.Flux;

/**
 * Integration tests for the NVIDIA LLM API, exercised through its OpenAI-compatible
 * endpoint by reusing the Spring AI {@code OpenAiChatModel} client.
 * <p>
 * The tests are enabled only when the {@code NVIDIA_API_KEY} environment variable is
 * set, and they issue real network calls against {@code integrate.api.nvidia.com}.
 *
 * @author Christian Tzolov
 * @since 1.0.0
 */
@SpringBootTest(classes = NvidiaWithOpenAiChatModelIT.Config.class)
@EnabledIfEnvironmentVariable(named = "NVIDIA_API_KEY", matches = ".+")
class NvidiaWithOpenAiChatModelIT {

	private static final Logger logger = LoggerFactory.getLogger(NvidiaWithOpenAiChatModelIT.class);

	// NVIDIA's OpenAI-compatible REST endpoint.
	private static final String NVIDIA_BASE_URL = "https://integrate.api.nvidia.com";

	// Default model used by all tests unless a test overrides it via options.
	private static final String DEFAULT_NVIDIA_MODEL = "meta/llama-3.1-70b-instruct";

	// System-prompt template with {name} and {voice} placeholders.
	@Value("classpath:/prompts/system-message.st")
	private Resource systemResource;

	@Autowired
	private OpenAiChatModel chatModel;

	/**
	 * Verifies that system and user messages are honored in a blocking call.
	 */
	@Test
	void roleTest() {
		UserMessage userMessage = new UserMessage(
				"Tell me about 3 famous pirates from the Golden Age of Piracy and what they did.");
		SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(this.systemResource);
		Message systemMessage = systemPromptTemplate.createMessage(Map.of("name", "Bob", "voice", "pirate"));
		Prompt prompt = new Prompt(List.of(userMessage, systemMessage));
		ChatResponse response = this.chatModel.call(prompt);
		assertThat(response.getResults()).hasSize(1);
		assertThat(response.getResults().get(0).getOutput().getContent()).contains("Blackbeard");
	}

	/**
	 * Same as {@link #roleTest()} but over the streaming API; the content deltas are
	 * stitched back together before asserting.
	 */
	@Test
	void streamRoleTest() {
		UserMessage userMessage = new UserMessage(
				"Tell me about 3 famous pirates from the Golden Age of Piracy and what they did.");
		SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(this.systemResource);
		Message systemMessage = systemPromptTemplate.createMessage(Map.of("name", "Bob", "voice", "pirate"));
		Prompt prompt = new Prompt(List.of(userMessage, systemMessage));
		Flux<ChatResponse> flux = this.chatModel.stream(prompt);

		List<ChatResponse> responses = flux.collectList().block();
		assertThat(responses.size()).isGreaterThan(1);

		String stitchedResponseContent = responses.stream()
			.map(ChatResponse::getResults)
			.flatMap(List::stream)
			.map(Generation::getOutput)
			.map(AssistantMessage::getContent)
			// Skip null deltas (e.g. role-only or finish chunks) so they are not
			// rendered as the literal string "null" in the joined content.
			.filter(c -> c != null)
			.collect(Collectors.joining());

		assertThat(stitchedResponseContent).contains("Blackbeard");
	}

	/**
	 * Verifies that streaming with {@code stream_usage} enabled reports the same token
	 * counts as the equivalent blocking call (seeded for determinism).
	 */
	@Test
	void streamingWithTokenUsage() {
		var promptOptions = OpenAiChatOptions.builder().withStreamUsage(true).withSeed(1).build();

		var prompt = new Prompt("List two colors of the Polish flag. Be brief.", promptOptions);

		var streamingTokenUsage = this.chatModel.stream(prompt).blockLast().getMetadata().getUsage();
		var referenceTokenUsage = this.chatModel.call(prompt).getMetadata().getUsage();

		assertThat(streamingTokenUsage.getPromptTokens()).isGreaterThan(0);
		assertThat(streamingTokenUsage.getGenerationTokens()).isGreaterThan(0);
		assertThat(streamingTokenUsage.getTotalTokens()).isGreaterThan(0);

		assertThat(streamingTokenUsage.getPromptTokens()).isEqualTo(referenceTokenUsage.getPromptTokens());
		assertThat(streamingTokenUsage.getGenerationTokens()).isEqualTo(referenceTokenUsage.getGenerationTokens());
		assertThat(streamingTokenUsage.getTotalTokens()).isEqualTo(referenceTokenUsage.getTotalTokens());

	}

	/**
	 * Structured output: the model's reply is parsed into a {@code List<String>}.
	 */
	@Test
	void listOutputConverter() {
		DefaultConversionService conversionService = new DefaultConversionService();
		ListOutputConverter outputConverter = new ListOutputConverter(conversionService);

		String format = outputConverter.getFormat();
		String template = """
				List five {subject}
				{format}
				""";
		PromptTemplate promptTemplate = new PromptTemplate(template,
				Map.of("subject", "ice cream flavors", "format", format));
		Prompt prompt = new Prompt(promptTemplate.createMessage());
		Generation generation = this.chatModel.call(prompt).getResult();

		List<String> list = outputConverter.convert(generation.getOutput().getContent());
		assertThat(list).hasSize(5);

	}

	/**
	 * Structured output: the model's reply is parsed into a {@code Map<String, Object>}.
	 */
	@Test
	void mapOutputConverter() {
		MapOutputConverter outputConverter = new MapOutputConverter();

		String format = outputConverter.getFormat();
		String template = """
				Provide me a List of {subject}
				{format}
				""";
		PromptTemplate promptTemplate = new PromptTemplate(template,
				Map.of("subject", "numbers from 1 to 9 under they key name 'numbers'", "format", format));
		Prompt prompt = new Prompt(promptTemplate.createMessage());
		Generation generation = this.chatModel.call(prompt).getResult();

		Map<String, Object> result = outputConverter.convert(generation.getOutput().getContent());
		assertThat(result.get("numbers")).isEqualTo(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9));

	}

	/**
	 * Structured output: the model's reply is bound to a JavaBean ({@code ActorsFilms}).
	 */
	@Test
	void beanOutputConverter() {

		BeanOutputConverter<ActorsFilms> outputConverter = new BeanOutputConverter<>(ActorsFilms.class);

		String format = outputConverter.getFormat();
		String template = """
				Generate the filmography for a random actor.
				{format}
				""";
		PromptTemplate promptTemplate = new PromptTemplate(template, Map.of("format", format));
		Prompt prompt = new Prompt(promptTemplate.createMessage());
		Generation generation = this.chatModel.call(prompt).getResult();

		ActorsFilms actorsFilms = outputConverter.convert(generation.getOutput().getContent());
		assertThat(actorsFilms.getActor()).isNotEmpty();
	}

	// Target type for the record-based structured-output tests below.
	record ActorsFilmsRecord(String actor, List<String> movies) {
	}

	/**
	 * Structured output: the model's reply is bound to a Java record.
	 */
	@Test
	void beanOutputConverterRecords() {

		BeanOutputConverter<ActorsFilmsRecord> outputConverter = new BeanOutputConverter<>(ActorsFilmsRecord.class);

		String format = outputConverter.getFormat();
		String template = """
				Generate the filmography of 5 movies for Tom Hanks.
				{format}
				""";
		PromptTemplate promptTemplate = new PromptTemplate(template, Map.of("format", format));
		Prompt prompt = new Prompt(promptTemplate.createMessage());
		Generation generation = this.chatModel.call(prompt).getResult();

		ActorsFilmsRecord actorsFilms = outputConverter.convert(generation.getOutput().getContent());
		logger.info("{}", actorsFilms);
		assertThat(actorsFilms.actor()).isEqualTo("Tom Hanks");
		assertThat(actorsFilms.movies()).hasSize(5);
	}

	/**
	 * Record binding over the streaming API: deltas are joined first, then converted.
	 */
	@Test
	void beanStreamOutputConverterRecords() {

		BeanOutputConverter<ActorsFilmsRecord> outputConverter = new BeanOutputConverter<>(ActorsFilmsRecord.class);

		String format = outputConverter.getFormat();
		String template = """
				Generate the filmography of 5 movies for Tom Hanks.
				{format}
				""";
		PromptTemplate promptTemplate = new PromptTemplate(template, Map.of("format", format));
		Prompt prompt = new Prompt(promptTemplate.createMessage());

		String generationTextFromStream = this.chatModel.stream(prompt)
			.collectList()
			.block()
			.stream()
			.map(ChatResponse::getResults)
			.flatMap(List::stream)
			.map(Generation::getOutput)
			.map(AssistantMessage::getContent)
			// Null deltas would corrupt the JSON payload as the literal "null".
			.filter(c -> c != null)
			.collect(Collectors.joining());

		ActorsFilmsRecord actorsFilms = outputConverter.convert(generationTextFromStream);
		logger.info("{}", actorsFilms);
		assertThat(actorsFilms.actor()).isEqualTo("Tom Hanks");
		assertThat(actorsFilms.movies()).hasSize(5);
	}

	/**
	 * Verifies tool/function calling: the model should invoke the mock weather service
	 * for each of the three cities and fold the results into its answer.
	 */
	@Test
	void functionCallTest() {

		UserMessage userMessage = new UserMessage("What's the weather like in San Francisco, Tokyo, and Paris?");

		List<Message> messages = new ArrayList<>(List.of(userMessage));

		var promptOptions = OpenAiChatOptions.builder()
			.withFunctionCallbacks(List.of(FunctionCallbackWrapper.builder(new MockWeatherService())
				.withName("getCurrentWeather")
				.withDescription("Get the weather in location")
				.withResponseConverter((response) -> "" + response.temp() + response.unit())
				.build()))
			.build();

		ChatResponse response = this.chatModel.call(new Prompt(messages, promptOptions));

		logger.info("Response: {}", response);

		// 30/10/15 are the fixed temperatures returned by MockWeatherService.
		assertThat(response.getResult().getOutput().getContent()).contains("30", "10", "15");
	}

	/**
	 * Same as {@link #functionCallTest()} but over the streaming API.
	 */
	@Test
	void streamFunctionCallTest() {

		UserMessage userMessage = new UserMessage(
				"What's the weather like in San Francisco, Tokyo, and Paris? Return the temperature in Celsius.");

		List<Message> messages = new ArrayList<>(List.of(userMessage));

		var promptOptions = OpenAiChatOptions.builder()
			.withFunctionCallbacks(List.of(FunctionCallbackWrapper.builder(new MockWeatherService())
				.withName("getCurrentWeather")
				.withDescription("Get the weather in location")
				.withResponseConverter((response) -> "" + response.temp() + response.unit())
				.build()))
			.build();

		Flux<ChatResponse> response = this.chatModel.stream(new Prompt(messages, promptOptions));

		String content = response.collectList()
			.block()
			.stream()
			.map(ChatResponse::getResults)
			.flatMap(List::stream)
			.map(Generation::getOutput)
			.map(AssistantMessage::getContent)
			// Tool-call deltas carry no content; drop nulls before joining.
			.filter(c -> c != null)
			.collect(Collectors.joining());
		logger.info("Response: {}", content);

		assertThat(content).contains("30", "10", "15");
	}

	/**
	 * Verifies that response metadata (id, model name, token usage) is populated.
	 */
	@Test
	void validateCallResponseMetadata() {
		// @formatter:off
		ChatResponse response = ChatClient.create(this.chatModel).prompt()
				.options(OpenAiChatOptions.builder().withModel(DEFAULT_NVIDIA_MODEL).build())
				.user("Tell me about 3 famous pirates from the Golden Age of Piracy and what they did")
				.call()
				.chatResponse();
		// @formatter:on

		logger.info(response.toString());
		assertThat(response.getMetadata().getId()).isNotEmpty();
		assertThat(response.getMetadata().getModel()).containsIgnoringCase(DEFAULT_NVIDIA_MODEL);
		assertThat(response.getMetadata().getUsage().getPromptTokens()).isPositive();
		assertThat(response.getMetadata().getUsage().getGenerationTokens()).isPositive();
		assertThat(response.getMetadata().getUsage().getTotalTokens()).isPositive();
	}

	/**
	 * Test configuration: wires the standard OpenAI client against NVIDIA's base URL
	 * and API key, with the default NVIDIA model and a max-token limit.
	 */
	@SpringBootConfiguration
	static class Config {

		@Bean
		public OpenAiApi chatCompletionApi() {
			return new OpenAiApi(NVIDIA_BASE_URL, System.getenv("NVIDIA_API_KEY"));
		}

		@Bean
		public OpenAiChatModel openAiClient(OpenAiApi openAiApi) {
			// NOTE: NVIDIA's endpoint requires max_tokens to be set explicitly.
			return new OpenAiChatModel(openAiApi,
					OpenAiChatOptions.builder().withMaxTokens(2048).withModel(DEFAULT_NVIDIA_MODEL).build());
		}

	}

}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions spring-ai-docs/src/main/antora/modules/ROOT/nav.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
**** xref:api/chat/functions/minimax-chat-functions.adoc[Function Calling]
*** xref:api/chat/moonshot-chat.adoc[Moonshot AI]
//// **** xref:api/chat/functions/moonshot-chat-functions.adoc[Function Calling]
*** xref:api/chat/nvidia-chat.adoc[NVIDIA]
*** xref:api/chat/ollama-chat.adoc[Ollama]
**** xref:api/chat/functions/ollama-chat-functions.adoc[Function Calling]
*** xref:api/chat/openai-chat.adoc[OpenAI]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
= Groq Chat

https://groq.com/[Groq] is an extreally fast, LPU™ based, AI Inference Engine that support various https://console.groq.com/docs/models[AI Models],
https://groq.com/[Groq] is an extremely fast, LPU™ based, AI Inference Engine that supports various https://console.groq.com/docs/models[AI Models],
supports `Tool/Function Calling` and exposes an `OpenAI API` compatible endpoint.

Spring AI integrates with the https://groq.com/[Groq] by reusing the existing xref::api/chat/openai-chat.adoc[OpenAI] client.
Expand Down
Loading

0 comments on commit 46893b0

Please sign in to comment.