Skip to content

Commit

Permalink
Add documentation for NVIDIA LLM API integration
Browse files Browse the repository at this point in the history
   Add NVIDIA LLM API - OpenAI Integration Test.
  • Loading branch information
tzolov committed Aug 19, 2024
1 parent 58c292a commit 46893b0
Show file tree
Hide file tree
Showing 7 changed files with 581 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
/*
* Copyright 2024 - 2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.openai.chat.proxy;

import static org.assertj.core.api.Assertions.assertThat;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.messages.AssistantMessage;
import org.springframework.ai.chat.messages.Message;
import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.model.Generation;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.chat.prompt.PromptTemplate;
import org.springframework.ai.chat.prompt.SystemPromptTemplate;
import org.springframework.ai.converter.BeanOutputConverter;
import org.springframework.ai.converter.ListOutputConverter;
import org.springframework.ai.converter.MapOutputConverter;
import org.springframework.ai.model.function.FunctionCallbackWrapper;
import org.springframework.ai.openai.OpenAiChatModel;
import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.ai.openai.api.OpenAiApi;
import org.springframework.ai.openai.api.tool.MockWeatherService;
import org.springframework.ai.openai.chat.ActorsFilms;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringBootConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Bean;
import org.springframework.core.convert.support.DefaultConversionService;
import org.springframework.core.io.Resource;

import reactor.core.publisher.Flux;

/**
 * Integration tests for the NVIDIA LLM API, exercised through its OpenAI-compatible
 * endpoint by reusing the Spring AI {@code OpenAiChatModel} client.
 * <p>
 * The tests are enabled only when the {@code NVIDIA_API_KEY} environment variable is
 * set, and they issue real network calls against {@code integrate.api.nvidia.com}.
 *
 * @author Christian Tzolov
 * @since 1.0.0
 */
@SpringBootTest(classes = NvidiaWithOpenAiChatModelIT.Config.class)
@EnabledIfEnvironmentVariable(named = "NVIDIA_API_KEY", matches = ".+")
class NvidiaWithOpenAiChatModelIT {

	private static final Logger logger = LoggerFactory.getLogger(NvidiaWithOpenAiChatModelIT.class);

	// NVIDIA's OpenAI-compatible REST endpoint.
	private static final String NVIDIA_BASE_URL = "https://integrate.api.nvidia.com";

	// Default model used by all tests unless a test overrides it via options.
	private static final String DEFAULT_NVIDIA_MODEL = "meta/llama-3.1-70b-instruct";

	// System-prompt template with {name} and {voice} placeholders.
	@Value("classpath:/prompts/system-message.st")
	private Resource systemResource;

	@Autowired
	private OpenAiChatModel chatModel;

	/**
	 * Verifies that system and user messages are honored in a blocking call.
	 */
	@Test
	void roleTest() {
		UserMessage userMessage = new UserMessage(
				"Tell me about 3 famous pirates from the Golden Age of Piracy and what they did.");
		SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(this.systemResource);
		Message systemMessage = systemPromptTemplate.createMessage(Map.of("name", "Bob", "voice", "pirate"));
		Prompt prompt = new Prompt(List.of(userMessage, systemMessage));
		ChatResponse response = this.chatModel.call(prompt);
		assertThat(response.getResults()).hasSize(1);
		assertThat(response.getResults().get(0).getOutput().getContent()).contains("Blackbeard");
	}

	/**
	 * Same as {@link #roleTest()} but over the streaming API; the content deltas are
	 * stitched back together before asserting.
	 */
	@Test
	void streamRoleTest() {
		UserMessage userMessage = new UserMessage(
				"Tell me about 3 famous pirates from the Golden Age of Piracy and what they did.");
		SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(this.systemResource);
		Message systemMessage = systemPromptTemplate.createMessage(Map.of("name", "Bob", "voice", "pirate"));
		Prompt prompt = new Prompt(List.of(userMessage, systemMessage));
		Flux<ChatResponse> flux = this.chatModel.stream(prompt);

		List<ChatResponse> responses = flux.collectList().block();
		assertThat(responses.size()).isGreaterThan(1);

		String stitchedResponseContent = responses.stream()
			.map(ChatResponse::getResults)
			.flatMap(List::stream)
			.map(Generation::getOutput)
			.map(AssistantMessage::getContent)
			// Skip null deltas (e.g. role-only or finish chunks) so they are not
			// rendered as the literal string "null" in the joined content.
			.filter(c -> c != null)
			.collect(Collectors.joining());

		assertThat(stitchedResponseContent).contains("Blackbeard");
	}

	/**
	 * Verifies that streaming with {@code stream_usage} enabled reports the same token
	 * counts as the equivalent blocking call (seeded for determinism).
	 */
	@Test
	void streamingWithTokenUsage() {
		var promptOptions = OpenAiChatOptions.builder().withStreamUsage(true).withSeed(1).build();

		var prompt = new Prompt("List two colors of the Polish flag. Be brief.", promptOptions);

		var streamingTokenUsage = this.chatModel.stream(prompt).blockLast().getMetadata().getUsage();
		var referenceTokenUsage = this.chatModel.call(prompt).getMetadata().getUsage();

		assertThat(streamingTokenUsage.getPromptTokens()).isGreaterThan(0);
		assertThat(streamingTokenUsage.getGenerationTokens()).isGreaterThan(0);
		assertThat(streamingTokenUsage.getTotalTokens()).isGreaterThan(0);

		assertThat(streamingTokenUsage.getPromptTokens()).isEqualTo(referenceTokenUsage.getPromptTokens());
		assertThat(streamingTokenUsage.getGenerationTokens()).isEqualTo(referenceTokenUsage.getGenerationTokens());
		assertThat(streamingTokenUsage.getTotalTokens()).isEqualTo(referenceTokenUsage.getTotalTokens());

	}

	/**
	 * Structured output: the model's reply is parsed into a {@code List<String>}.
	 */
	@Test
	void listOutputConverter() {
		DefaultConversionService conversionService = new DefaultConversionService();
		ListOutputConverter outputConverter = new ListOutputConverter(conversionService);

		String format = outputConverter.getFormat();
		String template = """
				List five {subject}
				{format}
				""";
		PromptTemplate promptTemplate = new PromptTemplate(template,
				Map.of("subject", "ice cream flavors", "format", format));
		Prompt prompt = new Prompt(promptTemplate.createMessage());
		Generation generation = this.chatModel.call(prompt).getResult();

		List<String> list = outputConverter.convert(generation.getOutput().getContent());
		assertThat(list).hasSize(5);

	}

	/**
	 * Structured output: the model's reply is parsed into a {@code Map<String, Object>}.
	 */
	@Test
	void mapOutputConverter() {
		MapOutputConverter outputConverter = new MapOutputConverter();

		String format = outputConverter.getFormat();
		String template = """
				Provide me a List of {subject}
				{format}
				""";
		PromptTemplate promptTemplate = new PromptTemplate(template,
				Map.of("subject", "numbers from 1 to 9 under they key name 'numbers'", "format", format));
		Prompt prompt = new Prompt(promptTemplate.createMessage());
		Generation generation = this.chatModel.call(prompt).getResult();

		Map<String, Object> result = outputConverter.convert(generation.getOutput().getContent());
		assertThat(result.get("numbers")).isEqualTo(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9));

	}

	/**
	 * Structured output: the model's reply is bound to a JavaBean ({@code ActorsFilms}).
	 */
	@Test
	void beanOutputConverter() {

		BeanOutputConverter<ActorsFilms> outputConverter = new BeanOutputConverter<>(ActorsFilms.class);

		String format = outputConverter.getFormat();
		String template = """
				Generate the filmography for a random actor.
				{format}
				""";
		PromptTemplate promptTemplate = new PromptTemplate(template, Map.of("format", format));
		Prompt prompt = new Prompt(promptTemplate.createMessage());
		Generation generation = this.chatModel.call(prompt).getResult();

		ActorsFilms actorsFilms = outputConverter.convert(generation.getOutput().getContent());
		assertThat(actorsFilms.getActor()).isNotEmpty();
	}

	// Target type for the record-based structured-output tests below.
	record ActorsFilmsRecord(String actor, List<String> movies) {
	}

	/**
	 * Structured output: the model's reply is bound to a Java record.
	 */
	@Test
	void beanOutputConverterRecords() {

		BeanOutputConverter<ActorsFilmsRecord> outputConverter = new BeanOutputConverter<>(ActorsFilmsRecord.class);

		String format = outputConverter.getFormat();
		String template = """
				Generate the filmography of 5 movies for Tom Hanks.
				{format}
				""";
		PromptTemplate promptTemplate = new PromptTemplate(template, Map.of("format", format));
		Prompt prompt = new Prompt(promptTemplate.createMessage());
		Generation generation = this.chatModel.call(prompt).getResult();

		ActorsFilmsRecord actorsFilms = outputConverter.convert(generation.getOutput().getContent());
		logger.info("{}", actorsFilms);
		assertThat(actorsFilms.actor()).isEqualTo("Tom Hanks");
		assertThat(actorsFilms.movies()).hasSize(5);
	}

	/**
	 * Record binding over the streaming API: deltas are joined first, then converted.
	 */
	@Test
	void beanStreamOutputConverterRecords() {

		BeanOutputConverter<ActorsFilmsRecord> outputConverter = new BeanOutputConverter<>(ActorsFilmsRecord.class);

		String format = outputConverter.getFormat();
		String template = """
				Generate the filmography of 5 movies for Tom Hanks.
				{format}
				""";
		PromptTemplate promptTemplate = new PromptTemplate(template, Map.of("format", format));
		Prompt prompt = new Prompt(promptTemplate.createMessage());

		String generationTextFromStream = this.chatModel.stream(prompt)
			.collectList()
			.block()
			.stream()
			.map(ChatResponse::getResults)
			.flatMap(List::stream)
			.map(Generation::getOutput)
			.map(AssistantMessage::getContent)
			// Null deltas would corrupt the JSON payload as the literal "null".
			.filter(c -> c != null)
			.collect(Collectors.joining());

		ActorsFilmsRecord actorsFilms = outputConverter.convert(generationTextFromStream);
		logger.info("{}", actorsFilms);
		assertThat(actorsFilms.actor()).isEqualTo("Tom Hanks");
		assertThat(actorsFilms.movies()).hasSize(5);
	}

	/**
	 * Verifies tool/function calling: the model should invoke the mock weather service
	 * for each of the three cities and fold the results into its answer.
	 */
	@Test
	void functionCallTest() {

		UserMessage userMessage = new UserMessage("What's the weather like in San Francisco, Tokyo, and Paris?");

		List<Message> messages = new ArrayList<>(List.of(userMessage));

		var promptOptions = OpenAiChatOptions.builder()
			.withFunctionCallbacks(List.of(FunctionCallbackWrapper.builder(new MockWeatherService())
				.withName("getCurrentWeather")
				.withDescription("Get the weather in location")
				.withResponseConverter((response) -> "" + response.temp() + response.unit())
				.build()))
			.build();

		ChatResponse response = this.chatModel.call(new Prompt(messages, promptOptions));

		logger.info("Response: {}", response);

		// 30/10/15 are the fixed temperatures returned by MockWeatherService.
		assertThat(response.getResult().getOutput().getContent()).contains("30", "10", "15");
	}

	/**
	 * Same as {@link #functionCallTest()} but over the streaming API.
	 */
	@Test
	void streamFunctionCallTest() {

		UserMessage userMessage = new UserMessage(
				"What's the weather like in San Francisco, Tokyo, and Paris? Return the temperature in Celsius.");

		List<Message> messages = new ArrayList<>(List.of(userMessage));

		var promptOptions = OpenAiChatOptions.builder()
			.withFunctionCallbacks(List.of(FunctionCallbackWrapper.builder(new MockWeatherService())
				.withName("getCurrentWeather")
				.withDescription("Get the weather in location")
				.withResponseConverter((response) -> "" + response.temp() + response.unit())
				.build()))
			.build();

		Flux<ChatResponse> response = this.chatModel.stream(new Prompt(messages, promptOptions));

		String content = response.collectList()
			.block()
			.stream()
			.map(ChatResponse::getResults)
			.flatMap(List::stream)
			.map(Generation::getOutput)
			.map(AssistantMessage::getContent)
			// Tool-call deltas carry no content; drop nulls before joining.
			.filter(c -> c != null)
			.collect(Collectors.joining());
		logger.info("Response: {}", content);

		assertThat(content).contains("30", "10", "15");
	}

	/**
	 * Verifies that response metadata (id, model name, token usage) is populated.
	 */
	@Test
	void validateCallResponseMetadata() {
		// @formatter:off
		ChatResponse response = ChatClient.create(this.chatModel).prompt()
				.options(OpenAiChatOptions.builder().withModel(DEFAULT_NVIDIA_MODEL).build())
				.user("Tell me about 3 famous pirates from the Golden Age of Piracy and what they did")
				.call()
				.chatResponse();
		// @formatter:on

		logger.info(response.toString());
		assertThat(response.getMetadata().getId()).isNotEmpty();
		assertThat(response.getMetadata().getModel()).containsIgnoringCase(DEFAULT_NVIDIA_MODEL);
		assertThat(response.getMetadata().getUsage().getPromptTokens()).isPositive();
		assertThat(response.getMetadata().getUsage().getGenerationTokens()).isPositive();
		assertThat(response.getMetadata().getUsage().getTotalTokens()).isPositive();
	}

	/**
	 * Test configuration: wires the standard OpenAI client against NVIDIA's base URL
	 * and API key, with the default NVIDIA model and a max-token limit.
	 */
	@SpringBootConfiguration
	static class Config {

		@Bean
		public OpenAiApi chatCompletionApi() {
			return new OpenAiApi(NVIDIA_BASE_URL, System.getenv("NVIDIA_API_KEY"));
		}

		@Bean
		public OpenAiChatModel openAiClient(OpenAiApi openAiApi) {
			// NOTE: NVIDIA's endpoint requires max_tokens to be set explicitly.
			return new OpenAiChatModel(openAiApi,
					OpenAiChatOptions.builder().withMaxTokens(2048).withModel(DEFAULT_NVIDIA_MODEL).build());
		}

	}

}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions spring-ai-docs/src/main/antora/modules/ROOT/nav.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
**** xref:api/chat/functions/minimax-chat-functions.adoc[Function Calling]
*** xref:api/chat/moonshot-chat.adoc[Moonshot AI]
//// **** xref:api/chat/functions/moonshot-chat-functions.adoc[Function Calling]
*** xref:api/chat/nvidia-chat.adoc[NVIDIA]
*** xref:api/chat/ollama-chat.adoc[Ollama]
**** xref:api/chat/functions/ollama-chat-functions.adoc[Function Calling]
*** xref:api/chat/openai-chat.adoc[OpenAI]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
= Groq Chat

https://groq.com/[Groq] is an extreally fast, LPU™ based, AI Inference Engine that support various https://console.groq.com/docs/models[AI Models],
https://groq.com/[Groq] is an extremely fast, LPU™ based, AI Inference Engine that supports various https://console.groq.com/docs/models[AI Models],
supports `Tool/Function Calling` and exposes an `OpenAI API` compatible endpoint.

Spring AI integrates with the https://groq.com/[Groq] by reusing the existing xref::api/chat/openai-chat.adoc[OpenAI] client.
Expand Down
Loading

0 comments on commit 46893b0

Please sign in to comment.