@@ -19,7 +19,11 @@ Does your company depend on this project? [Contact me at markus@maragu.dk](mailt
19
19
20
20
## Usage

Evals will only run with `go test -run TestEval ./...` and otherwise be skipped.

### Simple example

Eval a mocked LLM, construct a sample, score it with a lexical similarity scorer, and log the result.

```go
package examples_test
@@ -66,3 +70,138 @@ func (l *powerfulLLM) Prompt(request string) string {
66
70
return l.response
67
71
}
68
72
```
73
### Advanced example

This eval uses real LLMs (OpenAI GPT-4o mini, Google Gemini 1.5 Flash, Anthropic Claude 3.5 Haiku)
and compares the response to an expected response using both lexical similarity (with Levenshtein distance)
and semantic similarity (with an OpenAI embedding model and cosine similarity comparison).

```go
package examples_test

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"testing"

	"github.com/anthropics/anthropic-sdk-go"
	"github.com/google/generative-ai-go/genai"
	"github.com/openai/openai-go"
	"github.com/openai/openai-go/shared"
	"maragu.dev/env"

	"maragu.dev/llm"
	"maragu.dev/llm/eval"
)

// TestEvalLLMs evaluates different LLMs with the same prompts.
func TestEvalLLMs(t *testing.T) {
	_ = env.Load("../../.env.test.local")

	tests := []struct {
		name     string
		prompt   func(prompt string) string
		expected string
	}{
		{
			name:     "gpt-4o-mini",
			prompt:   gpt4oMini,
			expected: "Hello! How can I assist you today?",
		},
		{
			name:     "gemini-1.5-flash",
			prompt:   gemini15Flash,
			expected: "Hi there! How can I help you today?",
		},
		{
			name:     "claude-3.5-haiku",
			prompt:   claude35Haiku,
			expected: "Hello! How are you doing today? Is there anything I can help you with?",
		},
	}

	for _, test := range tests {
		eval.Run(t, test.name, func(e *eval.E) {
			input := "Hi!"
			output := test.prompt(input)

			sample := eval.Sample{
				Input:    input,
				Output:   output,
				Expected: test.expected,
			}

			result := e.Score(sample, eval.LexicalSimilarityScorer(eval.LevenshteinDistance))
			e.Log(sample, result)

			result = e.Score(sample, eval.SemanticSimilarityScorer(&embeddingGetter{}, eval.CosineSimilarity))
			e.Log(sample, result)
		})
	}
}

func gpt4oMini(prompt string) string {
	client := llm.NewOpenAIClient(llm.NewOpenAIClientOptions{Key: env.GetStringOrDefault("OPENAI_KEY", "")})
	res, err := client.Client.Chat.Completions.New(context.Background(), openai.ChatCompletionNewParams{
		Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
			openai.UserMessage(prompt),
		}),
		Model:       openai.F(openai.ChatModelGPT4oMini),
		Temperature: openai.F(0.0),
	})
	if err != nil {
		panic(err)
	}
	return res.Choices[0].Message.Content
}

func gemini15Flash(prompt string) string {
	client := llm.NewGoogleClient(llm.NewGoogleClientOptions{Key: env.GetStringOrDefault("GOOGLE_KEY", "")})
	model := client.Client.GenerativeModel("models/gemini-1.5-flash-latest")
	var temperature float32 = 0
	model.Temperature = &temperature
	res, err := model.GenerateContent(context.Background(), genai.Text(prompt))
	if err != nil {
		panic(err)
	}
	return strings.TrimSpace(fmt.Sprint(res.Candidates[0].Content.Parts[0]))
}

func claude35Haiku(prompt string) string {
	client := llm.NewAnthropicClient(llm.NewAnthropicClientOptions{Key: env.GetStringOrDefault("ANTHROPIC_KEY", "")})
	res, err := client.Client.Messages.New(context.Background(), anthropic.MessageNewParams{
		Messages: anthropic.F([]anthropic.MessageParam{
			anthropic.NewUserMessage(anthropic.NewTextBlock(prompt)),
		}),
		Model:       anthropic.F(anthropic.ModelClaude3_5HaikuLatest),
		MaxTokens:   anthropic.F(int64(1024)),
		Temperature: anthropic.F(0.0),
	})
	if err != nil {
		panic(err)
	}
	return fmt.Sprint(res.Content[0].Text)
}

type embeddingGetter struct{}

func (e *embeddingGetter) GetEmbedding(v string) ([]float64, error) {
	client := llm.NewOpenAIClient(llm.NewOpenAIClientOptions{Key: env.GetStringOrDefault("OPENAI_KEY", "")})
	res, err := client.Client.Embeddings.New(context.Background(), openai.EmbeddingNewParams{
		Input:          openai.F[openai.EmbeddingNewParamsInputUnion](shared.UnionString(v)),
		Model:          openai.F(openai.EmbeddingModelTextEmbedding3Small),
		EncodingFormat: openai.F(openai.EmbeddingNewParamsEncodingFormatFloat),
		Dimensions:     openai.F(int64(128)),
	})
	if err != nil {
		return nil, err
	}
	if len(res.Data) == 0 {
		return nil, errors.New("no embeddings returned")
	}
	return res.Data[0].Embedding, nil
}
```
0 commit comments