From d7f0b19b7d8b168454c4ad77c12a65ce3956fcd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Thu, 9 Jan 2025 11:34:11 +0100 Subject: [PATCH 01/21] Log evals as JSON Also, test out evals tool in CI. --- .github/workflows/ci.yml | 5 ++++- Makefile | 2 +- eval/run.go | 25 ++++++++++++++++++++++++- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b70a0a..35a84d7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,11 +57,14 @@ jobs: go-version-file: go.mod check-latest: true + - name: Install evals + run: go install maragu.dev/evals + - name: Get dependencies run: go mod download - name: Evaluate - run: go test -json -run TestEval ./... | jq 'select(.Test != null and .Action == "output" and (.Output | contains("result"))) | del(.Action)' + run: go test -json -run TestEval ./... | evals >> $GITHUB_STEP_SUMMARY lint: name: Lint diff --git a/Makefile b/Makefile index 2d81c81..ffb5730 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ cover: .PHONY: evaluate evaluate: - go test -json -run TestEval ./... | jq 'select(.Test != null and .Action == "output" and (.Output | contains("result"))) | del(.Action)' + @go test -json -run TestEval ./... | evals .PHONY: lint lint: diff --git a/eval/run.go b/eval/run.go index fceb2e9..8f629f3 100644 --- a/eval/run.go +++ b/eval/run.go @@ -1,6 +1,7 @@ package eval import ( + "encoding/json" "os" "strings" "testing" @@ -70,10 +71,32 @@ func (e *E) Score(s Sample, scorer Scorer) Result { return r } +type logLine struct { + Sample Sample + Result Result + Duration time.Duration +} + // Log a [Sample] and [Result]. // This effectively logs the eval name, sample, and result, along with timing information. // TODO include token information? func (e *E) Log(s Sample, r Result) { e.T.Helper() - e.T.Logf("sample=%+v result=%+v duration=%v", s, r, time.Since(e.start)) + + l := logLine{ + Sample: s, + Result: r, + Duration: time.Since(e.start), + } + + e.T.Log(mustJSON(l)) +} + +func mustJSON(l logLine) string { + b, err := json.Marshal(l) + if err != nil { + panic(err) + } + + return string(b) } From 1c85389d9aac262d1603bdc1a5acfe73afdd5557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Thu, 9 Jan 2025 11:40:21 +0100 Subject: [PATCH 02/21] Add tools.go --- go.mod | 1 + go.sum | 2 ++ tools.go | 3 +++ 3 files changed, 6 insertions(+) create mode 100644 tools.go diff --git a/go.mod b/go.mod index 7884f20..9209d93 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/openai/openai-go v0.1.0-alpha.43 google.golang.org/api v0.214.0 maragu.dev/env v0.2.0 + maragu.dev/evals v0.0.0-20250109103910-6c5f7970dee6 maragu.dev/is v0.2.0 ) diff --git a/go.sum b/go.sum index 21cd008..167f688 100644 --- a/go.sum +++ b/go.sum @@ -93,5 +93,7 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= maragu.dev/env v0.2.0 h1:nQKitDEB65ArZsh6E7vxzodOqY9bxEVFdBg+tskS1ys= maragu.dev/env v0.2.0/go.mod h1:t5CCbaEnjCM5mewiAVVzTS4N+oXTus2+SRnzKQbQVME= +maragu.dev/evals v0.0.0-20250109103910-6c5f7970dee6 h1:xjvKOhmzj8zik3/ctTRHelUVoxuMDaXys6gtQa0sJHw= +maragu.dev/evals v0.0.0-20250109103910-6c5f7970dee6/go.mod h1:gnwJ/4RJ7BrVOL1Sz6kqpXATEXlrSEunVKpKn3hiGeA= maragu.dev/is v0.2.0 h1:poeuVEA5GG3vrDpGmzo2KjWtIMZmqUyvGnOB0/pemig= maragu.dev/is v0.2.0/go.mod h1:bviaM5S0fBshCw7wuumFGTju/izopZ/Yvq4g7Klc7y8= diff --git a/tools.go b/tools.go new file mode 100644 index 0000000..2af5bcc --- /dev/null +++ b/tools.go @@ -0,0 +1,3 @@ +package llm + +import _ "maragu.dev/evals/tool" From be8df04940f2f4afb93bf5378988ec13c70b4342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Thu, 9 Jan 2025 11:40:37 +0100 Subject: [PATCH 03/21] Reorder CI --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 35a84d7..5adc557 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,12 +57,12 @@ jobs: go-version-file: go.mod check-latest: true - - name: Install evals - run: go install maragu.dev/evals - - name: Get dependencies run: go mod download + - name: Install evals + run: go install maragu.dev/evals + - name: Evaluate run: go test -json -run TestEval ./... | evals >> $GITHUB_STEP_SUMMARY From fce368a0eef46d433d3b2c59b82eb5d0a6bfd17b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Thu, 9 Jan 2025 11:57:08 +0100 Subject: [PATCH 04/21] upgrade evals --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 9209d93..a7c9224 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/openai/openai-go v0.1.0-alpha.43 google.golang.org/api v0.214.0 maragu.dev/env v0.2.0 - maragu.dev/evals v0.0.0-20250109103910-6c5f7970dee6 + maragu.dev/evals v0.0.0-20250109105608-6dbec6c971e0 maragu.dev/is v0.2.0 ) diff --git a/go.sum b/go.sum index 167f688..bd637cd 100644 --- a/go.sum +++ b/go.sum @@ -93,7 +93,7 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= maragu.dev/env v0.2.0 h1:nQKitDEB65ArZsh6E7vxzodOqY9bxEVFdBg+tskS1ys= maragu.dev/env v0.2.0/go.mod h1:t5CCbaEnjCM5mewiAVVzTS4N+oXTus2+SRnzKQbQVME= -maragu.dev/evals v0.0.0-20250109103910-6c5f7970dee6 h1:xjvKOhmzj8zik3/ctTRHelUVoxuMDaXys6gtQa0sJHw= -maragu.dev/evals v0.0.0-20250109103910-6c5f7970dee6/go.mod h1:gnwJ/4RJ7BrVOL1Sz6kqpXATEXlrSEunVKpKn3hiGeA= +maragu.dev/evals v0.0.0-20250109105608-6dbec6c971e0 h1:LLeIjk/o/0XWCF/rr9N2iF2NddFrNcsLN6z1xV2ma6w= +maragu.dev/evals v0.0.0-20250109105608-6dbec6c971e0/go.mod h1:gnwJ/4RJ7BrVOL1Sz6kqpXATEXlrSEunVKpKn3hiGeA= maragu.dev/is v0.2.0 h1:poeuVEA5GG3vrDpGmzo2KjWtIMZmqUyvGnOB0/pemig= maragu.dev/is v0.2.0/go.mod h1:bviaM5S0fBshCw7wuumFGTju/izopZ/Yvq4g7Klc7y8= From 37dcdb3f7a0afd25b8c742db88cb5cc92fa3f1e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Thu, 9 Jan 2025 13:33:54 +0100 Subject: [PATCH 05/21] test evals action --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5adc557..14cfef4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,6 +66,9 @@ jobs: - name: Evaluate run: go test -json -run TestEval ./... | evals >> $GITHUB_STEP_SUMMARY + - name: Evals + uses: maragudk/evals-action@main + lint: name: Lint runs-on: ubuntu-latest From ce9c357ba1160b23175426368506aa1a353bc1d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Thu, 9 Jan 2025 15:06:57 +0100 Subject: [PATCH 06/21] Add evals.db as artifact --- .github/workflows/ci.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 14cfef4..10d9f6f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,9 +66,10 @@ jobs: - name: Evaluate run: go test -json -run TestEval ./... | evals >> $GITHUB_STEP_SUMMARY - - name: Evals - uses: maragudk/evals-action@main - + - uses: actions/upload-artifact@v4 + with: + name: evals.db + path: evals.db lint: name: Lint runs-on: ubuntu-latest From 739bc12be431a661ef350c97d43c7d092aba7dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Thu, 9 Jan 2025 15:09:16 +0100 Subject: [PATCH 07/21] evals --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index a7c9224..3b9a468 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/openai/openai-go v0.1.0-alpha.43 google.golang.org/api v0.214.0 maragu.dev/env v0.2.0 - maragu.dev/evals v0.0.0-20250109105608-6dbec6c971e0 + maragu.dev/evals v0.0.0-20250109140843-7d1a2af9fa7b maragu.dev/is v0.2.0 ) diff --git a/go.sum b/go.sum index bd637cd..b72b96a 100644 --- a/go.sum +++ b/go.sum @@ -93,7 +93,7 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= maragu.dev/env v0.2.0 h1:nQKitDEB65ArZsh6E7vxzodOqY9bxEVFdBg+tskS1ys= maragu.dev/env v0.2.0/go.mod h1:t5CCbaEnjCM5mewiAVVzTS4N+oXTus2+SRnzKQbQVME= -maragu.dev/evals v0.0.0-20250109105608-6dbec6c971e0 h1:LLeIjk/o/0XWCF/rr9N2iF2NddFrNcsLN6z1xV2ma6w= -maragu.dev/evals v0.0.0-20250109105608-6dbec6c971e0/go.mod h1:gnwJ/4RJ7BrVOL1Sz6kqpXATEXlrSEunVKpKn3hiGeA= +maragu.dev/evals v0.0.0-20250109140843-7d1a2af9fa7b h1:Lo68WUguMs2LVXYo2RmqgulT5HvWDEYkGYaeTSCv7b8= +maragu.dev/evals v0.0.0-20250109140843-7d1a2af9fa7b/go.mod h1:Qyj56mYe9ApUvas0pVm5M7BbxH5KSyMmifSDFyaUSco= maragu.dev/is v0.2.0 h1:poeuVEA5GG3vrDpGmzo2KjWtIMZmqUyvGnOB0/pemig= maragu.dev/is v0.2.0/go.mod h1:bviaM5S0fBshCw7wuumFGTju/izopZ/Yvq4g7Klc7y8= From 6efdfc23d7c09e6c40b75d000eb725a8b1e07137 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:03:57 +0100 Subject: [PATCH 08/21] gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 011f851..32613f1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /cover.out /.env*.local +evals.db From 0b2c9e918b80ad720566dedb2eebae572d00f01e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:04:08 +0100 Subject: [PATCH 09/21] gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 32613f1..2ff9792 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -/cover.out /.env*.local +/cover.out evals.db From d3fcd6a6564560b03f4fec844c95033c5a5bdfd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:05:53 +0100 Subject: [PATCH 10/21] fix tool transitive dependency --- go.mod | 6 +++++- go.sum | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 3b9a468..f2f2029 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/openai/openai-go v0.1.0-alpha.43 google.golang.org/api v0.214.0 maragu.dev/env v0.2.0 - maragu.dev/evals v0.0.0-20250109140843-7d1a2af9fa7b + maragu.dev/evals v0.0.0-20250110090454-18cd820491c8 maragu.dev/is v0.2.0 ) @@ -27,6 +27,8 @@ require ( github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/gax-go/v2 v2.14.0 // indirect + github.com/jmoiron/sqlx v1.4.0 // indirect + github.com/mattn/go-sqlite3 v1.14.24 // indirect github.com/tidwall/gjson v1.14.4 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect @@ -47,4 +49,6 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 // indirect google.golang.org/grpc v1.67.1 // indirect google.golang.org/protobuf v1.35.2 // indirect + maragu.dev/errors v0.3.0 // indirect + maragu.dev/migrate v0.6.0 // indirect ) diff --git a/go.sum b/go.sum index b72b96a..7389a31 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,8 @@ cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4 cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg= cloud.google.com/go/longrunning v0.5.7 h1:WLbHekDbjK1fVFD3ibpFFVoyizlLRl73I7YKuAKilhU= cloud.google.com/go/longrunning v0.5.7/go.mod h1:8GClkudohy1Fxm3owmBGid8W0pSgodEMwEAztp38Xng= +filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= +filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/agnivade/levenshtein v1.2.0 h1:U9L4IOT0Y3i0TIlUIDJ7rVUziKi/zPbrJGaFrtYH3SY= github.com/agnivade/levenshtein v1.2.0/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU= github.com/anthropics/anthropic-sdk-go v0.2.0-alpha.8 h1:ss/c/eeyILgoK2sMsTJdcdLdhY3wZSt//+nanM41B9w= @@ -27,6 +29,8 @@ github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= +github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= github.com/google/generative-ai-go v0.19.0 h1:R71szggh8wHMCUlEMsW2A/3T+5LdEIkiaHSYgSpUgdg= github.com/google/generative-ai-go v0.19.0/go.mod h1:JYolL13VG7j79kM5BtHz4qwONHkeJQzOCkKXnpqtS/E= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= @@ -39,6 +43,29 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gT github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o= github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk= +github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8= +github.com/jackc/chunkreader/v2 v2.0.1/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= +github.com/jackc/pgconn v1.14.3 h1:bVoTr12EGANZz66nZPkMInAV/KHD2TxH9npjXXgiB3w= +github.com/jackc/pgconn v1.14.3/go.mod h1:RZbme4uasqzybK2RK5c65VsHxoyaml09lx3tXOcO/VM= +github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE= +github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgproto3/v2 v2.3.3 h1:1HLSx5H+tXR9pW3in3zaztoEwQYRC9SQaYUHjTSUOag= +github.com/jackc/pgproto3/v2 v2.3.3/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgtype v1.14.0 h1:y+xUdabmyMkJLyApYuPj38mW+aAIqCe5uuBB51rH3Vw= +github.com/jackc/pgtype v1.14.0/go.mod h1:LUMuVrfsFfdKGLw+AFFVv6KtHOFMwRgDDzBt76IqCA4= +github.com/jackc/pgx/v4 v4.18.2 h1:xVpYkNR5pk5bMCZGfClbO962UIqVABcAGt7ha1s/FeU= +github.com/jackc/pgx/v4 v4.18.2/go.mod h1:Ey4Oru5tH5sB6tV7hDmfWFahwF15Eb7DNXlRKx2CkVw= +github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= +github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM= +github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/openai/openai-go v0.1.0-alpha.43 h1:6XWGUsrHSaPyh8U6ocs/XJGb/UX7jhQRK2bYefvTuAg= github.com/openai/openai-go v0.1.0-alpha.43/go.mod h1:3SdE6BffOX9HPEQv8IL/fi3LYZ5TUpRYaqGQZbyk11A= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -93,7 +120,11 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= maragu.dev/env v0.2.0 h1:nQKitDEB65ArZsh6E7vxzodOqY9bxEVFdBg+tskS1ys= maragu.dev/env v0.2.0/go.mod h1:t5CCbaEnjCM5mewiAVVzTS4N+oXTus2+SRnzKQbQVME= -maragu.dev/evals v0.0.0-20250109140843-7d1a2af9fa7b h1:Lo68WUguMs2LVXYo2RmqgulT5HvWDEYkGYaeTSCv7b8= -maragu.dev/evals v0.0.0-20250109140843-7d1a2af9fa7b/go.mod h1:Qyj56mYe9ApUvas0pVm5M7BbxH5KSyMmifSDFyaUSco= +maragu.dev/errors v0.3.0 h1:huI+n+ddMfVgQFD+cEqIPaozUlfz3TkfgpkssNip5G0= +maragu.dev/errors v0.3.0/go.mod h1:cygLiyNnq4ofF3whYscilo2ecUADCaUQXwvwFrMOhmM= +maragu.dev/evals v0.0.0-20250110090454-18cd820491c8 h1:04jcfKTCUUW0iU1v6leNl3nWmE/w3l/es1niEjX8JRs= +maragu.dev/evals v0.0.0-20250110090454-18cd820491c8/go.mod h1:Qyj56mYe9ApUvas0pVm5M7BbxH5KSyMmifSDFyaUSco= maragu.dev/is v0.2.0 h1:poeuVEA5GG3vrDpGmzo2KjWtIMZmqUyvGnOB0/pemig= maragu.dev/is v0.2.0/go.mod h1:bviaM5S0fBshCw7wuumFGTju/izopZ/Yvq4g7Klc7y8= +maragu.dev/migrate v0.6.0 h1:gJLAIVaRh9z9sN55Q2sWwScpEH+JsT6N0L1DnzedXFE= +maragu.dev/migrate v0.6.0/go.mod h1:TdZBD5wRvBbzLocsSV08kyvLiLCn0Q6DvgYHmyygWVQ= From 739d826670571227067e0274038242dd861b443e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:21:52 +0100 Subject: [PATCH 11/21] also add comment to PR --- .github/workflows/ci.yml | 15 +++++++++++++++ go.mod | 2 +- go.sum | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 10d9f6f..55b8a2c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,10 +66,25 @@ jobs: - name: Evaluate run: go test -json -run TestEval ./... | evals >> $GITHUB_STEP_SUMMARY + - name: Add evals comment to PR + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs') + const summary = fs.readFileSync(process.env.GITHUB_STEP_SUMMARY, 'utf8') + + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: summary + }) + - uses: actions/upload-artifact@v4 with: name: evals.db path: evals.db + if-no-files-found: error lint: name: Lint runs-on: ubuntu-latest diff --git a/go.mod b/go.mod index f2f2029..3f5968b 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/openai/openai-go v0.1.0-alpha.43 google.golang.org/api v0.214.0 maragu.dev/env v0.2.0 - maragu.dev/evals v0.0.0-20250110090454-18cd820491c8 + maragu.dev/evals v0.0.0-20250110091058-dfafc4ece961 maragu.dev/is v0.2.0 ) diff --git a/go.sum b/go.sum index 7389a31..b180f2a 100644 --- a/go.sum +++ b/go.sum @@ -124,6 +124,8 @@ maragu.dev/errors v0.3.0 h1:huI+n+ddMfVgQFD+cEqIPaozUlfz3TkfgpkssNip5G0= maragu.dev/errors v0.3.0/go.mod h1:cygLiyNnq4ofF3whYscilo2ecUADCaUQXwvwFrMOhmM= maragu.dev/evals v0.0.0-20250110090454-18cd820491c8 h1:04jcfKTCUUW0iU1v6leNl3nWmE/w3l/es1niEjX8JRs= maragu.dev/evals v0.0.0-20250110090454-18cd820491c8/go.mod h1:Qyj56mYe9ApUvas0pVm5M7BbxH5KSyMmifSDFyaUSco= +maragu.dev/evals v0.0.0-20250110091058-dfafc4ece961 h1:H7iZNzGQ7RZ+Se/OqiKsj4bCFUPPWhauk8iY8HtjwhQ= +maragu.dev/evals v0.0.0-20250110091058-dfafc4ece961/go.mod h1:Qyj56mYe9ApUvas0pVm5M7BbxH5KSyMmifSDFyaUSco= maragu.dev/is v0.2.0 h1:poeuVEA5GG3vrDpGmzo2KjWtIMZmqUyvGnOB0/pemig= maragu.dev/is v0.2.0/go.mod h1:bviaM5S0fBshCw7wuumFGTju/izopZ/Yvq4g7Klc7y8= maragu.dev/migrate v0.6.0 h1:gJLAIVaRh9z9sN55Q2sWwScpEH+JsT6N0L1DnzedXFE= From a70eb5d4c4846fa867733b1d1d43ec7277597fda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:29:32 +0100 Subject: [PATCH 12/21] debug --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55b8a2c..bcd999e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,6 +66,10 @@ jobs: - name: Evaluate run: go test -json -run TestEval ./... | evals >> $GITHUB_STEP_SUMMARY + - run: echo $GITHUB_STEP_SUMMARY + + - run: cat $GITHUB_STEP_SUMMARY + - name: Add evals comment to PR uses: actions/github-script@v7 with: From 5002ccd3fa5759cf38d66294c0f58a73da50ef6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:33:34 +0100 Subject: [PATCH 13/21] fix? --- .github/workflows/ci.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bcd999e..6fe6a4b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,6 +57,11 @@ jobs: go-version-file: go.mod check-latest: true + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: "20" + - name: Get dependencies run: go mod download @@ -66,10 +71,6 @@ jobs: - name: Evaluate run: go test -json -run TestEval ./... | evals >> $GITHUB_STEP_SUMMARY - - run: echo $GITHUB_STEP_SUMMARY - - - run: cat $GITHUB_STEP_SUMMARY - - name: Add evals comment to PR uses: actions/github-script@v7 with: From 997a0892f0fbabc76c9bf5116c3e511fcfa5a368 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:36:44 +0100 Subject: [PATCH 14/21] log --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6fe6a4b..e9e6c38 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -78,6 +78,8 @@ jobs: const fs = require('fs') const summary = fs.readFileSync(process.env.GITHUB_STEP_SUMMARY, 'utf8') + console.log(summary) + github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, From 0f9a2ec85fd907db2fc040fa2b53a0b9c43d7cb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:40:52 +0100 Subject: [PATCH 15/21] fix --- .github/workflows/ci.yml | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e9e6c38..351f2aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,11 +57,6 @@ jobs: go-version-file: go.mod check-latest: true - - name: Setup Node - uses: actions/setup-node@v4 - with: - node-version: "20" - - name: Get dependencies run: go mod download @@ -71,20 +66,19 @@ jobs: - name: Evaluate run: go test -json -run TestEval ./... | evals >> $GITHUB_STEP_SUMMARY + - name: Read $GITHUB_STEP_SUMMARY file + id: getsummary + run: echo "summary=$(cat $GITHUB_STEP_SUMMARY)" >> $GITHUB_OUTPUT + - name: Add evals comment to PR uses: actions/github-script@v7 with: script: | - const fs = require('fs') - const summary = fs.readFileSync(process.env.GITHUB_STEP_SUMMARY, 'utf8') - - console.log(summary) - github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, - body: summary + body: ${{ steps.getsummary.outputs.summary }} }) - uses: actions/upload-artifact@v4 From ddb66549bbc21cb13d868ebe9bc368d04291b462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:42:28 +0100 Subject: [PATCH 16/21] quotes? --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 351f2aa..bf881c7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -78,7 +78,7 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, - body: ${{ steps.getsummary.outputs.summary }} + body: "${{ steps.getsummary.outputs.summary }}" }) - uses: actions/upload-artifact@v4 From 5e9d7f435dcd9bf192395c3823c37cdc2de68d42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:45:20 +0100 Subject: [PATCH 17/21] =?UTF-8?q?=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/ci.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bf881c7..c975c60 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,7 +68,12 @@ jobs: - name: Read $GITHUB_STEP_SUMMARY file id: getsummary - run: echo "summary=$(cat $GITHUB_STEP_SUMMARY)" >> $GITHUB_OUTPUT + run: | + content=$(cat $GITHUB_STEP_SUMMARY) + content="${content//'%'/'%25'}" + content="${content//$'\n'/'%0A'}" + content="${content//$'\r'/'%0D'}" + echo "summary=$content" >> $GITHUB_OUTPUT - name: Add evals comment to PR uses: actions/github-script@v7 @@ -78,7 +83,7 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, - body: "${{ steps.getsummary.outputs.summary }}" + body: `${{ steps.getsummary.outputs.summary }}` }) - uses: actions/upload-artifact@v4 From c518e5387e5260d85a6db0efaab6ca559e67c4e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:49:25 +0100 Subject: [PATCH 18/21] remove --- .github/workflows/ci.yml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c975c60..f6341ce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,15 +66,6 @@ jobs: - name: Evaluate run: go test -json -run TestEval ./... | evals >> $GITHUB_STEP_SUMMARY - - name: Read $GITHUB_STEP_SUMMARY file - id: getsummary - run: | - content=$(cat $GITHUB_STEP_SUMMARY) - content="${content//'%'/'%25'}" - content="${content//$'\n'/'%0A'}" - content="${content//$'\r'/'%0D'}" - echo "summary=$content" >> $GITHUB_OUTPUT - - name: Add evals comment to PR uses: actions/github-script@v7 with: @@ -83,7 +74,7 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, - body: `${{ steps.getsummary.outputs.summary }}` + body: `Meh` }) - uses: actions/upload-artifact@v4 From e5eff8f4446b826f7e5214bba7dd3232b49925cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:52:20 +0100 Subject: [PATCH 19/21] fix? --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6341ce..a1fe852 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,11 +70,13 @@ jobs: uses: actions/github-script@v7 with: script: | + const fs = require('fs') + const summary = fs.readFileSync(`${process.env.GITHUB_STEP_SUMMARY}`, 'utf8') github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, - body: `Meh` + body: `Evals:\n\n${summary}` }) - uses: actions/upload-artifact@v4 From a93b4e8303bb6618dfe9a851e8ba3c8367c8fbd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:54:07 +0100 Subject: [PATCH 20/21] again --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a1fe852..765f5f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,19 +64,19 @@ jobs: run: go install maragu.dev/evals - name: Evaluate - run: go test -json -run TestEval ./... | evals >> $GITHUB_STEP_SUMMARY + run: go test -json -run TestEval ./... | evals | tee evals.txt >> $GITHUB_STEP_SUMMARY - name: Add evals comment to PR uses: actions/github-script@v7 with: script: | const fs = require('fs') - const summary = fs.readFileSync(`${process.env.GITHUB_STEP_SUMMARY}`, 'utf8') + const summary = fs.readFileSync('evals.txt', 'utf8') github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, - body: `Evals:\n\n${summary}` + body: summary }) - uses: actions/upload-artifact@v4 From 4345f9f10bb7df5d6cd8527b80647c6dd17debf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20W=C3=BCstenberg?= Date: Fri, 10 Jan 2025 10:57:25 +0100 Subject: [PATCH 21/21] link --- .github/workflows/ci.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 765f5f1..c408d06 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,24 +66,26 @@ jobs: - name: Evaluate run: go test -json -run TestEval ./... | evals | tee evals.txt >> $GITHUB_STEP_SUMMARY + - uses: actions/upload-artifact@v4 + id: evalsdb + with: + name: evals.db + path: evals.db + if-no-files-found: error + - name: Add evals comment to PR uses: actions/github-script@v7 with: script: | const fs = require('fs') - const summary = fs.readFileSync('evals.txt', 'utf8') + const table = fs.readFileSync('evals.txt', 'utf8') github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, - body: summary + body: `# Evals\n\n${table}\n\n[Download evals.db](${{ steps.evalsdb.outputs.artifact-url }})` }) - - uses: actions/upload-artifact@v4 - with: - name: evals.db - path: evals.db - if-no-files-found: error lint: name: Lint runs-on: ubuntu-latest