From dc9dde78a9e1a322e4111fb5bcec5e334733f525 Mon Sep 17 00:00:00 2001
From: Juan Arboleda <35846576+alzeck@users.noreply.github.com>
Date: Thu, 27 Nov 2025 18:04:56 +0000
Subject: [PATCH 1/5] add support for evals

---
 lib/openai.rb                                 |    1 +
 lib/openai/client.rb                          |    4 +
 lib/openai/evals.rb                           |   71 +
 spec/fixtures/cassettes/evals_create.yml      |  277 +
 spec/fixtures/cassettes/evals_delete.yml      |   74 +
 .../fixtures/cassettes/evals_delete_setup.yml |  277 +
 spec/fixtures/cassettes/evals_list.yml        | 5411 +++++++++++++++++
 spec/fixtures/cassettes/evals_list_setup.yml  |  277 +
 spec/fixtures/cassettes/evals_retrieve.yml    |  274 +
 .../cassettes/evals_retrieve_setup.yml        |  277 +
 spec/fixtures/cassettes/evals_runs_cancel.yml |  125 +
 .../cassettes/evals_runs_cancel_run_setup.yml |  125 +
 .../cassettes/evals_runs_cancel_setup.yml     |  277 +
 spec/fixtures/cassettes/evals_runs_create.yml |  125 +
 .../cassettes/evals_runs_create_setup.yml     |  277 +
 spec/fixtures/cassettes/evals_runs_delete.yml |  196 +
 .../cassettes/evals_runs_delete_run_setup.yml |  125 +
 .../cassettes/evals_runs_delete_setup.yml     |  277 +
 .../evals_runs_output_items_list.yml          |   76 +
 ...ns_output_items_list_output_item_setup.yml |   76 +
 ...evals_runs_output_items_list_run_setup.yml |  125 +
 .../evals_runs_output_items_list_setup.yml    |  277 +
 .../evals_runs_output_items_retrieve.yml      |  303 +
 ...s_runs_output_items_retrieve_run_setup.yml |  125 +
 ...evals_runs_output_items_retrieve_setup.yml |  277 +
 .../cassettes/evals_runs_retrieve.yml         |  123 +
 .../evals_runs_retrieve_run_setup.yml         |  125 +
 .../cassettes/evals_runs_retrieve_setup.yml   |  277 +
 spec/fixtures/cassettes/evals_update.yml      |  276 +
 .../fixtures/cassettes/evals_update_setup.yml |  277 +
 spec/openai/client/evals_spec.rb              |  274 +
 31 files changed, 11081 insertions(+)
 create mode 100644 lib/openai/evals.rb
 create mode 100644 spec/fixtures/cassettes/evals_create.yml
 create mode 100644 spec/fixtures/cassettes/evals_delete.yml
 create mode 100644 spec/fixtures/cassettes/evals_delete_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_list.yml
 create mode 100644 spec/fixtures/cassettes/evals_list_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_retrieve.yml
 create mode 100644 spec/fixtures/cassettes/evals_retrieve_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_cancel.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_cancel_run_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_cancel_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_create.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_create_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_delete.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_delete_run_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_delete_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_output_items_list.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_output_items_list_output_item_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_output_items_list_run_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_output_items_list_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_output_items_retrieve.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_output_items_retrieve_run_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_output_items_retrieve_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_retrieve.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_retrieve_run_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_retrieve_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_update.yml
 create mode 100644 spec/fixtures/cassettes/evals_update_setup.yml
 create mode 100644 spec/openai/client/evals_spec.rb

diff --git a/lib/openai.rb b/lib/openai.rb
index d5880c90..9128bfaf 100644
--- a/lib/openai.rb
+++ b/lib/openai.rb
@@ -22,6 +22,7 @@
 require_relative "openai/batches"
 require_relative "openai/usage"
 require_relative "openai/conversations"
+require_relative "openai/evals"
 
 module OpenAI
   class Error < StandardError; end
diff --git a/lib/openai/client.rb b/lib/openai/client.rb
index 6054af0f..c9127861 100644
--- a/lib/openai/client.rb
+++ b/lib/openai/client.rb
@@ -109,6 +109,10 @@ def conversations
       @conversations ||= OpenAI::Conversations.new(client: self)
     end
 
+    def evals
+      @evals ||= OpenAI::Evals.new(client: self)
+    end
+
     def azure?
       @api_type&.to_sym == :azure
     end
diff --git a/lib/openai/evals.rb b/lib/openai/evals.rb
new file mode 100644
index 00000000..bbbbdb95
--- /dev/null
+++ b/lib/openai/evals.rb
@@ -0,0 +1,95 @@
+module OpenAI
+  # Wrapper around the `/evals` endpoints. Every method forwards a single call to
+  # the client given to the constructor and returns whatever that call returns.
+  class Evals
+    # client: object the requests are sent through (OpenAI::Client passes itself).
+    def initialize(client:)
+      @client = client
+    end
+
+    # Creates an eval: POST /evals, handing `parameters` to the client's `json_post`.
+    def create(parameters: {})
+      @client.json_post(path: "/evals", parameters: parameters)
+    end
+
+    # Fetches one eval: GET /evals/{id}.
+    def retrieve(id:)
+      @client.get(path: "/evals/#{id}")
+    end
+
+    # Updates an eval: POST /evals/{id}, handing `parameters` to `json_post`.
+    def update(id:, parameters: {})
+      @client.json_post(path: "/evals/#{id}", parameters: parameters)
+    end
+
+    # Deletes an eval: DELETE /evals/{id}.
+    def delete(id:)
+      @client.delete(path: "/evals/#{id}")
+    end
+
+    # Lists evals: GET /evals; `parameters` are forwarded to the client's `get`.
+    def list(parameters: {})
+      @client.get(path: "/evals", parameters: parameters)
+    end
+
+    # Memoized accessor for the nested runs resource; it reuses this object's client.
+    def runs
+      @runs ||= Runs.new(client: @client)
+    end
+
+    # Wrapper around the `/evals/{eval_id}/runs` endpoints.
+    class Runs
+      def initialize(client:)
+        @client = client
+      end
+
+      # Creates a run: POST /evals/{eval_id}/runs, handing `parameters` to `json_post`.
+      def create(eval_id:, parameters: {})
+        @client.json_post(path: "/evals/#{eval_id}/runs", parameters: parameters)
+      end
+
+      # Fetches one run: GET /evals/{eval_id}/runs/{id}.
+      def retrieve(eval_id:, id:)
+        @client.get(path: "/evals/#{eval_id}/runs/#{id}")
+      end
+
+      # Lists the runs of an eval: GET /evals/{eval_id}/runs. Mirrors Evals#list and
+      # OutputItems#list; `parameters` are forwarded to the client's `get`.
+      def list(eval_id:, parameters: {})
+        @client.get(path: "/evals/#{eval_id}/runs", parameters: parameters)
+      end
+
+      # Cancels a run through the client's `post` on /evals/{eval_id}/runs/{id}/cancel.
+      def cancel(eval_id:, id:)
+        @client.post(path: "/evals/#{eval_id}/runs/#{id}/cancel")
+      end
+
+      # Deletes a run: DELETE /evals/{eval_id}/runs/{id}.
+      def delete(eval_id:, id:)
+        @client.delete(path: "/evals/#{eval_id}/runs/#{id}")
+      end
+
+      # Memoized accessor for the nested output-items resource; reuses the client.
+      def output_items
+        @output_items ||= OutputItems.new(client: @client)
+      end
+
+      # Wrapper around the `/evals/{eval_id}/runs/{run_id}/output_items` endpoints.
+      class OutputItems
+        def initialize(client:)
+          @client = client
+        end
+
+        # Lists a run's output items; `parameters` are forwarded to the client's `get`.
+        def list(eval_id:, run_id:, parameters: {})
+          @client.get(path: "/evals/#{eval_id}/runs/#{run_id}/output_items", parameters: parameters)
+        end
+
+        # Fetches one output item: GET .../output_items/{id}.
+        def retrieve(eval_id:, run_id:, id:)
+          @client.get(path: "/evals/#{eval_id}/runs/#{run_id}/output_items/#{id}")
+        end
+      end
+    end
+  end
+end
diff --git a/spec/fixtures/cassettes/evals_create.yml b/spec/fixtures/cassettes/evals_create.yml
new file mode 100644
index 00000000..919537ef
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_create.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:43 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_ffb92a5fdd9ce2220567f011a43dba2c
+      Openai-Processing-Ms:
+      - '555'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '557'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=G4GcoNWM5rT0kajORKFsYNgcXAz8HMsb5rZ8Zzkw_LI-1764265063-1.0.1.1-Dp9PBpRTcl2U.a5OAYevFIabxr3OC6hE7.9O3KIiWz6tB1_9SQ1VMKna9wP6_3b8KkVh3uQtSLsQ0_BXDzRXOfZyUuFUaoabN67UbMP4q64;
+        path=/; expires=Thu, 27-Nov-25 18:07:43 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=x0rOeErtaRsMGn4UZqdIWs3b9skWEly1bRr442iHDDE-1764265063034-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53651eddc83c84-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c66d7bc8191a3a478e4e5b174f7",
+          "object": "eval",
+          "created_at": 1764265062,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-8c5ece96-5e5d-4051-87a1-9af9a668be70",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:43 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_delete.yml b/spec/fixtures/cassettes/evals_delete.yml
new file mode 100644
index 00000000..4760d4fd
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_delete.yml
@@ -0,0 +1,74 @@
+---
+http_interactions:
+- request:
+    method: delete
+    uri: https://api.openai.com/v1/evals/eval_69288c753da081918baccc440dfb3cea
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:57 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_5b2c7c66080f297227416bf472b45866
+      Openai-Processing-Ms:
+      - '267'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '269'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=FDivFwBSqSQaDjz.Sv2hSLAw1c.5lyvVl0nLrxBNXHo-1764265077-1.0.1.1-KkM8oLJRZq5PU2QFlnlZFTTWvrS77ZQkSbdDyJTCxO_5sOqJZ8VB8KXJw4lN0JxBa06O3AaCfEBncyzc78mA7scuegGDTEebpLCiXE4iF3U;
+        path=/; expires=Thu, 27-Nov-25 18:07:57 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=I7CzEHguum6GIZRgNj0yySn3bEm20mjveBzKgnyKSA0-1764265077911-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53657e4af94d65-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "eval.deleted",
+          "deleted": true,
+          "eval_id": "eval_69288c753da081918baccc440dfb3cea"
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:57 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_delete_setup.yml b/spec/fixtures/cassettes/evals_delete_setup.yml
new file mode 100644
index 00000000..2afd5f32
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_delete_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:57 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_a48649a5da6f373a7dd1c03ad7727a2d
+      Openai-Processing-Ms:
+      - '181'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '184'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=gaAv6K1dIpaWENMBQCC5oWFC7WYA15BUV5ufk1SWx_Q-1764265077-1.0.1.1-_Rc0A7giRZmUc2b5hF6pZCpHFLyZscP93icF_6Lp9NDWaJhiafMjpahB2ETzDa9c6vS1QBMbHTvgr04kIRJZpwg7DX8lHQ4Mwbm6d5z6S7E;
+        path=/; expires=Thu, 27-Nov-25 18:07:57 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=1rBJq8YPuzzVcpl.U_aK0HikFl_72LTQ.QmqttbpqPk-1764265077383-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53657b9e2fcc9e-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c753da081918baccc440dfb3cea",
+          "object": "eval",
+          "created_at": 1764265077,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-a4525dea-03ff-497b-bdd4-7d1d1bdbb3d1",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:57 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_list.yml b/spec/fixtures/cassettes/evals_list.yml
new file mode 100644
index 00000000..54873fc8
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_list.yml
@@ -0,0 +1,5411 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:45 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_09908134b92384fc64c2e1a044fc1b8f
+      Openai-Processing-Ms:
+      - '349'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '365'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=a9lHm_ooC54LUqxpoUst1vazPY71OQaHdrCBcnTLJ8Y-1764265065-1.0.1.1-PvPVksJlQfOKII3YuqWv76rdDKlmg3Af7t.kcEILylSWKihcvR2SUFc.At3ilkSNU3DxtN1PWnAFTzSeGiuIxObyz7ifqWs2aR6jOudJDM0;
+        path=/; expires=Thu, 27-Nov-25 18:07:45 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=7IH9dxNsKoIkP3ir_C4yjxfvA8TzZJyRidP6c.KKy7w-1764265065684-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365315dbb97f8-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "list",
+          "data": [
+            {
+              "id": "eval_692886ca71948191a94a46ef2866fa38",
+              "object": "eval",
+              "created_at": 1764263626,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-9ec23aee-5784-4532-b734-4eaa1441c1b4",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_692886c81f0c81918301673fd48074e3",
+              "object": "eval",
+              "created_at": 1764263624,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-b605af03-d2b3-4219-aabc-bdbbd02864c3",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_692886c607dc819198c3b71cbf375088",
+              "object": "eval",
+              "created_at": 1764263622,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-5d95bb83-b098-4c8c-9ccc-1f327b7c680f",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_692886c4402c8191b16daf0a13927d55",
+              "object": "eval",
+              "created_at": 1764263620,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-a5f01088-9cd6-484c-b16c-4239b8ce247f",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_692886c286c08191a59229de96f2519c",
+              "object": "eval",
+              "created_at": 1764263618,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-6ab333ba-8b63-4868-ba3d-8444b9f0b788",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_692886c0c2e88191b175224e13418b17",
+              "object": "eval",
+              "created_at": 1764263616,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-ea13e537-3285-4116-87ea-d26232d4e433",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_692886bf65fc8191991a4848d677e502",
+              "object": "eval",
+              "created_at": 1764263615,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-ecffb0f6-e04c-470b-a089-71512d3f7e2e",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_692886be83748191aa1ecb05c5db958e",
+              "object": "eval",
+              "created_at": 1764263614,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-ec2a502e-e03d-4b4e-920f-c9996764023a",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {
+                "modified": "true"
+              }
+            },
+            {
+              "id": "eval_692886bd90d8819198391114178d4134",
+              "object": "eval",
+              "created_at": 1764263613,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-a8d9c70b-5062-40ad-b882-6d7d2b4835d7",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_692886bc33dc81919ec1696f9e931ff4",
+              "object": "eval",
+              "created_at": 1764263612,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-1bbd590e-ef4f-462e-a050-094b3925482c",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_69288535779c8191a0bed3405c7a72ab",
+              "object": "eval",
+              "created_at": 1764263221,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-9f9e21ad-adcd-4f8e-b359-b9aa6a34a974",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_692885332d10819180c74603e6462a4c",
+              "object": "eval",
+              "created_at": 1764263219,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-3b0cec65-bd94-41a5-a67c-894300044c4a",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_69288530792c819181f87315e2cf7a98",
+              "object": "eval",
+              "created_at": 1764263216,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-8d9f1ee8-2ca7-4ee7-9775-84fd81f188ac",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_6928852eacfc8191939c094b7bf768a3",
+              "object": "eval",
+              "created_at": 1764263214,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-261fabf9-7bb5-497b-8103-a989a3d5780a",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_6928852cfc9c81918d7e5fee3762e782",
+              "object": "eval",
+              "created_at": 1764263212,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-6b2951b4-82b6-4471-b494-2f362485c8ba",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_6928852ae6588191801b3c819b2ec864",
+              "object": "eval",
+              "created_at": 1764263210,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-27b6b470-b897-47a2-84a5-bae598fe8475",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_692885294e408191883f13c67b96cc77",
+              "object": "eval",
+              "created_at": 1764263209,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-724f7a30-b74b-49d5-8fbc-abe93f386347",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_69288527f2548191a4a1550aa8ae4962",
+              "object": "eval",
+              "created_at": 1764263207,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-0abe0176-b62b-4dd6-96a1-3f30623a53f4",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {
+                "modified": "true"
+              }
+            },
+            {
+              "id": "eval_692885254d4c8191b44f0366019b69c4",
+              "object": "eval",
+              "created_at": 1764263205,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-d2a3c749-e56a-425d-8adf-8ec9ae2d2b45",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            },
+            {
+              "id": "eval_69288313e4408191922bf1863f2ba432",
+              "object": "eval",
+              "created_at": 1764262675,
+              "data_source_config": {
+                "type": "logs",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input",
+                        "output"
+                      ],
+                      "title": "LogsItemSchema",
+                      "properties": {
+                        "output": {
+                          "items": {
+                            "required": [
+                              "model",
+                              "output"
+                            ],
+                            "title": "ResponseInputSample",
+                            "properties": {
+                              "model": {
+                                "title": "Model",
+                                "type": "string"
+                              },
+                              "output": {
+                                "items": {
+                                  "required": [
+                                    "role",
+                                    "content"
+                                  ],
+                                  "title": "ChatMessage",
+                                  "properties": {
+                                    "tool_call_id": {
+                                      "default": null,
+                                      "title": "Tool Call Id",
+                                      "anyOf": [
+                                        {
+                                          "type": "string"
+                                        },
+                                        {
+                                          "type": "null"
+                                        }
+                                      ]
+                                    },
+                                    "finish_reason": {
+                                      "default": null,
+                                      "title": "Finish Reason",
+                                      "anyOf": [
+                                        {
+                                          "enum": [
+                                            "stop",
+                                            "length",
+                                            "tool_calls",
+                                            "content_filter",
+                                            "function_call"
+                                          ],
+                                          "type": "string"
+                                        },
+                                        {
+                                          "type": "null"
+                                        }
+                                      ]
+                                    },
+                                    "content": {
+                                      "title": "Content",
+                                      "anyOf": [
+                                        {
+                                          "type": "string"
+                                        },
+                                        {
+                                          "items": {
+                                            "anyOf": [
+                                              {
+                                                "required": [
+                                                  "text",
+                                                  "type"
+                                                ],
+                                                "additionalProperties": true,
+                                                "title": "ResponseInputText",
+                                                "properties": {
+                                                  "text": {
+                                                    "title": "Text",
+                                                    "type": "string"
+                                                  },
+                                                  "type": {
+                                                    "const": "input_text",
+                                                    "title": "Type",
+                                                    "type": "string"
+                                                  }
+                                                },
+                                                "type": "object"
+                                              },
+                                              {
+                                                "required": [
+                                                  "detail",
+                                                  "type"
+                                                ],
+                                                "additionalProperties": true,
+                                                "title": "ResponseInputImage",
+                                                "properties": {
+                                                  "file_id": {
+                                                    "default": null,
+                                                    "title": "File Id",
+                                                    "anyOf": [
+                                                      {
+                                                        "type": "string"
+                                                      },
+                                                      {
+                                                        "type": "null"
+                                                      }
+                                                    ]
+                                                  },
+                                                  "detail": {
+                                                    "title": "Detail",
+                                                    "enum": [
+                                                      "low",
+                                                      "high",
+                                                      "auto"
+                                                    ],
+                                                    "type": "string"
+                                                  },
+                                                  "type": {
+                                                    "const": "input_image",
+                                                    "title": "Type",
+                                                    "type": "string"
+                                                  },
+                                                  "image_url": {
+                                                    "default": null,
+                                                    "title": "Image Url",
+                                                    "anyOf": [
+                                                      {
+                                                        "type": "string"
+                                                      },
+                                                      {
+                                                        "type": "null"
+                                                      }
+                                                    ]
+                                                  }
+                                                },
+                                                "type": "object"
+                                              },
+                                              {
+                                                "required": [
+                                                  "annotations",
+                                                  "text",
+                                                  "type"
+                                                ],
+                                                "additionalProperties": true,
+                                                "title": "ResponseOutputText",
+                                                "properties": {
+                                                  "text": {
+                                                    "title": "Text",
+                                                    "type": "string"
+                                                  },
+                                                  "type": {
+                                                    "const": "output_text",
+                                                    "title": "Type",
+                                                    "type": "string"
+                                                  },
+                                                  "logprobs": {
+                                                    "default": null,
+                                                    "title": "Logprobs",
+                                                    "anyOf": [
+                                                      {
+                                                        "items": {
+                                                          "required": [
+                                                            "token",
+                                                            "bytes",
+                                                            "logprob",
+                                                            "top_logprobs"
+                                                          ],
+                                                          "additionalProperties": true,
+                                                          "title": "Logprob",
+                                                          "properties": {
+                                                            "bytes": {
+                                                              "items": {
+                                                                "type": "integer"
+                                                              },
+                                                              "title": "Bytes",
+                                                              "type": "array"
+                                                            },
+                                                            "token": {
+                                                              "title": "Token",
+                                                              "type": "string"
+                                                            },
+                                                            "top_logprobs": {
+                                                              "items": {
+                                                                "required": [
+                                                                  "token",
+                                                                  "bytes",
+                                                                  "logprob"
+                                                                ],
+                                                                "additionalProperties": true,
+                                                                "title": "LogprobTopLogprob",
+                                                                "properties": {
+                                                                  "bytes": {
+                                                                    "items": {
+                                                                      "type": "integer"
+                                                                    },
+                                                                    "title": "Bytes",
+                                                                    "type": "array"
+                                                                  },
+                                                                  "token": {
+                                                                    "title": "Token",
+                                                                    "type": "string"
+                                                                  },
+                                                                  "logprob": {
+                                                                    "title": "Logprob",
+                                                                    "type": "number"
+                                                                  }
+                                                                },
+                                                                "type": "object"
+                                                              },
+                                                              "title": "Top Logprobs",
+                                                              "type": "array"
+                                                            },
+                                                            "logprob": {
+                                                              "title": "Logprob",
+                                                              "type": "number"
+                                                            }
+                                                          },
+                                                          "type": "object"
+                                                        },
+                                                        "type": "array"
+                                                      },
+                                                      {
+                                                        "type": "null"
+                                                      }
+                                                    ]
+                                                  },
+                                                  "annotations": {
+                                                    "items": {
+                                                      "anyOf": [
+                                                        {
+                                                          "required": [
+                                                            "file_id",
+                                                            "filename",
+                                                            "index",
+                                                            "type"
+                                                          ],
+                                                          "additionalProperties": true,
+                                                          "title": "AnnotationFileCitation",
+                                                          "properties": {
+                                                            "file_id": {
+                                                              "title": "File Id",
+                                                              "type": "string"
+                                                            },
+                                                            "index": {
+                                                              "title": "Index",
+                                                              "type": "integer"
+                                                            },
+                                                            "type": {
+                                                              "const": "file_citation",
+                                                              "title": "Type",
+                                                              "type": "string"
+                                                            },
+                                                            "filename": {
+                                                              "title": "Filename",
+                                                              "type": "string"
+                                                            }
+                                                          },
+                                                          "type": "object"
+                                                        },
+                                                        {
+                                                          "required": [
+                                                            "end_index",
+                                                            "start_index",
+                                                            "title",
+                                                            "type",
+                                                            "url"
+                                                          ],
+                                                          "additionalProperties": true,
+                                                          "title": "AnnotationURLCitation",
+                                                          "properties": {
+                                                            "start_index": {
+                                                              "title": "Start Index",
+                                                              "type": "integer"
+                                                            },
+                                                            "end_index": {
+                                                              "title": "End Index",
+                                                              "type": "integer"
+                                                            },
+                                                            "title": {
+                                                              "title": "Title",
+                                                              "type": "string"
+                                                            },
+                                                            "type": {
+                                                              "const": "url_citation",
+                                                              "title": "Type",
+                                                              "type": "string"
+                                                            },
+                                                            "url": {
+                                                              "title": "Url",
+                                                              "type": "string"
+                                                            }
+                                                          },
+                                                          "type": "object"
+                                                        },
+                                                        {
+                                                          "required": [
+                                                            "container_id",
+                                                            "end_index",
+                                                            "file_id",
+                                                            "filename",
+                                                            "start_index",
+                                                            "type"
+                                                          ],
+                                                          "additionalProperties": true,
+                                                          "title": "AnnotationContainerFileCitation",
+                                                          "properties": {
+                                                            "start_index": {
+                                                              "title": "Start Index",
+                                                              "type": "integer"
+                                                            },
+                                                            "end_index": {
+                                                              "title": "End Index",
+                                                              "type": "integer"
+                                                            },
+                                                            "type": {
+                                                              "const": "container_file_citation",
+                                                              "title": "Type",
+                                                              "type": "string"
+                                                            },
+                                                            "filename": {
+                                                              "title": "Filename",
+                                                              "type": "string"
+                                                            },
+                                                            "file_id": {
+                                                              "title": "File Id",
+                                                              "type": "string"
+                                                            },
+                                                            "container_id": {
+                                                              "title": "Container Id",
+                                                              "type": "string"
+                                                            }
+                                                          },
+                                                          "type": "object"
+                                                        },
+                                                        {
+                                                          "required": [
+                                                            "file_id",
+                                                            "index",
+                                                            "type"
+                                                          ],
+                                                          "additionalProperties": true,
+                                                          "title": "AnnotationFilePath",
+                                                          "properties": {
+                                                            "file_id": {
+                                                              "title": "File Id",
+                                                              "type": "string"
+                                                            },
+                                                            "index": {
+                                                              "title": "Index",
+                                                              "type": "integer"
+                                                            },
+                                                            "type": {
+                                                              "const": "file_path",
+                                                              "title": "Type",
+                                                              "type": "string"
+                                                            }
+                                                          },
+                                                          "type": "object"
+                                                        }
+                                                      ]
+                                                    },
+                                                    "title": "Annotations",
+                                                    "type": "array"
+                                                  }
+                                                },
+                                                "type": "object"
+                                              },
+                                              {
+                                                "required": [
+                                                  "type",
+                                                  "input_audio"
+                                                ],
+                                                "title": "ResponseInputAudio",
+                                                "properties": {
+                                                  "type": {
+                                                    "const": "input_audio",
+                                                    "title": "Type",
+                                                    "type": "string"
+                                                  },
+                                                  "input_audio": {
+                                                    "required": [
+                                                      "data"
+                                                    ],
+                                                    "title": "AudioData",
+                                                    "properties": {
+                                                      "data": {
+                                                        "title": "Data",
+                                                        "type": "string"
+                                                      },
+                                                      "format": {
+                                                        "default": "wav",
+                                                        "title": "Format",
+                                                        "enum": [
+                                                          "wav",
+                                                          "mp3"
+                                                        ],
+                                                        "type": "string"
+                                                      }
+                                                    },
+                                                    "type": "object"
+                                                  }
+                                                },
+                                                "type": "object"
+                                              },
+                                              {
+                                                "required": [
+                                                  "type",
+                                                  "output_audio"
+                                                ],
+                                                "title": "ResponseOutputAudio",
+                                                "properties": {
+                                                  "audio_transcript": {
+                                                    "default": null,
+                                                    "title": "Audio Transcript",
+                                                    "anyOf": [
+                                                      {
+                                                        "type": "string"
+                                                      },
+                                                      {
+                                                        "type": "null"
+                                                      }
+                                                    ]
+                                                  },
+                                                  "output_audio": {
+                                                    "required": [
+                                                      "data"
+                                                    ],
+                                                    "title": "AudioData",
+                                                    "properties": {
+                                                      "data": {
+                                                        "title": "Data",
+                                                        "type": "string"
+                                                      },
+                                                      "format": {
+                                                        "default": "wav",
+                                                        "title": "Format",
+                                                        "enum": [
+                                                          "wav",
+                                                          "mp3"
+                                                        ],
+                                                        "type": "string"
+                                                      }
+                                                    },
+                                                    "type": "object"
+                                                  },
+                                                  "type": {
+                                                    "const": "output_audio",
+                                                    "title": "Type",
+                                                    "type": "string"
+                                                  }
+                                                },
+                                                "type": "object"
+                                              }
+                                            ]
+                                          },
+                                          "type": "array"
+                                        }
+                                      ]
+                                    },
+                                    "role": {
+                                      "title": "Role",
+                                      "enum": [
+                                        "system",
+                                        "user",
+                                        "assistant",
+                                        "developer",
+                                        "tool",
+                                        "function"
+                                      ],
+                                      "type": "string"
+                                    },
+                                    "tool_calls": {
+                                      "default": null,
+                                      "title": "Tool Calls",
+                                      "anyOf": [
+                                        {
+                                          "items": {
+                                            "required": [
+                                              "type",
+                                              "function",
+                                              "id"
+                                            ],
+                                            "title": "FunctionCall",
+                                            "properties": {
+                                              "function": {
+                                                "required": [
+                                                  "name",
+                                                  "arguments"
+                                                ],
+                                                "title": "Function",
+                                                "properties": {
+                                                  "return_value": {
+                                                    "default": null,
+                                                    "title": "Return Value",
+                                                    "anyOf": [
+                                                      {
+                                                        "type": "string"
+                                                      },
+                                                      {
+                                                        "type": "null"
+                                                      }
+                                                    ]
+                                                  },
+                                                  "name": {
+                                                    "title": "Name",
+                                                    "type": "string"
+                                                  },
+                                                  "arguments": {
+                                                    "title": "Arguments",
+                                                    "type": "string"
+                                                  }
+                                                },
+                                                "type": "object"
+                                              },
+                                              "id": {
+                                                "title": "Id",
+                                                "type": "string"
+                                              },
+                                              "type": {
+                                                "const": "function",
+                                                "title": "Type",
+                                                "type": "string"
+                                              }
+                                            },
+                                            "type": "object"
+                                          },
+                                          "type": "array"
+                                        },
+                                        {
+                                          "type": "null"
+                                        }
+                                      ]
+                                    },
+                                    "reasoning_summary": {
+                                      "default": null,
+                                      "title": "Reasoning Summary",
+                                      "anyOf": [
+                                        {
+                                          "type": "string"
+                                        },
+                                        {
+                                          "type": "null"
+                                        }
+                                      ]
+                                    },
+                                    "name": {
+                                      "default": null,
+                                      "title": "Name",
+                                      "anyOf": [
+                                        {
+                                          "type": "string"
+                                        },
+                                        {
+                                          "type": "null"
+                                        }
+                                      ]
+                                    },
+                                    "function_call": {
+                                      "default": null,
+                                      "anyOf": [
+                                        {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "title": "Function",
+                                          "properties": {
+                                            "return_value": {
+                                              "default": null,
+                                              "title": "Return Value",
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "null"
+                                                }
+                                              ]
+                                            },
+                                            "name": {
+                                              "title": "Name",
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "title": "Arguments",
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        {
+                                          "type": "null"
+                                        }
+                                      ]
+                                    },
+                                    "refusal": {
+                                      "default": null,
+                                      "title": "Refusal",
+                                      "anyOf": [
+                                        {
+                                          "type": "string"
+                                        },
+                                        {
+                                          "type": "null"
+                                        }
+                                      ]
+                                    },
+                                    "trace_id": {
+                                      "default": null,
+                                      "title": "Trace Id",
+                                      "anyOf": [
+                                        {
+                                          "type": "string"
+                                        },
+                                        {
+                                          "type": "null"
+                                        }
+                                      ]
+                                    }
+                                  },
+                                  "type": "object"
+                                },
+                                "title": "Output",
+                                "type": "array"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "title": "Output",
+                          "type": "array"
+                        },
+                        "input": {
+                          "items": {
+                            "required": [
+                              "role",
+                              "content"
+                            ],
+                            "title": "ChatMessage",
+                            "properties": {
+                              "tool_call_id": {
+                                "default": null,
+                                "title": "Tool Call Id",
+                                "anyOf": [
+                                  {
+                                    "type": "string"
+                                  },
+                                  {
+                                    "type": "null"
+                                  }
+                                ]
+                              },
+                              "finish_reason": {
+                                "default": null,
+                                "title": "Finish Reason",
+                                "anyOf": [
+                                  {
+                                    "enum": [
+                                      "stop",
+                                      "length",
+                                      "tool_calls",
+                                      "content_filter",
+                                      "function_call"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  {
+                                    "type": "null"
+                                  }
+                                ]
+                              },
+                              "content": {
+                                "title": "Content",
+                                "anyOf": [
+                                  {
+                                    "type": "string"
+                                  },
+                                  {
+                                    "items": {
+                                      "anyOf": [
+                                        {
+                                          "required": [
+                                            "text",
+                                            "type"
+                                          ],
+                                          "additionalProperties": true,
+                                          "title": "ResponseInputText",
+                                          "properties": {
+                                            "text": {
+                                              "title": "Text",
+                                              "type": "string"
+                                            },
+                                            "type": {
+                                              "const": "input_text",
+                                              "title": "Type",
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        {
+                                          "required": [
+                                            "detail",
+                                            "type"
+                                          ],
+                                          "additionalProperties": true,
+                                          "title": "ResponseInputImage",
+                                          "properties": {
+                                            "file_id": {
+                                              "default": null,
+                                              "title": "File Id",
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "null"
+                                                }
+                                              ]
+                                            },
+                                            "detail": {
+                                              "title": "Detail",
+                                              "enum": [
+                                                "low",
+                                                "high",
+                                                "auto"
+                                              ],
+                                              "type": "string"
+                                            },
+                                            "type": {
+                                              "const": "input_image",
+                                              "title": "Type",
+                                              "type": "string"
+                                            },
+                                            "image_url": {
+                                              "default": null,
+                                              "title": "Image Url",
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "null"
+                                                }
+                                              ]
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        {
+                                          "required": [
+                                            "annotations",
+                                            "text",
+                                            "type"
+                                          ],
+                                          "additionalProperties": true,
+                                          "title": "ResponseOutputText",
+                                          "properties": {
+                                            "text": {
+                                              "title": "Text",
+                                              "type": "string"
+                                            },
+                                            "type": {
+                                              "const": "output_text",
+                                              "title": "Type",
+                                              "type": "string"
+                                            },
+                                            "logprobs": {
+                                              "default": null,
+                                              "title": "Logprobs",
+                                              "anyOf": [
+                                                {
+                                                  "items": {
+                                                    "required": [
+                                                      "token",
+                                                      "bytes",
+                                                      "logprob",
+                                                      "top_logprobs"
+                                                    ],
+                                                    "additionalProperties": true,
+                                                    "title": "Logprob",
+                                                    "properties": {
+                                                      "bytes": {
+                                                        "items": {
+                                                          "type": "integer"
+                                                        },
+                                                        "title": "Bytes",
+                                                        "type": "array"
+                                                      },
+                                                      "token": {
+                                                        "title": "Token",
+                                                        "type": "string"
+                                                      },
+                                                      "top_logprobs": {
+                                                        "items": {
+                                                          "required": [
+                                                            "token",
+                                                            "bytes",
+                                                            "logprob"
+                                                          ],
+                                                          "additionalProperties": true,
+                                                          "title": "LogprobTopLogprob",
+                                                          "properties": {
+                                                            "bytes": {
+                                                              "items": {
+                                                                "type": "integer"
+                                                              },
+                                                              "title": "Bytes",
+                                                              "type": "array"
+                                                            },
+                                                            "token": {
+                                                              "title": "Token",
+                                                              "type": "string"
+                                                            },
+                                                            "logprob": {
+                                                              "title": "Logprob",
+                                                              "type": "number"
+                                                            }
+                                                          },
+                                                          "type": "object"
+                                                        },
+                                                        "title": "Top Logprobs",
+                                                        "type": "array"
+                                                      },
+                                                      "logprob": {
+                                                        "title": "Logprob",
+                                                        "type": "number"
+                                                      }
+                                                    },
+                                                    "type": "object"
+                                                  },
+                                                  "type": "array"
+                                                },
+                                                {
+                                                  "type": "null"
+                                                }
+                                              ]
+                                            },
+                                            "annotations": {
+                                              "items": {
+                                                "anyOf": [
+                                                  {
+                                                    "required": [
+                                                      "file_id",
+                                                      "filename",
+                                                      "index",
+                                                      "type"
+                                                    ],
+                                                    "additionalProperties": true,
+                                                    "title": "AnnotationFileCitation",
+                                                    "properties": {
+                                                      "file_id": {
+                                                        "title": "File Id",
+                                                        "type": "string"
+                                                      },
+                                                      "index": {
+                                                        "title": "Index",
+                                                        "type": "integer"
+                                                      },
+                                                      "type": {
+                                                        "const": "file_citation",
+                                                        "title": "Type",
+                                                        "type": "string"
+                                                      },
+                                                      "filename": {
+                                                        "title": "Filename",
+                                                        "type": "string"
+                                                      }
+                                                    },
+                                                    "type": "object"
+                                                  },
+                                                  {
+                                                    "required": [
+                                                      "end_index",
+                                                      "start_index",
+                                                      "title",
+                                                      "type",
+                                                      "url"
+                                                    ],
+                                                    "additionalProperties": true,
+                                                    "title": "AnnotationURLCitation",
+                                                    "properties": {
+                                                      "start_index": {
+                                                        "title": "Start Index",
+                                                        "type": "integer"
+                                                      },
+                                                      "end_index": {
+                                                        "title": "End Index",
+                                                        "type": "integer"
+                                                      },
+                                                      "title": {
+                                                        "title": "Title",
+                                                        "type": "string"
+                                                      },
+                                                      "type": {
+                                                        "const": "url_citation",
+                                                        "title": "Type",
+                                                        "type": "string"
+                                                      },
+                                                      "url": {
+                                                        "title": "Url",
+                                                        "type": "string"
+                                                      }
+                                                    },
+                                                    "type": "object"
+                                                  },
+                                                  {
+                                                    "required": [
+                                                      "container_id",
+                                                      "end_index",
+                                                      "file_id",
+                                                      "filename",
+                                                      "start_index",
+                                                      "type"
+                                                    ],
+                                                    "additionalProperties": true,
+                                                    "title": "AnnotationContainerFileCitation",
+                                                    "properties": {
+                                                      "start_index": {
+                                                        "title": "Start Index",
+                                                        "type": "integer"
+                                                      },
+                                                      "end_index": {
+                                                        "title": "End Index",
+                                                        "type": "integer"
+                                                      },
+                                                      "type": {
+                                                        "const": "container_file_citation",
+                                                        "title": "Type",
+                                                        "type": "string"
+                                                      },
+                                                      "filename": {
+                                                        "title": "Filename",
+                                                        "type": "string"
+                                                      },
+                                                      "file_id": {
+                                                        "title": "File Id",
+                                                        "type": "string"
+                                                      },
+                                                      "container_id": {
+                                                        "title": "Container Id",
+                                                        "type": "string"
+                                                      }
+                                                    },
+                                                    "type": "object"
+                                                  },
+                                                  {
+                                                    "required": [
+                                                      "file_id",
+                                                      "index",
+                                                      "type"
+                                                    ],
+                                                    "additionalProperties": true,
+                                                    "title": "AnnotationFilePath",
+                                                    "properties": {
+                                                      "file_id": {
+                                                        "title": "File Id",
+                                                        "type": "string"
+                                                      },
+                                                      "index": {
+                                                        "title": "Index",
+                                                        "type": "integer"
+                                                      },
+                                                      "type": {
+                                                        "const": "file_path",
+                                                        "title": "Type",
+                                                        "type": "string"
+                                                      }
+                                                    },
+                                                    "type": "object"
+                                                  }
+                                                ]
+                                              },
+                                              "title": "Annotations",
+                                              "type": "array"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        {
+                                          "required": [
+                                            "type",
+                                            "input_audio"
+                                          ],
+                                          "title": "ResponseInputAudio",
+                                          "properties": {
+                                            "type": {
+                                              "const": "input_audio",
+                                              "title": "Type",
+                                              "type": "string"
+                                            },
+                                            "input_audio": {
+                                              "required": [
+                                                "data"
+                                              ],
+                                              "title": "AudioData",
+                                              "properties": {
+                                                "data": {
+                                                  "title": "Data",
+                                                  "type": "string"
+                                                },
+                                                "format": {
+                                                  "default": "wav",
+                                                  "title": "Format",
+                                                  "enum": [
+                                                    "wav",
+                                                    "mp3"
+                                                  ],
+                                                  "type": "string"
+                                                }
+                                              },
+                                              "type": "object"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        {
+                                          "required": [
+                                            "type",
+                                            "output_audio"
+                                          ],
+                                          "title": "ResponseOutputAudio",
+                                          "properties": {
+                                            "audio_transcript": {
+                                              "default": null,
+                                              "title": "Audio Transcript",
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "null"
+                                                }
+                                              ]
+                                            },
+                                            "output_audio": {
+                                              "required": [
+                                                "data"
+                                              ],
+                                              "title": "AudioData",
+                                              "properties": {
+                                                "data": {
+                                                  "title": "Data",
+                                                  "type": "string"
+                                                },
+                                                "format": {
+                                                  "default": "wav",
+                                                  "title": "Format",
+                                                  "enum": [
+                                                    "wav",
+                                                    "mp3"
+                                                  ],
+                                                  "type": "string"
+                                                }
+                                              },
+                                              "type": "object"
+                                            },
+                                            "type": {
+                                              "const": "output_audio",
+                                              "title": "Type",
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        }
+                                      ]
+                                    },
+                                    "type": "array"
+                                  }
+                                ]
+                              },
+                              "role": {
+                                "title": "Role",
+                                "enum": [
+                                  "system",
+                                  "user",
+                                  "assistant",
+                                  "developer",
+                                  "tool",
+                                  "function"
+                                ],
+                                "type": "string"
+                              },
+                              "tool_calls": {
+                                "default": null,
+                                "title": "Tool Calls",
+                                "anyOf": [
+                                  {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "title": "FunctionCall",
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "title": "Function",
+                                          "properties": {
+                                            "return_value": {
+                                              "default": null,
+                                              "title": "Return Value",
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "null"
+                                                }
+                                              ]
+                                            },
+                                            "name": {
+                                              "title": "Name",
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "title": "Arguments",
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "title": "Id",
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "const": "function",
+                                          "title": "Type",
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": "array"
+                                  },
+                                  {
+                                    "type": "null"
+                                  }
+                                ]
+                              },
+                              "reasoning_summary": {
+                                "default": null,
+                                "title": "Reasoning Summary",
+                                "anyOf": [
+                                  {
+                                    "type": "string"
+                                  },
+                                  {
+                                    "type": "null"
+                                  }
+                                ]
+                              },
+                              "name": {
+                                "default": null,
+                                "title": "Name",
+                                "anyOf": [
+                                  {
+                                    "type": "string"
+                                  },
+                                  {
+                                    "type": "null"
+                                  }
+                                ]
+                              },
+                              "function_call": {
+                                "default": null,
+                                "anyOf": [
+                                  {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "title": "Function",
+                                    "properties": {
+                                      "return_value": {
+                                        "default": null,
+                                        "title": "Return Value",
+                                        "anyOf": [
+                                          {
+                                            "type": "string"
+                                          },
+                                          {
+                                            "type": "null"
+                                          }
+                                        ]
+                                      },
+                                      "name": {
+                                        "title": "Name",
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "title": "Arguments",
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": "object"
+                                  },
+                                  {
+                                    "type": "null"
+                                  }
+                                ]
+                              },
+                              "refusal": {
+                                "default": null,
+                                "title": "Refusal",
+                                "anyOf": [
+                                  {
+                                    "type": "string"
+                                  },
+                                  {
+                                    "type": "null"
+                                  }
+                                ]
+                              },
+                              "trace_id": {
+                                "default": null,
+                                "title": "Trace Id",
+                                "anyOf": [
+                                  {
+                                    "type": "string"
+                                  },
+                                  {
+                                    "type": "null"
+                                  }
+                                ]
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "title": "Input",
+                          "type": "array"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                },
+                "metadata": {
+                  "usecase": "chatbot"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-8dee6df0-c9c3-4d4f-b500-c21b54aab4c9",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            }
+          ],
+          "first_id": "eval_692886ca71948191a94a46ef2866fa38",
+          "has_more": true,
+          "last_id": "eval_69288313e4408191922bf1863f2ba432"
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:45 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_list_setup.yml b/spec/fixtures/cassettes/evals_list_setup.yml
new file mode 100644
index 00000000..8d4d6769
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_list_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:45 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_ad59a4421109d03bf16f0dd5d568cb43
+      Openai-Processing-Ms:
+      - '187'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '189'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=2URRA0QzprNNuNe_uY_bAL3M.w.SeU2zUU2uu7Rgmz0-1764265065-1.0.1.1-vCfd9v8Qx1oKdvy8E2piuOxlGUK3rPotuNjTxl6IQYJ5WVaFqosWj4aLGxWLszSIoI0n04TnlJXIP.7QK4mv0e4P5vPOsuKUcXtA8Li09Yc;
+        path=/; expires=Thu, 27-Nov-25 18:07:45 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=YlC8_GZoyq1p_SeriLDaxuZ8sWYRFXIDpqQJzx6Rz.4-1764265065064-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53652e5cc9b11c-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c68ed50819185ed5845facf0be6",
+          "object": "eval",
+          "created_at": 1764265064,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-cd6a1097-7691-4502-a5f0-49c7e0043428",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:45 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_retrieve.yml b/spec/fixtures/cassettes/evals_retrieve.yml
new file mode 100644
index 00000000..aef193a5
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_retrieve.yml
@@ -0,0 +1,274 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_69288c65203881918b2f062b30cd7aa9
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:42 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_34dbc89ee9874a57aa011f2e89ff09c1
+      Openai-Processing-Ms:
+      - '64'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '67'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=u9ZriP.qwIAjZx8EdcGA_lIdQSy3JoUCEUCsrPXIkpA-1764265062-1.0.1.1-LNP5FQOOpKBdaIc3C2ccuYy.iL1pp_bMijvzoBpBvlWXNbP0g8wxMdoCOzPwpuOjSF8v3iva6oKWgOm89yNOz6qPnPaZSgHd9uPKbeJYKP4;
+        path=/; expires=Thu, 27-Nov-25 18:07:42 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=VCPQsuc9dpzChvitHps0wIlQBQgFbIHfaNT3JZ0f3XA-1764265062088-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365199dad35c5-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c65203881918b2f062b30cd7aa9",
+          "object": "eval",
+          "created_at": 1764265061,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-4e90c6d8-f26c-4cb4-a339-41b1b1bb0532",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:42 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_retrieve_setup.yml b/spec/fixtures/cassettes/evals_retrieve_setup.yml
new file mode 100644
index 00000000..af74513e
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_retrieve_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:41 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_93497c36b5e84c57ad0a8e3d9de98b23
+      Openai-Processing-Ms:
+      - '560'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '563'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=pLXAPhTauwRvA5Vu6GbOt1q0_W6MjmZflR3Trnvy8Z4-1764265061-1.0.1.1-N39qqUPxahcowOUZhs4ckWv3aC_u8dg2t9h8L492QtXIcewUsoTNAwIACQ6T8z8JFW6AWgAWg0AfaTZC5aEnahw312LxhY4ItvhkktWXlxM;
+        path=/; expires=Thu, 27-Nov-25 18:07:41 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=GzrCuuyexD.2XVBCWz_QtDE76Mqhxn7C3kVHuxbsHNY-1764265061277-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365124ca72216-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c65203881918b2f062b30cd7aa9",
+          "object": "eval",
+          "created_at": 1764265061,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-4e90c6d8-f26c-4cb4-a339-41b1b1bb0532",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:41 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_cancel.yml b/spec/fixtures/cassettes/evals_runs_cancel.yml
new file mode 100644
index 00000000..7510d981
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_cancel.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c7132e48191ad7ff46c32cf1c46/runs/evalrun_69288c71b74c819183f7e7ed01b4d5ff/cancel
+    body:
+      encoding: UTF-8
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Content-Length:
+      - '0'
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:54 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_9ce39fdd8e538dc9a6878bf4d0daa643
+      Openai-Processing-Ms:
+      - '670'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '672'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=4b4Q3YWwvFU9VANtkY3qjcqR_gDeiqYZCwJIyxsrEjU-1764265074-1.0.1.1-ymldn.MFe8F6uLfJ4vp5MhutNsXSjTy.gBbXOCzxLpiVbDIzmkPqtJS6g8.yD86fizxh3Qf.31nb7jxqc0v62ntUCa8wtPt2laRT4wtO7SE;
+        path=/; expires=Thu, 27-Nov-25 18:07:54 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=kVuspCEagYsUYNL.F9tTWd5XOiXGUoSC6cFPRSIZe.s-1764265074927-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365693b274f47-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c71b74c819183f7e7ed01b4d5ff",
+          "object": "eval.run",
+          "created_at": 1764265073,
+          "status": "canceled",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c7132e48191ad7ff46c32cf1c46",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c7132e48191ad7ff46c32cf1c46?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c71b74c819183f7e7ed01b4d5ff",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:54 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_cancel_run_setup.yml b/spec/fixtures/cassettes/evals_runs_cancel_run_setup.yml
new file mode 100644
index 00000000..dba503e1
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_cancel_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c7132e48191ad7ff46c32cf1c46/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:54 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_0093cd2118771c8e402a60b5407cc55a
+      Openai-Processing-Ms:
+      - '406'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '408'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=uM0mV1hkYyLOKIK2GYHYsHFckJcIb2swTCF8MGpNWRQ-1764265074-1.0.1.1-MVHCmdcMYkqpRZoXUHf1Xaqx4uSf2gOzKUZef1C2cA11076.c5EnrNdYpkEC9Plv.aSuHmwDcU8ymp8rmkRfLcdBBpKEldY1LB4VhoKgS1A;
+        path=/; expires=Thu, 27-Nov-25 18:07:54 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=4CiPhk45QtL_dsOtL13kGKXPphdXg2Qyj1NKAWiyHTI-1764265074004-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a536564fcdcb0fb-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c71b74c819183f7e7ed01b4d5ff",
+          "object": "eval.run",
+          "created_at": 1764265073,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c7132e48191ad7ff46c32cf1c46",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c7132e48191ad7ff46c32cf1c46?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c71b74c819183f7e7ed01b4d5ff",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:53 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_cancel_setup.yml b/spec/fixtures/cassettes/evals_runs_cancel_setup.yml
new file mode 100644
index 00000000..a25921ad
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_cancel_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:53 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_95082c9c904d2480498f6f2999b250ac
+      Openai-Processing-Ms:
+      - '178'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '181'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=a7Te_D7MzgN1F9UQdvVgcdF66cnb72ZVkd7UpZRWGEE-1764265073-1.0.1.1-eYABxh1jILbTiLG6XrF.sWnR_vYqKaf7DPw91z9BFU_NJHkWacIIj5NEgutT3e0Rn.2VhJ1TXNP3vkFcu6rE1cqQsrOp_OwWQ6.JR4IOpdU;
+        path=/; expires=Thu, 27-Nov-25 18:07:53 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=.Ivd8N1lBfEG2ehGEYjU_vQM.irXnKU.iXYfQEgtkqU-1764265073337-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365625eb4a935-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c7132e48191ad7ff46c32cf1c46",
+          "object": "eval",
+          "created_at": 1764265073,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-27eb57ce-63cc-42fb-aa8b-6f3ab2cf73b8",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:53 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_create.yml b/spec/fixtures/cassettes/evals_runs_create.yml
new file mode 100644
index 00000000..906b6003
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_create.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c6ba2348191be05c2d02d0e0c0b/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:48 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_a227f8a26a2131d8473e1c92074eec46
+      Openai-Processing-Ms:
+      - '760'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '763'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=9ZswAkI3q8s5c6ThS5HjOpifSgedQWKidh4TOo7iv3w-1764265068-1.0.1.1-5Yims_ZHAyjNs7xrqmoYvMK3buvL3vGW1YIhz5oCF.FUvl1TnFPMg4zlbLZ7ScgFbFNlFTrwtzN8lCVxXd3.SjsaGt3sI00xabiVGrQdZfE;
+        path=/; expires=Thu, 27-Nov-25 18:07:48 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=WZSrX35UqN8Z.mHJ69EsSjq5J3..4kRCvd91oQHmxCI-1764265068789-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365422eff2210-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c6c25fc8191ac632b505a3ff1c9",
+          "object": "eval.run",
+          "created_at": 1764265068,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c6ba2348191be05c2d02d0e0c0b",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c6ba2348191be05c2d02d0e0c0b?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c6c25fc8191ac632b505a3ff1c9",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:48 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_create_setup.yml b/spec/fixtures/cassettes/evals_runs_create_setup.yml
new file mode 100644
index 00000000..a866fb21
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_create_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:47 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_96f911be32e1840672acd68fd0dea03f
+      Openai-Processing-Ms:
+      - '234'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '237'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=iSICdk.EdIDyle.RQysHEXmyES4qOD6qYZrmaELQLDA-1764265067-1.0.1.1-_E3nGyLYoebWy.Ed9MMjOgHi28ZepbGSTPvdl7w161O2ex_rTGDSlvf2p5c_uOnYMNQRWXYJ.HxGByHkU4miSAhurDxSKB.6hDN8ZIOX_mg;
+        path=/; expires=Thu, 27-Nov-25 18:07:47 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=vOcml4pSfdEN84KBG_RBgnPJ2nrh14amro7Q1K0d4xs-1764265067769-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53653f0c150c19-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c6ba2348191be05c2d02d0e0c0b",
+          "object": "eval",
+          "created_at": 1764265067,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-9474cccc-047e-4ddf-bb6f-b983ca6aef0d",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:47 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_delete.yml b/spec/fixtures/cassettes/evals_runs_delete.yml
new file mode 100644
index 00000000..45012dc0
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_delete.yml
@@ -0,0 +1,196 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288d369be48191a9dd469d6cb22c9f/runs/evalrun_69288d37912c81919e794ac84108363b/cancel
+    body:
+      encoding: UTF-8
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Content-Length:
+      - '0'
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:41:13 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_0e5f5722ba2a4e819f366b5b08f03d05
+      Openai-Processing-Ms:
+      - '555'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '558'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=HUQa2g4EML8HJ0fSYEyP8Y1LvFWAs0vMtx_2K2jXggA-1764265273-1.0.1.1-wLir27KcO3TJiEsJtTgjKpw02lSnGmQNyqlkZ4ljpvWaIsI5f4TrbxqT2Fi9klEzlfvGbpNyTaVJjcazBJ0Cu5Ezr_yl_Rns_F_9yxrGoFM;
+        path=/; expires=Thu, 27-Nov-25 18:11:13 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=VPa8xE7AHfpB9ZpDZZ8Tb0x8BiBplf_CE5ylsOX4FZE-1764265273069-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a536a403929eb17-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288d37912c81919e794ac84108363b",
+          "object": "eval.run",
+          "created_at": 1764265271,
+          "status": "canceled",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288d369be48191a9dd469d6cb22c9f",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288d369be48191a9dd469d6cb22c9f?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288d37912c81919e794ac84108363b",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:41:13 GMT
+- request:
+    method: delete
+    uri: https://api.openai.com/v1/evals/eval_69288d369be48191a9dd469d6cb22c9f/runs/evalrun_69288d37912c81919e794ac84108363b
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:41:14 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_f31add9c41781b9ef7f694152f59dadf
+      Openai-Processing-Ms:
+      - '763'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '767'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=f9nvHlPi3UV7AK.TJumxczUKk1dIQDhExsuDFexiC.o-1764265274-1.0.1.1-GRqhzByoSCF1KKz1tZXP23mtv9XboeAZfYge3kRkZ2xpVGDqAuxUOpqK8oxTahqMEYXBH1NvIDolFuhNGFmlJsbU2s_qGdYlnjxe5odg_Ss;
+        path=/; expires=Thu, 27-Nov-25 18:11:14 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=kt_WqBTaaNSKPkhG35J4JsD8KAwC3rcdQEFaxw2b_do-1764265274323-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a536a454a12b140-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "eval.run.deleted",
+          "deleted": true,
+          "run_id": "evalrun_69288d37912c81919e794ac84108363b"
+        }
+  recorded_at: Thu, 27 Nov 2025 17:41:14 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_delete_run_setup.yml b/spec/fixtures/cassettes/evals_runs_delete_run_setup.yml
new file mode 100644
index 00000000..827634fd
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_delete_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288d369be48191a9dd469d6cb22c9f/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:41:12 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_992f53ec833344ac9d252a854242aad9
+      Openai-Processing-Ms:
+      - '1074'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '1077'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=pDzZar7eZ1cWdnVwvzRNA5pYoyYTAiFvoayFkrLokkA-1764265272-1.0.1.1-kU3SDYb6dj0Qzau6KPvdiPo5Z47dqdwFaMyhjpDaOQTtoNYKjMiuP.KEH.2iKgH43P.beXjXno_HWnkeoNJl5zm4vVXQT15LoUKrnGq55YE;
+        path=/; expires=Thu, 27-Nov-25 18:11:12 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=ptb8p7NZ79WAj3ZgZcOYPNFxCtjgfJHstwiuvU1ZPrw-1764265272257-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a536a36bf98a935-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288d37912c81919e794ac84108363b",
+          "object": "eval.run",
+          "created_at": 1764265271,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288d369be48191a9dd469d6cb22c9f",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288d369be48191a9dd469d6cb22c9f?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288d37912c81919e794ac84108363b",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:41:12 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_delete_setup.yml b/spec/fixtures/cassettes/evals_runs_delete_setup.yml
new file mode 100644
index 00000000..88d8195d
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_delete_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:41:10 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_e6cd656f098a4c5383491d45df4c6bdc
+      Openai-Processing-Ms:
+      - '210'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '213'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=eoMCgwTjmGa8TpChmbY35gYtGNsdnvJDG9Oy_HucA3A-1764265270-1.0.1.1-nu7guHa3w50bOYH9x7yVao20MEVGw4A6WC6gnPKshlJDGsawMg9XLgkgmeG0jhE.kQIM6LLGI_Dsm56wwalEAdBPK5Y9RzVtmPsZ.3CjgDo;
+        path=/; expires=Thu, 27-Nov-25 18:11:10 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=wvlyhu5ZmXw1nXW0.HaQuloGBXKaYEUGpYRztDTf63U-1764265270748-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a536a330a47b367-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288d369be48191a9dd469d6cb22c9f",
+          "object": "eval",
+          "created_at": 1764265270,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-44de3f73-0711-4426-8b59-cb26720e53f5",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:41:10 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_list.yml b/spec/fixtures/cassettes/evals_runs_output_items_list.yml
new file mode 100644
index 00000000..c0e38c0d
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_list.yml
@@ -0,0 +1,76 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_69288c6d54f48191bb18d415547ff09c/runs/evalrun_69288c6e2dac819181b017027cd1d2ba/output_items
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:51 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_044e8ae19b15a7101a63c51d97527437
+      Openai-Processing-Ms:
+      - '380'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '382'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=Ij1_ZQwKnjbo57fg2wgjLL6wo6ykZnDO32Ca0ushv18-1764265071-1.0.1.1-b0Zelat_yRyQHGzvXuBH7A62cjICDnwr8.KXH1ZNQ8nhk9qYmI4U3xE.6e6CojNe5CAODZVFAWymziugL4BG96hgHnpMUmgZzkh.0aCpR9M;
+        path=/; expires=Thu, 27-Nov-25 18:07:51 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=5kFavy3PInCs8S45ZxHlLdbcTlGBpxX80ap0mbCmaGc-1764265071138-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365532c7ab128-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "list",
+          "data": [],
+          "first_id": null,
+          "has_more": false,
+          "last_id": null
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:51 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_list_output_item_setup.yml b/spec/fixtures/cassettes/evals_runs_output_items_list_output_item_setup.yml
new file mode 100644
index 00000000..4c4d5055
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_list_output_item_setup.yml
@@ -0,0 +1,76 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_692886c4402c8191b16daf0a13927d55/runs/evalrun_692886c4cb7c8191a6d061543b6c0224/output_items
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:13:41 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_03cd0ef2eb6148bb9db8a92286faa23a
+      Openai-Processing-Ms:
+      - '224'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '228'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=6MUOh_RSBEwcm8ZA61n6mZa9_Wlfc.sKgtkfiFWcBjA-1764263621-1.0.1.1-itCbxTpT_mHZQI1ZXkaSUhKMXrMSXzYfYyQQ6PmQ56wgvn7uZ862F8GJ9vlxbxvngqovWvP8hgIFeb_iO5RRWbraUfIQSd4DVNPiQtwzIVI;
+        path=/; expires=Thu, 27-Nov-25 17:43:41 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=jphYlrigl5iW9Plhyq3CwLH4hYQY1Qp8MYaUJmIgoqc-1764263621721-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5341f08cb6220e-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "list",
+          "data": [],
+          "first_id": null,
+          "has_more": false,
+          "last_id": null
+        }
+  recorded_at: Thu, 27 Nov 2025 17:13:41 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_list_run_setup.yml b/spec/fixtures/cassettes/evals_runs_output_items_list_run_setup.yml
new file mode 100644
index 00000000..d75a849b
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_list_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c6d54f48191bb18d415547ff09c/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:50 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_88e93be9ef8d9e6a97cc9becd3e56a22
+      Openai-Processing-Ms:
+      - '441'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '444'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=yn01YpoTluz_g0dCfOJu029HxxIE0YxtsV_IDxPV0cA-1764265070-1.0.1.1-RGZeYvFOEMNrnyrsOeT2CRd0vOtSqedJVlTGy7CyAvS2Gc9xzTqqGHOlPIQnWUpK77v2rogZPxT1htz3zdV7c0ACVCmsva90aIUPOPh8sh0;
+        path=/; expires=Thu, 27-Nov-25 18:07:50 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=zo1jRkukrqcGsIqmCJTtvx_F..hdTyvBGAcHj4x.aEo-1764265070481-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53654ceffb2196-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c6e2dac819181b017027cd1d2ba",
+          "object": "eval.run",
+          "created_at": 1764265070,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c6d54f48191bb18d415547ff09c",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c6d54f48191bb18d415547ff09c?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c6e2dac819181b017027cd1d2ba",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:50 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_list_setup.yml b/spec/fixtures/cassettes/evals_runs_output_items_list_setup.yml
new file mode 100644
index 00000000..c1a0cf44
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_list_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:49 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_900410bfa9674eb2bb47cbf185769adc
+      Openai-Processing-Ms:
+      - '184'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '187'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=HULhDfLfjW5mhRIsax.joyijyBBd7EmO2Q3O_IVs7sE-1764265069-1.0.1.1-U_UGCd9.7V5Vj1C.0tk_ra2ZeJ8wT6gajpUlS1MtflDyxwh.9EhYL1aVQCE7pMrSuLNCSyFtkGCltY2l.WqoKyBI0udBVF18gFbumUpDkZk;
+        path=/; expires=Thu, 27-Nov-25 18:07:49 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=xXc2dl6GdscUZ1VH5HMyd84ZVjDXG6GEdzqczQKDTQA-1764265069472-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365488bf0cca5-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c6d54f48191bb18d415547ff09c",
+          "object": "eval",
+          "created_at": 1764265069,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-abb2f7c9-703e-4ea7-93fa-a96fa580dcd4",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:49 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_retrieve.yml b/spec/fixtures/cassettes/evals_runs_output_items_retrieve.yml
new file mode 100644
index 00000000..224a38d7
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_retrieve.yml
@@ -0,0 +1,303 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_69288c6f782c8191900a5cb6be3db61e/runs/evalrun_69288c70a03881919438f1de10070910/output_items
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:57:40 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_63f77af99d92f4ad3817b3439f12de1c
+      Openai-Processing-Ms:
+      - '386'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '388'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=3KuybbQ5ivkgrdoN8P1j2dPObVied6ifNmVh4kUsZbQ-1764266260-1.0.1.1-yfOh7gnUJL3FHVOt4IEgFaKUZldHA2wK6vJofU4dU.x11w9ng8sRVPOrbK4ASfW2RI3L86PErGFm7tv3qy9VUza6CNSyWLgJxcSynLbK9z0;
+        path=/; expires=Thu, 27-Nov-25 18:27:40 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=jA56LhejCounC.ZFpgofvJ2Nk9OTb5HVXFeLiqACL1U-1764266260747-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53825d6a22eb1d-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "list",
+          "data": [
+            {
+              "id": "outputitem_69288c7485688191b3f81ee02a17a2b8",
+              "object": "eval.run.output_item",
+              "created_at": 1764265076,
+              "status": "pass",
+              "_datasource_item_content_hash": "07bd0d39b771a2e3976c536264799dc3f2b6e5e943a8d68dc3058bac176de445",
+              "available_includes": [],
+              "datasource_item": {
+                "input": "I love this product!",
+                "ground_truth": "positive"
+              },
+              "datasource_item_id": 0,
+              "eval_id": "eval_69288c6f782c8191900a5cb6be3db61e",
+              "results": [
+                {
+                  "name": "Sentiment grader-32bb5cf1-b6c0-4030-ba29-10a1dca004ac",
+                  "score": 1.0,
+                  "passed": true,
+                  "sample": {
+                    "input": [
+                      {
+                        "role": "developer",
+                        "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                      },
+                      {
+                        "role": "user",
+                        "content": "Statement: I love this product!"
+                      }
+                    ],
+                    "output": [
+                      {
+                        "role": "assistant",
+                        "content": "{\n  \"steps\": [\n    {\n      \"description\": \"The statement 'I love this product!' includes the word 'love,' indicating a strong positive emotion towards the product.\",\n      \"conclusion\": \"The sentiment is positive.\"\n    }\n  ],\n  \"result\": \"positive\"\n}"
+                      }
+                    ],
+                    "finish_reason": "stop",
+                    "model": "o3-mini-2025-01-31",
+                    "usage": {
+                      "total_tokens": 287,
+                      "completion_tokens": 137,
+                      "prompt_tokens": 150,
+                      "cached_tokens": 0
+                    },
+                    "error": null,
+                    "temperature": 1.0,
+                    "top_p": 1.0
+                  }
+                }
+              ],
+              "run_id": "evalrun_69288c70a03881919438f1de10070910",
+              "sample": {
+                "input": [
+                  {
+                    "role": "developer",
+                    "content": "You are a helpful assistant."
+                  },
+                  {
+                    "role": "user",
+                    "content": "I love this product!"
+                  }
+                ],
+                "output": [
+                  {
+                    "role": "assistant",
+                    "content": "That's great to hear! What product are you referring to? I'd love to know more about it and what you enjoy about it!"
+                  }
+                ],
+                "finish_reason": "stop",
+                "model": "gpt-4o-mini-2024-07-18",
+                "usage": {
+                  "total_tokens": 48,
+                  "completion_tokens": 26,
+                  "prompt_tokens": 22,
+                  "cached_tokens": 0
+                },
+                "error": null,
+                "temperature": 1.0,
+                "top_p": 1.0
+              }
+            }
+          ],
+          "first_id": "outputitem_69288c7485688191b3f81ee02a17a2b8",
+          "has_more": false,
+          "last_id": "outputitem_69288c7485688191b3f81ee02a17a2b8"
+        }
+  recorded_at: Thu, 27 Nov 2025 17:57:40 GMT
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_69288c6f782c8191900a5cb6be3db61e/runs/evalrun_69288c70a03881919438f1de10070910/output_items/outputitem_69288c7485688191b3f81ee02a17a2b8
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:57:41 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_96ab283a68914e5aae9e838825b84daa
+      Openai-Processing-Ms:
+      - '275'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '277'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=MpuILeElqrljrDl2kgh.q2CvWdew2aULy1VsYpIJLNw-1764266261-1.0.1.1-FOnzEQiiVXTlMCOaCE7fFSD2AiAu_M8r__8x6H8TAdj1_u0cNrAVEidH2LWeOVx022TfA4Qojh2ARuZcSxvIArc6GC0PpDUH_lV_7NpqB34;
+        path=/; expires=Thu, 27-Nov-25 18:27:41 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=qAK71GovjGWZA0BgqVhkKve3h_zFGkBk0nJgN3qfOzY-1764266261401-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5382624872b134-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "outputitem_69288c7485688191b3f81ee02a17a2b8",
+          "object": "eval.run.output_item",
+          "created_at": 1764265076,
+          "status": "pass",
+          "_datasource_item_content_hash": "07bd0d39b771a2e3976c536264799dc3f2b6e5e943a8d68dc3058bac176de445",
+          "available_includes": [],
+          "datasource_item": {
+            "input": "I love this product!",
+            "ground_truth": "positive"
+          },
+          "datasource_item_id": 0,
+          "eval_id": "eval_69288c6f782c8191900a5cb6be3db61e",
+          "results": [
+            {
+              "name": "Sentiment grader-32bb5cf1-b6c0-4030-ba29-10a1dca004ac",
+              "score": 1.0,
+              "passed": true,
+              "sample": {
+                "input": [
+                  {
+                    "role": "developer",
+                    "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                  },
+                  {
+                    "role": "user",
+                    "content": "Statement: I love this product!"
+                  }
+                ],
+                "output": [
+                  {
+                    "role": "assistant",
+                    "content": "{\n  \"steps\": [\n    {\n      \"description\": \"The statement 'I love this product!' includes the word 'love,' indicating a strong positive emotion towards the product.\",\n      \"conclusion\": \"The sentiment is positive.\"\n    }\n  ],\n  \"result\": \"positive\"\n}"
+                  }
+                ],
+                "finish_reason": "stop",
+                "model": "o3-mini-2025-01-31",
+                "usage": {
+                  "total_tokens": 287,
+                  "completion_tokens": 137,
+                  "prompt_tokens": 150,
+                  "cached_tokens": 0
+                },
+                "error": null,
+                "temperature": 1.0,
+                "top_p": 1.0
+              }
+            }
+          ],
+          "run_id": "evalrun_69288c70a03881919438f1de10070910",
+          "sample": {
+            "input": [
+              {
+                "role": "developer",
+                "content": "You are a helpful assistant."
+              },
+              {
+                "role": "user",
+                "content": "I love this product!"
+              }
+            ],
+            "output": [
+              {
+                "role": "assistant",
+                "content": "That's great to hear! What product are you referring to? I'd love to know more about it and what you enjoy about it!"
+              }
+            ],
+            "finish_reason": "stop",
+            "model": "gpt-4o-mini-2024-07-18",
+            "usage": {
+              "total_tokens": 48,
+              "completion_tokens": 26,
+              "prompt_tokens": 22,
+              "cached_tokens": 0
+            },
+            "error": null,
+            "temperature": 1.0,
+            "top_p": 1.0
+          }
+        }
+  recorded_at: Thu, 27 Nov 2025 17:57:41 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_retrieve_run_setup.yml b/spec/fixtures/cassettes/evals_runs_output_items_retrieve_run_setup.yml
new file mode 100644
index 00000000..b0641740
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_retrieve_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c6f782c8191900a5cb6be3db61e/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:52 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_8024ef0cea996ec159db9512d79643bb
+      Openai-Processing-Ms:
+      - '888'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '891'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=OG6k0pEHTzDFHWR2hZstktrhqxb5xEApbyEXWLX4J8w-1764265072-1.0.1.1-0aVXMbN5qlCvwfOOYv.im0ZQN7B3cb4W52hJd2LtyueNQyeuwczOl6AHFZ5gaPajOy28Kn.BYIMKh1RqUcZZm7mEq_1KRSOaywG2rSXPviQ;
+        path=/; expires=Thu, 27-Nov-25 18:07:52 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=zX24zGeI3V58S_T1gJDemI.15.Ut328syKiyJ8nXK54-1764265072902-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53655af947ba0d-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c70a03881919438f1de10070910",
+          "object": "eval.run",
+          "created_at": 1764265072,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c6f782c8191900a5cb6be3db61e",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c6f782c8191900a5cb6be3db61e?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c70a03881919438f1de10070910",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:52 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_retrieve_setup.yml b/spec/fixtures/cassettes/evals_runs_output_items_retrieve_setup.yml
new file mode 100644
index 00000000..ac064fd8
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_retrieve_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:51 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_db025b54c4df6310390ca1dd18fd378a
+      Openai-Processing-Ms:
+      - '317'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '320'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=udxiDFrwu5WKkGQ9Wvj3P_U_AnoaYhAp9zKt1nDyASU-1764265071-1.0.1.1-r03vlkmGOoO_2v_HrjcqpRTyPZ6rGYjbwYzO.KNtyhCU1Ku4ZRHVSF8mczGOzNlJMakxHihZ3jobRlOUXanSLN18EwHvSiQlZ_b27rr5e.o;
+        path=/; expires=Thu, 27-Nov-25 18:07:51 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=qk9zb8mfB6JGfHeQNy2ogyookxBtkWpGrh_Uhe_b8zA-1764265071717-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365574b9eb3ae-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c6f782c8191900a5cb6be3db61e",
+          "object": "eval",
+          "created_at": 1764265071,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-32bb5cf1-b6c0-4030-ba29-10a1dca004ac",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:51 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_retrieve.yml b/spec/fixtures/cassettes/evals_runs_retrieve.yml
new file mode 100644
index 00000000..2e930997
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_retrieve.yml
@@ -0,0 +1,123 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_69288c6a0c488191ac6adc60180c4d03/runs/evalrun_69288c6a90708191b86c7e82b893c846
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:47 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_46323b933a8c92d4fb5a19503306f7e9
+      Openai-Processing-Ms:
+      - '135'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '138'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=qweBroIdUWsc2kwidjmrBoBjh0AgfmD9HIlYa7dr9U8-1764265067-1.0.1.1-s4MB7.JR0s1NVmhpFZtvnXP.ZjY7LBRRIGftgzUzTdfSaoEYyOXOl5b46LNRRoz_RSSRyhL_aRcnfjC259bzh4VuzydeG3pQrpbPzJ4pImQ;
+        path=/; expires=Thu, 27-Nov-25 18:07:47 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=tN3KJHPGcEeisrKyUp1v_Qxgl0niwbz0WWiaLrsuvfI-1764265067259-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53653c9a130d8b-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c6a90708191b86c7e82b893c846",
+          "object": "eval.run",
+          "created_at": 1764265066,
+          "status": "in_progress",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c6a0c488191ac6adc60180c4d03",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c6a0c488191ac6adc60180c4d03?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c6a90708191b86c7e82b893c846",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:47 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_retrieve_run_setup.yml b/spec/fixtures/cassettes/evals_runs_retrieve_run_setup.yml
new file mode 100644
index 00000000..82827f0e
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_retrieve_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c6a0c488191ac6adc60180c4d03/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:46 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_7376dfd4e698e39760ab9f9e861b62ba
+      Openai-Processing-Ms:
+      - '430'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '433'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=Ob0N0Dhz8GBj8HxJrxf5mkfWFLAmmMZQhy1uzMJu0ho-1764265066-1.0.1.1-VrQ2qANnuSoglCiTa7t5h0DFmGk93wFj.RDjRQoZio10n7A0..tRymAKWzWbH0LhYlfiCIMRePofe78ZAi4rtW1D.Pno7zI6O3cB2MDW_2U;
+        path=/; expires=Thu, 27-Nov-25 18:07:46 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=Vtmm6kZeYPTgBS0vw15wIJboU1CXLC3c04o.l7r55W4-1764265066871-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365385d0e89d9-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c6a90708191b86c7e82b893c846",
+          "object": "eval.run",
+          "created_at": 1764265066,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c6a0c488191ac6adc60180c4d03",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c6a0c488191ac6adc60180c4d03?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c6a90708191b86c7e82b893c846",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:46 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_retrieve_setup.yml b/spec/fixtures/cassettes/evals_runs_retrieve_setup.yml
new file mode 100644
index 00000000..ef5954b9
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_retrieve_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:46 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_327229bac7ba039a82b7a1f8e5e9115a
+      Openai-Processing-Ms:
+      - '215'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '218'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=_zFDCgf3us1YhS..PbKumYZeV7ao_jczl7G5ViGrC70-1764265066-1.0.1.1-ugnL3IdDBgW7rjNvXjez6DPCmq0TSqhMfUqfmLGMSE.0rtQu9qKKpxPaGTFdthGabPTwXNlOsfcDSAPRKhUGODnxYZwkKwbdQiOu3xwKaDQ;
+        path=/; expires=Thu, 27-Nov-25 18:07:46 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=hU4FpeK1yL8DXLn7h76pYGgqJNJBbeYLu8_.K2R6ifw-1764265066186-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53653538ffeb1a-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c6a0c488191ac6adc60180c4d03",
+          "object": "eval",
+          "created_at": 1764265066,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-05bd0d3d-fee4-42ce-a2ad-6c9a8ad3d6a1",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:46 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_update.yml b/spec/fixtures/cassettes/evals_update.yml
new file mode 100644
index 00000000..c44a3a0c
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_update.yml
@@ -0,0 +1,276 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c67f948819192f601708f5599d9
+    body:
+      encoding: UTF-8
+      string: '{"metadata":{"modified":"true"}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:44 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_76577432037fb0cc3a49b85a478dd4ea
+      Openai-Processing-Ms:
+      - '250'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '253'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=LquyUkXNYgQfoMPEMVOLWFRNC_qD2hsiye2hDetrVF8-1764265064-1.0.1.1-9VDp9tH4CN_s_mg8W6Vfs8hkXfGh1fQy4keP3IgG719WIFoAGCpfkiaqn4PbLZHYnPziTRgbhsWHEqDbHepqoAmFdKvAx4jPAb.6gBLYRHk;
+        path=/; expires=Thu, 27-Nov-25 18:07:44 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=lZ7wtHiLT.0eAO_IUzVM9XwnK4S.DoHF2kk1gOfdPaE-1764265064596-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53652b4a49d85e-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c67f948819192f601708f5599d9",
+          "object": "eval",
+          "created_at": 1764265063,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-49de7e11-beb1-436c-92da-bc8611135e48",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {
+            "modified": "true"
+          }
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:44 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_update_setup.yml b/spec/fixtures/cassettes/evals_update_setup.yml
new file mode 100644
index 00000000..26fec873
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_update_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:44 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_74c5589fb7844e62af40aaf6e3f72341
+      Openai-Processing-Ms:
+      - '681'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '684'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=VzK9NnpeChQPlG5syc9jH_ssnhnA1FT8uXdbcVhOGBs-1764265064-1.0.1.1-fsILAJMqlR.D5mGnLKpZkjt7NNjmn4tH0JS_Ln1ekBktkhX1ALQtvJL0rP9KGDaqARVgN_dT8bW9IpRvpp1ItulJC16qsrGkuW.8wQKwiCw;
+        path=/; expires=Thu, 27-Nov-25 18:07:44 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=M8ILSssH1N4.yHGKflKGVTVsLIwgAvMibM.p7ZJROas-1764265064110-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365249faaa62f-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c67f948819192f601708f5599d9",
+          "object": "eval",
+          "created_at": 1764265063,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-49de7e11-beb1-436c-92da-bc8611135e48",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:44 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/openai/client/evals_spec.rb b/spec/openai/client/evals_spec.rb
new file mode 100644
index 00000000..fde0a383
--- /dev/null
+++ b/spec/openai/client/evals_spec.rb
@@ -0,0 +1,274 @@
+RSpec.describe OpenAI::Client do
+  describe "#evals" do
+    let(:eval_params) do
+      {
+        name: "Sentiment Analysis",
+        data_source_config: {
+          type: "custom",
+          item_schema: {
+            type: "object",
+            properties: {
+              input: { type: "string" }
+            },
+            required: ["input"]
+          },
+          include_sample_schema: true
+        },
+        testing_criteria: [
+          {
+            type: "label_model",
+            model: "o3-mini",
+            input: [
+              { role: "developer",
+                content: "Classify the sentiment of the following statement " \
+                         "as one of 'positive', 'neutral', or 'negative'" },
+              { role: "user", content: "Statement: {{item.input}}" }
+            ],
+            passing_labels: ["positive"],
+            labels: %w[positive neutral negative],
+            name: "Sentiment grader"
+          }
+        ]
+      }
+    end
+    let(:eval_id) do
+      VCR.use_cassette("#{cassette} setup") do
+        OpenAI::Client.new.evals.create(
+          parameters: eval_params
+        )["id"]
+      end
+    end
+
+    let(:run_params) do
+      {
+        name: "Run 1",
+        data_source: {
+          type: "completions",
+          input_messages: {
+            type: "template",
+            template: [
+              {
+                role: "developer",
+                content: "You are a helpful assistant."
+              },
+              {
+                role: "user",
+                content: "{{item.input}}"
+              }
+            ]
+          },
+          model: "gpt-4o-mini",
+          source: {
+            type: "file_content",
+            content: [
+              {
+                item: {
+                  input: "I love this product!",
+                  ground_truth: "positive"
+                }
+              }
+            ]
+          }
+        }
+      }
+    end
+
+    let(:run_id) do
+      VCR.use_cassette("#{cassette} run setup") do
+        OpenAI::Client.new.evals.runs.create(
+          eval_id: eval_id,
+          parameters: run_params
+        )["id"]
+      end
+    end
+
+    describe "#retrieve" do
+      let(:cassette) { "evals retrieve" }
+      let(:response) { OpenAI::Client.new.evals.retrieve(id: eval_id) }
+
+      it "succeeds" do
+        VCR.use_cassette(cassette) do
+          expect(response["object"]).to eq("eval")
+          expect(response["id"]).to eq(eval_id)
+        end
+      end
+    end
+
+    describe "#create" do
+      let(:cassette) { "evals create" }
+      let(:response) do
+        OpenAI::Client.new.evals.create(
+          parameters: eval_params
+        )
+      end
+
+      it "succeeds" do
+        VCR.use_cassette(cassette) do
+          expect(response["object"]).to eq("eval")
+          expect(response["name"]).to eq("Sentiment Analysis")
+        end
+      end
+    end
+
+    describe "#update" do
+      let(:cassette) { "evals update" }
+      let(:response) do
+        OpenAI::Client.new.evals.update(
+          id: eval_id,
+          parameters: { metadata: { modified: "true" } }
+        )
+      end
+
+      it "succeeds" do
+        VCR.use_cassette(cassette) do
+          expect(response["object"]).to eq("eval")
+        end
+      end
+    end
+    describe "#list", :vcr do
+      let(:cassette) { "evals list" }
+      let(:response) { OpenAI::Client.new.evals.list }
+
+      before { eval_id }
+
+      it "succeeds" do
+        VCR.use_cassette(cassette) do
+          expect(response["object"]).to eq("list")
+          expect(response["data"]).to be_an(Array)
+          expect(response.dig("data", 0, "object")).to eq("eval") if response["data"].any?
+        end
+      end
+    end
+
+    describe "#runs" do
+      describe "#retrieve" do
+        let(:cassette) { "evals runs retrieve" }
+        let(:response) do
+          OpenAI::Client.new.evals.runs.retrieve(
+            eval_id: eval_id,
+            id: run_id
+          )
+        end
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            expect(response["object"]).to eq("eval.run")
+            expect(response["id"]).to eq(run_id)
+            expect(response["eval_id"]).to eq(eval_id)
+          end
+        end
+      end
+
+      describe "#create" do
+        let(:cassette) { "evals runs create" }
+        let(:response) do
+          OpenAI::Client.new.evals.runs.create(
+            eval_id: eval_id,
+            parameters: run_params
+          )
+        end
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            expect(response["object"]).to eq("eval.run")
+            expect(response["eval_id"]).to eq(eval_id)
+            expect(response["name"]).to eq("Run 1")
+          end
+        end
+      end
+
+      describe "#output_items" do
+        describe "#list", :vcr do
+          let(:cassette) { "evals runs output_items list" }
+          let(:response) do
+            OpenAI::Client.new.evals.runs.output_items.list(
+              eval_id: eval_id,
+              run_id: run_id
+            )
+          end
+
+          it "succeeds" do
+            VCR.use_cassette(cassette) do
+              expect(response["object"]).to eq("list")
+              expect(response["data"]).to be_an(Array)
+            end
+          end
+        end
+
+        describe "#retrieve" do
+          let(:cassette) { "evals runs output_items retrieve" }
+          let(:output_item_id) do
+            OpenAI::Client.new.evals.runs.output_items.list(
+              eval_id: eval_id,
+              run_id: run_id
+            )["data"].first["id"]
+          end
+          let(:response) do
+            OpenAI::Client.new.evals.runs.output_items.retrieve(
+              eval_id: eval_id,
+              run_id: run_id,
+              id: output_item_id
+            )
+          end
+
+          it "succeeds" do
+            VCR.use_cassette(cassette) do
+              expect(response["object"]).to eq("eval.run.output_item")
+              expect(response["id"]).to eq(output_item_id)
+            end
+          end
+        end
+      end
+
+      describe "#cancel" do
+        let(:cassette) { "evals runs cancel" }
+        let(:response) do
+          OpenAI::Client.new.evals.runs.cancel(
+            eval_id: eval_id,
+            id: run_id
+          )
+        end
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            expect(response["object"]).to eq("eval.run")
+            expect(response["status"]).to eq("canceled")
+          end
+        end
+      end
+
+      describe "#delete" do
+        let(:cassette) { "evals runs delete" }
+        let(:response) do
+          OpenAI::Client.new.evals.runs.cancel(
+            eval_id: eval_id,
+            id: run_id
+          )
+
+          OpenAI::Client.new.evals.runs.delete(
+            eval_id: eval_id,
+            id: run_id
+          )
+        end
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            expect(response["object"]).to eq("eval.run.deleted")
+          end
+        end
+      end
+    end
+
+    describe "#delete" do
+      let(:cassette) { "evals delete" }
+      let(:response) do
+        OpenAI::Client.new.evals.delete(id: eval_id)
+      end
+      it "succeeds" do
+        VCR.use_cassette(cassette) do
+          expect(response["object"]).to eq("eval.deleted")
+        end
+      end
+    end
+  end
+end

From 9d2f21cf28b81dc48bb22c83f5b9d4c4957931af Mon Sep 17 00:00:00 2001
From: Juan Arboleda <35846576+alzeck@users.noreply.github.com>
Date: Thu, 27 Nov 2025 18:40:19 +0000
Subject: [PATCH 2/5] add list runs and docs

---
 README.md                                     | 260 ++++++++++++++++
 lib/openai/evals.rb                           |   4 +
 spec/fixtures/cassettes/evals_runs_list.yml   | 131 +++++++++
 .../cassettes/evals_runs_list_run_setup.yml   | 125 ++++++++
 .../cassettes/evals_runs_list_setup.yml       | 277 ++++++++++++++++++
 spec/openai/client/evals_spec.rb              |  15 +
 6 files changed, 812 insertions(+)
 create mode 100644 spec/fixtures/cassettes/evals_runs_list.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_list_run_setup.yml
 create mode 100644 spec/fixtures/cassettes/evals_runs_list_setup.yml

diff --git a/README.md b/README.md
index a2021daf..570e8e36 100644
--- a/README.md
+++ b/README.md
@@ -84,6 +84,19 @@ Stream GPT-5 chats with the Responses API, initiate Realtime WebRTC conversation
       - [Vision in a thread](#vision-in-a-thread)
       - [Runs involving function tools](#runs-involving-function-tools)
       - [Exploring chunks used in File Search](#exploring-chunks-used-in-file-search)
+    - [Evals](#evals)
+      - [Create an Eval](#create-an-eval)
+      - [Retrieve an Eval](#retrieve-an-eval)
+      - [List Evals](#list-evals)
+      - [Update an Eval](#update-an-eval)
+      - [Delete an Eval](#delete-an-eval)
+      - [Create an Eval Run](#create-an-eval-run)
+      - [List Eval Runs](#list-eval-runs)
+      - [Retrieve an Eval Run](#retrieve-an-eval-run)
+      - [Cancel an Eval Run](#cancel-an-eval-run)
+      - [Delete an Eval Run](#delete-an-eval-run)
+      - [List Output Items](#list-output-items)
+      - [Retrieve an Output Item](#retrieve-an-output-item)
     - [Image Generation](#image-generation)
       - [DALL·E 2](#dalle-2)
       - [DALL·E 3](#dalle-3)
@@ -1669,6 +1682,253 @@ end.compact
 client.messages.list(thread_id: thread_id)
 ```
 
+### Evals
+
+Evals allow you to systematically evaluate the quality and performance of your AI models. You can create evaluations with specific testing criteria, run them against your models, and analyze the results.
+
+#### Create an Eval
+
+Create an evaluation with testing criteria to assess model outputs:
+
+```ruby
+response = client.evals.create(
+  parameters: {
+    name: "Sentiment Analysis Eval",
+    data_source_config: {
+      type: "stored_completions",
+      metadata: { usecase: "chatbot" }
+    },
+    testing_criteria: [
+      {
+        type: "label_model",
+        model: "o3-mini",
+        input: [
+          {
+            role: "developer",
+            content: "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+          },
+          {
+            role: "user",
+            content: "Statement: {{item.input}}"
+          }
+        ],
+        passing_labels: ["positive"],
+        labels: ["positive", "neutral", "negative"],
+        name: "Sentiment grader"
+      }
+    ],
+    metadata: { team: "product", version: "1.0" }
+  }
+)
+puts response["id"]
+# => "eval_abc123"
+```
+
+#### Retrieve an Eval
+
+Get details about a specific evaluation:
+
+```ruby
+eval_id = "eval_abc123"
+response = client.evals.retrieve(id: eval_id)
+puts response["name"]
+# => "Sentiment Analysis Eval"
+```
+
+#### List Evals
+
+List all evaluations with optional pagination:
+
+```ruby
+# List all evals
+response = client.evals.list
+
+# List with limit
+response = client.evals.list(parameters: { limit: 10 })
+
+# List with pagination
+response = client.evals.list(parameters: { after: "eval_abc123", limit: 20 })
+```
+
+#### Update an Eval
+
+Update an evaluation's name or metadata:
+
+```ruby
+response = client.evals.update(
+  id: eval_id,
+  parameters: {
+    metadata: { version: "2.0", updated: "true" }
+  }
+)
+```
+
+#### Delete an Eval
+
+Delete an evaluation:
+
+```ruby
+response = client.evals.delete(id: eval_id)
+puts response["deleted"]
+# => true
+```
+
+#### Create an Eval Run
+
+Run an evaluation against a model with test data:
+
+```ruby
+response = client.evals.runs.create(
+  eval_id: eval_id,
+  parameters: {
+    name: "gpt-4o-mini baseline",
+    data_source: {
+      type: "completions",
+      input_messages: {
+        type: "template",
+        template: [
+          {
+            role: "system",
+            content: "You are a sentiment analyzer. Respond with only: positive, neutral, or negative."
+          },
+          {
+            role: "user",
+            content: "{{item.input}}"
+          }
+        ]
+      },
+      sampling_params: {
+        temperature: 0.7,
+        max_completion_tokens: 50,
+        top_p: 1.0
+      },
+      model: "gpt-4o-mini",
+      source: {
+        type: "file_content",
+        content: [
+          {
+            item: {
+              input: "I absolutely love this product! Best purchase ever.",
+              ground_truth: "positive"
+            }
+          },
+          {
+            item: {
+              input: "This is terrible. Very disappointed.",
+              ground_truth: "negative"
+            }
+          },
+          {
+            item: {
+              input: "It's okay, nothing special.",
+              ground_truth: "neutral"
+            }
+          }
+        ]
+      }
+    },
+    metadata: { experiment: "baseline", date: "2024-01-15" }
+  }
+)
+puts response["id"]
+# => "evalrun_xyz789"
+```
+
+#### List Eval Runs
+
+List all runs for a specific evaluation:
+
+```ruby
+# List all runs
+response = client.evals.runs.list(eval_id: eval_id)
+
+# List with limit
+response = client.evals.runs.list(
+  eval_id: eval_id,
+  parameters: { limit: 10 }
+)
+
+# List with pagination
+response = client.evals.runs.list(
+  eval_id: eval_id,
+  parameters: { after: "evalrun_abc123", limit: 20 }
+)
+```
+
+#### Retrieve an Eval Run
+
+Get details about a specific evaluation run:
+
+```ruby
+run_id = "evalrun_xyz789"
+response = client.evals.runs.retrieve(
+  eval_id: eval_id,
+  id: run_id
+)
+puts response["status"]
+# => "completed"
+```
+
+#### Cancel an Eval Run
+
+Cancel an in-progress evaluation run:
+
+```ruby
+response = client.evals.runs.cancel(
+  eval_id: eval_id,
+  id: run_id
+)
+puts response["status"]
+# => "canceled"
+```
+
+#### Delete an Eval Run
+
+Delete an evaluation run:
+
+```ruby
+response = client.evals.runs.delete(
+  eval_id: eval_id,
+  id: run_id
+)
+puts response["deleted"]
+# => true
+```
+
+#### List Output Items
+
+List the output items generated by an evaluation run:
+
+```ruby
+# List all output items
+response = client.evals.runs.output_items.list(
+  eval_id: eval_id,
+  run_id: run_id
+)
+
+# List with pagination
+response = client.evals.runs.output_items.list(
+  eval_id: eval_id,
+  run_id: run_id,
+  parameters: { limit: 10, after: "item_abc123" }
+)
+```
+
+#### Retrieve an Output Item
+
+Get details about a specific output item:
+
+```ruby
+output_item_id = "item_abc123"
+response = client.evals.runs.output_items.retrieve(
+  eval_id: eval_id,
+  run_id: run_id,
+  id: output_item_id
+)
+puts response["status"]
+# => "pass"
+```
+
 ### Image Generation
 
 Generate images using DALL·E 2 or DALL·E 3!
diff --git a/lib/openai/evals.rb b/lib/openai/evals.rb
index bbbbdb95..cb6927e0 100644
--- a/lib/openai/evals.rb
+++ b/lib/openai/evals.rb
@@ -41,6 +41,10 @@ def retrieve(eval_id:, id:)
         @client.get(path: "/evals/#{eval_id}/runs/#{id}")
       end
 
+      def list(eval_id:, parameters: {})
+        @client.get(path: "/evals/#{eval_id}/runs", parameters: parameters)
+      end
+
       def cancel(eval_id:, id:)
         @client.post(path: "/evals/#{eval_id}/runs/#{id}/cancel")
       end
diff --git a/spec/fixtures/cassettes/evals_runs_list.yml b/spec/fixtures/cassettes/evals_runs_list.yml
new file mode 100644
index 00000000..5fdc435f
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_list.yml
@@ -0,0 +1,131 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_6928994e3c788191aa5575493ab58226/runs
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 18:32:48 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_4014e25ebde4430ae5ff17e80598937c
+      Openai-Processing-Ms:
+      - '310'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '313'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=RCxCb9hNoDNLsT8SSz.QWaZ16OE3wz0H2OOVgObF0Hg-1764268368-1.0.1.1-xbuw3j1YAdWfl2qFpzspmbbwU220pO9LL4W14d4GqWvMorzobwUPp373M6RGG4obrYtV.kSFHFeBERs2yiVxllmvySVXHgXIeF1WqLf99zc;
+        path=/; expires=Thu, 27-Nov-25 19:02:48 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=69e_irZTWWiJst8RToJInC_xiIGqs6FHMv9GIZeo2cA-1764268368400-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53b5d36820cd1a-LHR
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "list",
+          "data": [
+            {
+              "id": "evalrun_6928994f39d88191b9a47a69de5eda51",
+              "object": "eval.run",
+              "created_at": 1764268367,
+              "status": "in_progress",
+              "data_source": {
+                "type": "completions",
+                "source": {
+                  "type": "file_content",
+                  "content": [
+                    {
+                      "item": {
+                        "input": "I love this product!",
+                        "ground_truth": "positive"
+                      }
+                    }
+                  ]
+                },
+                "input_messages": {
+                  "type": "template",
+                  "template": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "You are a helpful assistant."
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "{{item.input}}"
+                    }
+                  ]
+                },
+                "model": "gpt-4o-mini",
+                "provider_credentials": null,
+                "modalities": null,
+                "sampling_params": null
+              },
+              "error": null,
+              "eval_id": "eval_6928994e3c788191aa5575493ab58226",
+              "model": "gpt-4o-mini",
+              "name": "Run 1",
+              "per_model_usage": null,
+              "per_testing_criteria_results": null,
+              "report_url": "https://platform.openai.com/evaluations/eval_6928994e3c788191aa5575493ab58226?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_6928994f39d88191b9a47a69de5eda51",
+              "result_counts": {
+                "errored": 0,
+                "failed": 0,
+                "passed": 0,
+                "total": 0
+              },
+              "shared_with_openai": false,
+              "metadata": {}
+            }
+          ],
+          "first_id": "evalrun_6928994f39d88191b9a47a69de5eda51",
+          "has_more": false,
+          "last_id": "evalrun_6928994f39d88191b9a47a69de5eda51"
+        }
+  recorded_at: Thu, 27 Nov 2025 18:32:48 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_list_run_setup.yml b/spec/fixtures/cassettes/evals_runs_list_run_setup.yml
new file mode 100644
index 00000000..8e749f19
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_list_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_6928994e3c788191aa5575493ab58226/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 18:32:47 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_21708b12e8268ffa47de3abd79ef2344
+      Openai-Processing-Ms:
+      - '1031'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '1034'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=i_CIB19VZtHAv_rk8skB4htrkC04jLpYcMScJYKzV_E-1764268367-1.0.1.1-y91vztSj2kUhklMWvqJgYdGsGk3Y.SfmrPoB7FAykNzgLIYqeZaZo12Df_dARe4utNy7jWI0novwYwAHqlaiVppfxpCHqlTThEom5TV3eOY;
+        path=/; expires=Thu, 27-Nov-25 19:02:47 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=h_NO79XlA6.gBFloHWFMOow1Kg0nS3yu.4GAJNFY6A0-1764268367770-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53b5caee33dfb4-LHR
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_6928994f39d88191b9a47a69de5eda51",
+          "object": "eval.run",
+          "created_at": 1764268367,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_6928994e3c788191aa5575493ab58226",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_6928994e3c788191aa5575493ab58226?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_6928994f39d88191b9a47a69de5eda51",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 18:32:47 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_list_setup.yml b/spec/fixtures/cassettes/evals_runs_list_setup.yml
new file mode 100644
index 00000000..d58f89ff
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_list_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 18:32:46 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_7761586e9b3dc966280851030e885ca3
+      Openai-Processing-Ms:
+      - '687'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '689'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=z3MpTJQQJDq8NpDy1Mgky8PcQ917uOEFzRSZzw_TX90-1764268366-1.0.1.1-2l4GXx9_Ul92nzE08ZGg4Re_dYImAYbYaL7O5z_qzmA067mtyGUhPYNQcLmINwRl76cW14HWWE9cMJheDF7xor28wYGbW1SjWY4D4_s0wxI;
+        path=/; expires=Thu, 27-Nov-25 19:02:46 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=qkT5usGvz0OmPbVRs3HPJ7EptZZee9PslFmbV9UvVac-1764268366427-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53b5c47d22f816-LHR
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_6928994e3c788191aa5575493ab58226",
+          "object": "eval",
+          "created_at": 1764268366,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-2e66f46a-0407-4cc5-bd42-a1c5cde44f6c",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 18:32:46 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/openai/client/evals_spec.rb b/spec/openai/client/evals_spec.rb
index fde0a383..d79289c0 100644
--- a/spec/openai/client/evals_spec.rb
+++ b/spec/openai/client/evals_spec.rb
@@ -141,6 +141,21 @@
     end
 
     describe "#runs" do
+      describe "#list", :vcr do
+        let(:cassette) { "evals runs list" }
+        let(:response) { OpenAI::Client.new.evals.runs.list(eval_id: eval_id) }
+
+        before { run_id }
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            expect(response["object"]).to eq("list")
+            expect(response["data"]).to be_an(Array)
+            expect(response.dig("data", 0, "object")).to eq("eval.run") if response["data"].any?
+          end
+        end
+      end
+
       describe "#retrieve" do
         let(:cassette) { "evals runs retrieve" }
         let(:response) do

From cb6642d98278668f406af23ccfd91f85d4aeb422 Mon Sep 17 00:00:00 2001
From: Juan Arboleda <35846576+alzeck@users.noreply.github.com>
Date: Thu, 27 Nov 2025 18:55:06 +0000
Subject: [PATCH 3/5] reduce lines for evals list

---
 spec/fixtures/cassettes/evals_list.yml       | 5159 +-----------------
 spec/fixtures/cassettes/evals_list_setup.yml |   24 +-
 2 files changed, 27 insertions(+), 5156 deletions(-)

diff --git a/spec/fixtures/cassettes/evals_list.yml b/spec/fixtures/cassettes/evals_list.yml
index 54873fc8..40a5f998 100644
--- a/spec/fixtures/cassettes/evals_list.yml
+++ b/spec/fixtures/cassettes/evals_list.yml
@@ -23,7 +23,7 @@ http_interactions:
       message: OK
     headers:
       Date:
-      - Thu, 27 Nov 2025 17:37:45 GMT
+      - Thu, 27 Nov 2025 18:53:28 GMT
       Content-Type:
       - application/json
       Transfer-Encoding:
@@ -35,13 +35,13 @@ http_interactions:
       Openai-Organization:
       - user-jxm65ijkzc1qrfhc0ij8moic
       X-Request-Id:
-      - req_09908134b92384fc64c2e1a044fc1b8f
+      - req_fb43ab73c5cedacfd3da4797f457f98f
       Openai-Processing-Ms:
-      - '349'
+      - '219'
       Vary:
       - Accept-Encoding
       X-Envoy-Upstream-Service-Time:
-      - '365'
+      - '221'
       X-Openai-Proxy-Wasm:
       - v0.1
       Strict-Transport-Security:
@@ -49,17 +49,17 @@ http_interactions:
       Cf-Cache-Status:
       - DYNAMIC
       Set-Cookie:
-      - __cf_bm=a9lHm_ooC54LUqxpoUst1vazPY71OQaHdrCBcnTLJ8Y-1764265065-1.0.1.1-PvPVksJlQfOKII3YuqWv76rdDKlmg3Af7t.kcEILylSWKihcvR2SUFc.At3ilkSNU3DxtN1PWnAFTzSeGiuIxObyz7ifqWs2aR6jOudJDM0;
-        path=/; expires=Thu, 27-Nov-25 18:07:45 GMT; domain=.api.openai.com; HttpOnly;
+      - __cf_bm=KRMsUubnpJCqt86K8dPwv6XLqF3.4SbEgig4dkt7RRI-1764269608-1.0.1.1-_G07YEtDCfSxGTL9A8U8sbKS89nOibkLGjMQX4Jldo0PVN8nGeQ2Zwi4BbbkQZeADFqI8PULxKJ4FjeGEqUXLVe4Aydb4r.pit5qX.WyHpc;
+        path=/; expires=Thu, 27-Nov-25 19:23:28 GMT; domain=.api.openai.com; HttpOnly;
         Secure; SameSite=None
-      - _cfuvid=7IH9dxNsKoIkP3ir_C4yjxfvA8TzZJyRidP6c.KKy7w-1764265065684-0.0.1.1-604800000;
+      - _cfuvid=5YZOeZ8X7F73apsyWvgmXwsk44AQOl0d3728FzPciXA-1764269608650-0.0.1.1-604800000;
         path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
       X-Content-Type-Options:
       - nosniff
       Server:
       - cloudflare
       Cf-Ray:
-      - 9a5365315dbb97f8-MAN
+      - 9a53d41a0bc70764-MAN
       Alt-Svc:
       - h3=":443"; ma=86400
     body:
@@ -69,9 +69,9 @@ http_interactions:
           "object": "list",
           "data": [
             {
-              "id": "eval_692886ca71948191a94a46ef2866fa38",
+              "id": "eval_69289e005d008191bc07606b2ceb522c",
               "object": "eval",
-              "created_at": 1764263626,
+              "created_at": 1764269568,
               "data_source_config": {
                 "type": "custom",
                 "max_items": null,
@@ -141,37 +141,6 @@ http_interactions:
                                   "role"
                                 ],
                                 "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
                                   "tool_calls": {
                                     "items": {
                                       "required": [
@@ -212,146 +181,6 @@ http_interactions:
                                       "null"
                                     ]
                                   },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-9ec23aee-5784-4532-b734-4eaa1441c1b4",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_692886c81f0c81918301673fd48074e3",
-              "object": "eval",
-              "created_at": 1764263624,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
                                   "function_call": {
                                     "required": [
                                       "name",
@@ -377,4972 +206,17 @@ http_interactions:
                                       "null"
                                     ]
                                   },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
                                   "refusal": {
                                     "type": [
                                       "boolean",
                                       "null"
                                     ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-b605af03-d2b3-4219-aabc-bdbbd02864c3",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_692886c607dc819198c3b71cbf375088",
-              "object": "eval",
-              "created_at": 1764263622,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-5d95bb83-b098-4c8c-9ccc-1f327b7c680f",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_692886c4402c8191b16daf0a13927d55",
-              "object": "eval",
-              "created_at": 1764263620,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-a5f01088-9cd6-484c-b16c-4239b8ce247f",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_692886c286c08191a59229de96f2519c",
-              "object": "eval",
-              "created_at": 1764263618,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-6ab333ba-8b63-4868-ba3d-8444b9f0b788",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_692886c0c2e88191b175224e13418b17",
-              "object": "eval",
-              "created_at": 1764263616,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-ea13e537-3285-4116-87ea-d26232d4e433",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_692886bf65fc8191991a4848d677e502",
-              "object": "eval",
-              "created_at": 1764263615,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-ecffb0f6-e04c-470b-a089-71512d3f7e2e",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_692886be83748191aa1ecb05c5db958e",
-              "object": "eval",
-              "created_at": 1764263614,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-ec2a502e-e03d-4b4e-920f-c9996764023a",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {
-                "modified": "true"
-              }
-            },
-            {
-              "id": "eval_692886bd90d8819198391114178d4134",
-              "object": "eval",
-              "created_at": 1764263613,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-a8d9c70b-5062-40ad-b882-6d7d2b4835d7",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_692886bc33dc81919ec1696f9e931ff4",
-              "object": "eval",
-              "created_at": 1764263612,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-1bbd590e-ef4f-462e-a050-094b3925482c",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_69288535779c8191a0bed3405c7a72ab",
-              "object": "eval",
-              "created_at": 1764263221,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-9f9e21ad-adcd-4f8e-b359-b9aa6a34a974",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_692885332d10819180c74603e6462a4c",
-              "object": "eval",
-              "created_at": 1764263219,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-3b0cec65-bd94-41a5-a67c-894300044c4a",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_69288530792c819181f87315e2cf7a98",
-              "object": "eval",
-              "created_at": 1764263216,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-8d9f1ee8-2ca7-4ee7-9775-84fd81f188ac",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_6928852eacfc8191939c094b7bf768a3",
-              "object": "eval",
-              "created_at": 1764263214,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-261fabf9-7bb5-497b-8103-a989a3d5780a",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_6928852cfc9c81918d7e5fee3762e782",
-              "object": "eval",
-              "created_at": 1764263212,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-6b2951b4-82b6-4471-b494-2f362485c8ba",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_6928852ae6588191801b3c819b2ec864",
-              "object": "eval",
-              "created_at": 1764263210,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-27b6b470-b897-47a2-84a5-bae598fe8475",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_692885294e408191883f13c67b96cc77",
-              "object": "eval",
-              "created_at": 1764263209,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-724f7a30-b74b-49d5-8fbc-abe93f386347",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_69288527f2548191a4a1550aa8ae4962",
-              "object": "eval",
-              "created_at": 1764263207,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-0abe0176-b62b-4dd6-96a1-3f30623a53f4",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {
-                "modified": "true"
-              }
-            },
-            {
-              "id": "eval_692885254d4c8191b44f0366019b69c4",
-              "object": "eval",
-              "created_at": 1764263205,
-              "data_source_config": {
-                "type": "custom",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input"
-                      ],
-                      "properties": {
-                        "input": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
-                                  "role": {
-                                    "enum": [
-                                      "assistant"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
-                                  }
-                                },
-                                "type": "object"
-                              },
-                              "finish_reason": {
-                                "type": "string"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_text": {
-                          "type": "string"
-                        }
-                      },
-                      "type": "object"
-                    }
-                  },
-                  "type": "object"
-                }
-              },
-              "name": "Sentiment Analysis",
-              "testing_criteria": [
-                {
-                  "id": "Sentiment grader-d2a3c749-e56a-425d-8adf-8ec9ae2d2b45",
-                  "type": "label_model",
-                  "grdr_id": null,
-                  "inactive_at": null,
-                  "input": [
-                    {
-                      "type": "message",
-                      "role": "developer",
-                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
-                    },
-                    {
-                      "type": "message",
-                      "role": "user",
-                      "content": "Statement: {{item.input}}"
-                    }
-                  ],
-                  "labels": [
-                    "positive",
-                    "neutral",
-                    "negative"
-                  ],
-                  "model": "o3-mini",
-                  "name": "Sentiment grader",
-                  "passing_labels": [
-                    "positive"
-                  ],
-                  "sampling_params": null
-                }
-              ],
-              "metadata": {}
-            },
-            {
-              "id": "eval_69288313e4408191922bf1863f2ba432",
-              "object": "eval",
-              "created_at": 1764262675,
-              "data_source_config": {
-                "type": "logs",
-                "max_items": null,
-                "schema": {
-                  "required": [
-                    "item",
-                    "sample"
-                  ],
-                  "properties": {
-                    "item": {
-                      "required": [
-                        "input",
-                        "output"
-                      ],
-                      "title": "LogsItemSchema",
-                      "properties": {
-                        "output": {
-                          "items": {
-                            "required": [
-                              "model",
-                              "output"
-                            ],
-                            "title": "ResponseInputSample",
-                            "properties": {
-                              "model": {
-                                "title": "Model",
-                                "type": "string"
-                              },
-                              "output": {
-                                "items": {
-                                  "required": [
-                                    "role",
-                                    "content"
-                                  ],
-                                  "title": "ChatMessage",
-                                  "properties": {
-                                    "tool_call_id": {
-                                      "default": null,
-                                      "title": "Tool Call Id",
-                                      "anyOf": [
-                                        {
-                                          "type": "string"
-                                        },
-                                        {
-                                          "type": "null"
-                                        }
-                                      ]
-                                    },
-                                    "finish_reason": {
-                                      "default": null,
-                                      "title": "Finish Reason",
-                                      "anyOf": [
-                                        {
-                                          "enum": [
-                                            "stop",
-                                            "length",
-                                            "tool_calls",
-                                            "content_filter",
-                                            "function_call"
-                                          ],
-                                          "type": "string"
-                                        },
-                                        {
-                                          "type": "null"
-                                        }
-                                      ]
-                                    },
-                                    "content": {
-                                      "title": "Content",
-                                      "anyOf": [
-                                        {
-                                          "type": "string"
-                                        },
-                                        {
-                                          "items": {
-                                            "anyOf": [
-                                              {
-                                                "required": [
-                                                  "text",
-                                                  "type"
-                                                ],
-                                                "additionalProperties": true,
-                                                "title": "ResponseInputText",
-                                                "properties": {
-                                                  "text": {
-                                                    "title": "Text",
-                                                    "type": "string"
-                                                  },
-                                                  "type": {
-                                                    "const": "input_text",
-                                                    "title": "Type",
-                                                    "type": "string"
-                                                  }
-                                                },
-                                                "type": "object"
-                                              },
-                                              {
-                                                "required": [
-                                                  "detail",
-                                                  "type"
-                                                ],
-                                                "additionalProperties": true,
-                                                "title": "ResponseInputImage",
-                                                "properties": {
-                                                  "file_id": {
-                                                    "default": null,
-                                                    "title": "File Id",
-                                                    "anyOf": [
-                                                      {
-                                                        "type": "string"
-                                                      },
-                                                      {
-                                                        "type": "null"
-                                                      }
-                                                    ]
-                                                  },
-                                                  "detail": {
-                                                    "title": "Detail",
-                                                    "enum": [
-                                                      "low",
-                                                      "high",
-                                                      "auto"
-                                                    ],
-                                                    "type": "string"
-                                                  },
-                                                  "type": {
-                                                    "const": "input_image",
-                                                    "title": "Type",
-                                                    "type": "string"
-                                                  },
-                                                  "image_url": {
-                                                    "default": null,
-                                                    "title": "Image Url",
-                                                    "anyOf": [
-                                                      {
-                                                        "type": "string"
-                                                      },
-                                                      {
-                                                        "type": "null"
-                                                      }
-                                                    ]
-                                                  }
-                                                },
-                                                "type": "object"
-                                              },
-                                              {
-                                                "required": [
-                                                  "annotations",
-                                                  "text",
-                                                  "type"
-                                                ],
-                                                "additionalProperties": true,
-                                                "title": "ResponseOutputText",
-                                                "properties": {
-                                                  "text": {
-                                                    "title": "Text",
-                                                    "type": "string"
-                                                  },
-                                                  "type": {
-                                                    "const": "output_text",
-                                                    "title": "Type",
-                                                    "type": "string"
-                                                  },
-                                                  "logprobs": {
-                                                    "default": null,
-                                                    "title": "Logprobs",
-                                                    "anyOf": [
-                                                      {
-                                                        "items": {
-                                                          "required": [
-                                                            "token",
-                                                            "bytes",
-                                                            "logprob",
-                                                            "top_logprobs"
-                                                          ],
-                                                          "additionalProperties": true,
-                                                          "title": "Logprob",
-                                                          "properties": {
-                                                            "bytes": {
-                                                              "items": {
-                                                                "type": "integer"
-                                                              },
-                                                              "title": "Bytes",
-                                                              "type": "array"
-                                                            },
-                                                            "token": {
-                                                              "title": "Token",
-                                                              "type": "string"
-                                                            },
-                                                            "top_logprobs": {
-                                                              "items": {
-                                                                "required": [
-                                                                  "token",
-                                                                  "bytes",
-                                                                  "logprob"
-                                                                ],
-                                                                "additionalProperties": true,
-                                                                "title": "LogprobTopLogprob",
-                                                                "properties": {
-                                                                  "bytes": {
-                                                                    "items": {
-                                                                      "type": "integer"
-                                                                    },
-                                                                    "title": "Bytes",
-                                                                    "type": "array"
-                                                                  },
-                                                                  "token": {
-                                                                    "title": "Token",
-                                                                    "type": "string"
-                                                                  },
-                                                                  "logprob": {
-                                                                    "title": "Logprob",
-                                                                    "type": "number"
-                                                                  }
-                                                                },
-                                                                "type": "object"
-                                                              },
-                                                              "title": "Top Logprobs",
-                                                              "type": "array"
-                                                            },
-                                                            "logprob": {
-                                                              "title": "Logprob",
-                                                              "type": "number"
-                                                            }
-                                                          },
-                                                          "type": "object"
-                                                        },
-                                                        "type": "array"
-                                                      },
-                                                      {
-                                                        "type": "null"
-                                                      }
-                                                    ]
-                                                  },
-                                                  "annotations": {
-                                                    "items": {
-                                                      "anyOf": [
-                                                        {
-                                                          "required": [
-                                                            "file_id",
-                                                            "filename",
-                                                            "index",
-                                                            "type"
-                                                          ],
-                                                          "additionalProperties": true,
-                                                          "title": "AnnotationFileCitation",
-                                                          "properties": {
-                                                            "file_id": {
-                                                              "title": "File Id",
-                                                              "type": "string"
-                                                            },
-                                                            "index": {
-                                                              "title": "Index",
-                                                              "type": "integer"
-                                                            },
-                                                            "type": {
-                                                              "const": "file_citation",
-                                                              "title": "Type",
-                                                              "type": "string"
-                                                            },
-                                                            "filename": {
-                                                              "title": "Filename",
-                                                              "type": "string"
-                                                            }
-                                                          },
-                                                          "type": "object"
-                                                        },
-                                                        {
-                                                          "required": [
-                                                            "end_index",
-                                                            "start_index",
-                                                            "title",
-                                                            "type",
-                                                            "url"
-                                                          ],
-                                                          "additionalProperties": true,
-                                                          "title": "AnnotationURLCitation",
-                                                          "properties": {
-                                                            "start_index": {
-                                                              "title": "Start Index",
-                                                              "type": "integer"
-                                                            },
-                                                            "end_index": {
-                                                              "title": "End Index",
-                                                              "type": "integer"
-                                                            },
-                                                            "title": {
-                                                              "title": "Title",
-                                                              "type": "string"
-                                                            },
-                                                            "type": {
-                                                              "const": "url_citation",
-                                                              "title": "Type",
-                                                              "type": "string"
-                                                            },
-                                                            "url": {
-                                                              "title": "Url",
-                                                              "type": "string"
-                                                            }
-                                                          },
-                                                          "type": "object"
-                                                        },
-                                                        {
-                                                          "required": [
-                                                            "container_id",
-                                                            "end_index",
-                                                            "file_id",
-                                                            "filename",
-                                                            "start_index",
-                                                            "type"
-                                                          ],
-                                                          "additionalProperties": true,
-                                                          "title": "AnnotationContainerFileCitation",
-                                                          "properties": {
-                                                            "start_index": {
-                                                              "title": "Start Index",
-                                                              "type": "integer"
-                                                            },
-                                                            "end_index": {
-                                                              "title": "End Index",
-                                                              "type": "integer"
-                                                            },
-                                                            "type": {
-                                                              "const": "container_file_citation",
-                                                              "title": "Type",
-                                                              "type": "string"
-                                                            },
-                                                            "filename": {
-                                                              "title": "Filename",
-                                                              "type": "string"
-                                                            },
-                                                            "file_id": {
-                                                              "title": "File Id",
-                                                              "type": "string"
-                                                            },
-                                                            "container_id": {
-                                                              "title": "Container Id",
-                                                              "type": "string"
-                                                            }
-                                                          },
-                                                          "type": "object"
-                                                        },
-                                                        {
-                                                          "required": [
-                                                            "file_id",
-                                                            "index",
-                                                            "type"
-                                                          ],
-                                                          "additionalProperties": true,
-                                                          "title": "AnnotationFilePath",
-                                                          "properties": {
-                                                            "file_id": {
-                                                              "title": "File Id",
-                                                              "type": "string"
-                                                            },
-                                                            "index": {
-                                                              "title": "Index",
-                                                              "type": "integer"
-                                                            },
-                                                            "type": {
-                                                              "const": "file_path",
-                                                              "title": "Type",
-                                                              "type": "string"
-                                                            }
-                                                          },
-                                                          "type": "object"
-                                                        }
-                                                      ]
-                                                    },
-                                                    "title": "Annotations",
-                                                    "type": "array"
-                                                  }
-                                                },
-                                                "type": "object"
-                                              },
-                                              {
-                                                "required": [
-                                                  "type",
-                                                  "input_audio"
-                                                ],
-                                                "title": "ResponseInputAudio",
-                                                "properties": {
-                                                  "type": {
-                                                    "const": "input_audio",
-                                                    "title": "Type",
-                                                    "type": "string"
-                                                  },
-                                                  "input_audio": {
-                                                    "required": [
-                                                      "data"
-                                                    ],
-                                                    "title": "AudioData",
-                                                    "properties": {
-                                                      "data": {
-                                                        "title": "Data",
-                                                        "type": "string"
-                                                      },
-                                                      "format": {
-                                                        "default": "wav",
-                                                        "title": "Format",
-                                                        "enum": [
-                                                          "wav",
-                                                          "mp3"
-                                                        ],
-                                                        "type": "string"
-                                                      }
-                                                    },
-                                                    "type": "object"
-                                                  }
-                                                },
-                                                "type": "object"
-                                              },
-                                              {
-                                                "required": [
-                                                  "type",
-                                                  "output_audio"
-                                                ],
-                                                "title": "ResponseOutputAudio",
-                                                "properties": {
-                                                  "audio_transcript": {
-                                                    "default": null,
-                                                    "title": "Audio Transcript",
-                                                    "anyOf": [
-                                                      {
-                                                        "type": "string"
-                                                      },
-                                                      {
-                                                        "type": "null"
-                                                      }
-                                                    ]
-                                                  },
-                                                  "output_audio": {
-                                                    "required": [
-                                                      "data"
-                                                    ],
-                                                    "title": "AudioData",
-                                                    "properties": {
-                                                      "data": {
-                                                        "title": "Data",
-                                                        "type": "string"
-                                                      },
-                                                      "format": {
-                                                        "default": "wav",
-                                                        "title": "Format",
-                                                        "enum": [
-                                                          "wav",
-                                                          "mp3"
-                                                        ],
-                                                        "type": "string"
-                                                      }
-                                                    },
-                                                    "type": "object"
-                                                  },
-                                                  "type": {
-                                                    "const": "output_audio",
-                                                    "title": "Type",
-                                                    "type": "string"
-                                                  }
-                                                },
-                                                "type": "object"
-                                              }
-                                            ]
-                                          },
-                                          "type": "array"
-                                        }
-                                      ]
-                                    },
-                                    "role": {
-                                      "title": "Role",
-                                      "enum": [
-                                        "system",
-                                        "user",
-                                        "assistant",
-                                        "developer",
-                                        "tool",
-                                        "function"
-                                      ],
-                                      "type": "string"
-                                    },
-                                    "tool_calls": {
-                                      "default": null,
-                                      "title": "Tool Calls",
-                                      "anyOf": [
-                                        {
-                                          "items": {
-                                            "required": [
-                                              "type",
-                                              "function",
-                                              "id"
-                                            ],
-                                            "title": "FunctionCall",
-                                            "properties": {
-                                              "function": {
-                                                "required": [
-                                                  "name",
-                                                  "arguments"
-                                                ],
-                                                "title": "Function",
-                                                "properties": {
-                                                  "return_value": {
-                                                    "default": null,
-                                                    "title": "Return Value",
-                                                    "anyOf": [
-                                                      {
-                                                        "type": "string"
-                                                      },
-                                                      {
-                                                        "type": "null"
-                                                      }
-                                                    ]
-                                                  },
-                                                  "name": {
-                                                    "title": "Name",
-                                                    "type": "string"
-                                                  },
-                                                  "arguments": {
-                                                    "title": "Arguments",
-                                                    "type": "string"
-                                                  }
-                                                },
-                                                "type": "object"
-                                              },
-                                              "id": {
-                                                "title": "Id",
-                                                "type": "string"
-                                              },
-                                              "type": {
-                                                "const": "function",
-                                                "title": "Type",
-                                                "type": "string"
-                                              }
-                                            },
-                                            "type": "object"
-                                          },
-                                          "type": "array"
-                                        },
-                                        {
-                                          "type": "null"
-                                        }
-                                      ]
-                                    },
-                                    "reasoning_summary": {
-                                      "default": null,
-                                      "title": "Reasoning Summary",
-                                      "anyOf": [
-                                        {
-                                          "type": "string"
-                                        },
-                                        {
-                                          "type": "null"
-                                        }
-                                      ]
-                                    },
-                                    "name": {
-                                      "default": null,
-                                      "title": "Name",
-                                      "anyOf": [
-                                        {
-                                          "type": "string"
-                                        },
-                                        {
-                                          "type": "null"
-                                        }
-                                      ]
-                                    },
-                                    "function_call": {
-                                      "default": null,
-                                      "anyOf": [
-                                        {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "title": "Function",
-                                          "properties": {
-                                            "return_value": {
-                                              "default": null,
-                                              "title": "Return Value",
-                                              "anyOf": [
-                                                {
-                                                  "type": "string"
-                                                },
-                                                {
-                                                  "type": "null"
-                                                }
-                                              ]
-                                            },
-                                            "name": {
-                                              "title": "Name",
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "title": "Arguments",
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        {
-                                          "type": "null"
-                                        }
-                                      ]
-                                    },
-                                    "refusal": {
-                                      "default": null,
-                                      "title": "Refusal",
-                                      "anyOf": [
-                                        {
-                                          "type": "string"
-                                        },
-                                        {
-                                          "type": "null"
-                                        }
-                                      ]
-                                    },
-                                    "trace_id": {
-                                      "default": null,
-                                      "title": "Trace Id",
-                                      "anyOf": [
-                                        {
-                                          "type": "string"
-                                        },
-                                        {
-                                          "type": "null"
-                                        }
-                                      ]
-                                    }
-                                  },
-                                  "type": "object"
-                                },
-                                "title": "Output",
-                                "type": "array"
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "title": "Output",
-                          "type": "array"
-                        },
-                        "input": {
-                          "items": {
-                            "required": [
-                              "role",
-                              "content"
-                            ],
-                            "title": "ChatMessage",
-                            "properties": {
-                              "tool_call_id": {
-                                "default": null,
-                                "title": "Tool Call Id",
-                                "anyOf": [
-                                  {
-                                    "type": "string"
-                                  },
-                                  {
-                                    "type": "null"
-                                  }
-                                ]
-                              },
-                              "finish_reason": {
-                                "default": null,
-                                "title": "Finish Reason",
-                                "anyOf": [
-                                  {
-                                    "enum": [
-                                      "stop",
-                                      "length",
-                                      "tool_calls",
-                                      "content_filter",
-                                      "function_call"
-                                    ],
-                                    "type": "string"
-                                  },
-                                  {
-                                    "type": "null"
-                                  }
-                                ]
-                              },
-                              "content": {
-                                "title": "Content",
-                                "anyOf": [
-                                  {
-                                    "type": "string"
                                   },
-                                  {
-                                    "items": {
-                                      "anyOf": [
-                                        {
-                                          "required": [
-                                            "text",
-                                            "type"
-                                          ],
-                                          "additionalProperties": true,
-                                          "title": "ResponseInputText",
-                                          "properties": {
-                                            "text": {
-                                              "title": "Text",
-                                              "type": "string"
-                                            },
-                                            "type": {
-                                              "const": "input_text",
-                                              "title": "Type",
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        {
-                                          "required": [
-                                            "detail",
-                                            "type"
-                                          ],
-                                          "additionalProperties": true,
-                                          "title": "ResponseInputImage",
-                                          "properties": {
-                                            "file_id": {
-                                              "default": null,
-                                              "title": "File Id",
-                                              "anyOf": [
-                                                {
-                                                  "type": "string"
-                                                },
-                                                {
-                                                  "type": "null"
-                                                }
-                                              ]
-                                            },
-                                            "detail": {
-                                              "title": "Detail",
-                                              "enum": [
-                                                "low",
-                                                "high",
-                                                "auto"
-                                              ],
-                                              "type": "string"
-                                            },
-                                            "type": {
-                                              "const": "input_image",
-                                              "title": "Type",
-                                              "type": "string"
-                                            },
-                                            "image_url": {
-                                              "default": null,
-                                              "title": "Image Url",
-                                              "anyOf": [
-                                                {
-                                                  "type": "string"
-                                                },
-                                                {
-                                                  "type": "null"
-                                                }
-                                              ]
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        {
-                                          "required": [
-                                            "annotations",
-                                            "text",
-                                            "type"
-                                          ],
-                                          "additionalProperties": true,
-                                          "title": "ResponseOutputText",
-                                          "properties": {
-                                            "text": {
-                                              "title": "Text",
-                                              "type": "string"
-                                            },
-                                            "type": {
-                                              "const": "output_text",
-                                              "title": "Type",
-                                              "type": "string"
-                                            },
-                                            "logprobs": {
-                                              "default": null,
-                                              "title": "Logprobs",
-                                              "anyOf": [
-                                                {
-                                                  "items": {
-                                                    "required": [
-                                                      "token",
-                                                      "bytes",
-                                                      "logprob",
-                                                      "top_logprobs"
-                                                    ],
-                                                    "additionalProperties": true,
-                                                    "title": "Logprob",
-                                                    "properties": {
-                                                      "bytes": {
-                                                        "items": {
-                                                          "type": "integer"
-                                                        },
-                                                        "title": "Bytes",
-                                                        "type": "array"
-                                                      },
-                                                      "token": {
-                                                        "title": "Token",
-                                                        "type": "string"
-                                                      },
-                                                      "top_logprobs": {
-                                                        "items": {
-                                                          "required": [
-                                                            "token",
-                                                            "bytes",
-                                                            "logprob"
-                                                          ],
-                                                          "additionalProperties": true,
-                                                          "title": "LogprobTopLogprob",
-                                                          "properties": {
-                                                            "bytes": {
-                                                              "items": {
-                                                                "type": "integer"
-                                                              },
-                                                              "title": "Bytes",
-                                                              "type": "array"
-                                                            },
-                                                            "token": {
-                                                              "title": "Token",
-                                                              "type": "string"
-                                                            },
-                                                            "logprob": {
-                                                              "title": "Logprob",
-                                                              "type": "number"
-                                                            }
-                                                          },
-                                                          "type": "object"
-                                                        },
-                                                        "title": "Top Logprobs",
-                                                        "type": "array"
-                                                      },
-                                                      "logprob": {
-                                                        "title": "Logprob",
-                                                        "type": "number"
-                                                      }
-                                                    },
-                                                    "type": "object"
-                                                  },
-                                                  "type": "array"
-                                                },
-                                                {
-                                                  "type": "null"
-                                                }
-                                              ]
-                                            },
-                                            "annotations": {
-                                              "items": {
-                                                "anyOf": [
-                                                  {
-                                                    "required": [
-                                                      "file_id",
-                                                      "filename",
-                                                      "index",
-                                                      "type"
-                                                    ],
-                                                    "additionalProperties": true,
-                                                    "title": "AnnotationFileCitation",
-                                                    "properties": {
-                                                      "file_id": {
-                                                        "title": "File Id",
-                                                        "type": "string"
-                                                      },
-                                                      "index": {
-                                                        "title": "Index",
-                                                        "type": "integer"
-                                                      },
-                                                      "type": {
-                                                        "const": "file_citation",
-                                                        "title": "Type",
-                                                        "type": "string"
-                                                      },
-                                                      "filename": {
-                                                        "title": "Filename",
-                                                        "type": "string"
-                                                      }
-                                                    },
-                                                    "type": "object"
-                                                  },
-                                                  {
-                                                    "required": [
-                                                      "end_index",
-                                                      "start_index",
-                                                      "title",
-                                                      "type",
-                                                      "url"
-                                                    ],
-                                                    "additionalProperties": true,
-                                                    "title": "AnnotationURLCitation",
-                                                    "properties": {
-                                                      "start_index": {
-                                                        "title": "Start Index",
-                                                        "type": "integer"
-                                                      },
-                                                      "end_index": {
-                                                        "title": "End Index",
-                                                        "type": "integer"
-                                                      },
-                                                      "title": {
-                                                        "title": "Title",
-                                                        "type": "string"
-                                                      },
-                                                      "type": {
-                                                        "const": "url_citation",
-                                                        "title": "Type",
-                                                        "type": "string"
-                                                      },
-                                                      "url": {
-                                                        "title": "Url",
-                                                        "type": "string"
-                                                      }
-                                                    },
-                                                    "type": "object"
-                                                  },
-                                                  {
-                                                    "required": [
-                                                      "container_id",
-                                                      "end_index",
-                                                      "file_id",
-                                                      "filename",
-                                                      "start_index",
-                                                      "type"
-                                                    ],
-                                                    "additionalProperties": true,
-                                                    "title": "AnnotationContainerFileCitation",
-                                                    "properties": {
-                                                      "start_index": {
-                                                        "title": "Start Index",
-                                                        "type": "integer"
-                                                      },
-                                                      "end_index": {
-                                                        "title": "End Index",
-                                                        "type": "integer"
-                                                      },
-                                                      "type": {
-                                                        "const": "container_file_citation",
-                                                        "title": "Type",
-                                                        "type": "string"
-                                                      },
-                                                      "filename": {
-                                                        "title": "Filename",
-                                                        "type": "string"
-                                                      },
-                                                      "file_id": {
-                                                        "title": "File Id",
-                                                        "type": "string"
-                                                      },
-                                                      "container_id": {
-                                                        "title": "Container Id",
-                                                        "type": "string"
-                                                      }
-                                                    },
-                                                    "type": "object"
-                                                  },
-                                                  {
-                                                    "required": [
-                                                      "file_id",
-                                                      "index",
-                                                      "type"
-                                                    ],
-                                                    "additionalProperties": true,
-                                                    "title": "AnnotationFilePath",
-                                                    "properties": {
-                                                      "file_id": {
-                                                        "title": "File Id",
-                                                        "type": "string"
-                                                      },
-                                                      "index": {
-                                                        "title": "Index",
-                                                        "type": "integer"
-                                                      },
-                                                      "type": {
-                                                        "const": "file_path",
-                                                        "title": "Type",
-                                                        "type": "string"
-                                                      }
-                                                    },
-                                                    "type": "object"
-                                                  }
-                                                ]
-                                              },
-                                              "title": "Annotations",
-                                              "type": "array"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        {
-                                          "required": [
-                                            "type",
-                                            "input_audio"
-                                          ],
-                                          "title": "ResponseInputAudio",
-                                          "properties": {
-                                            "type": {
-                                              "const": "input_audio",
-                                              "title": "Type",
-                                              "type": "string"
-                                            },
-                                            "input_audio": {
-                                              "required": [
-                                                "data"
-                                              ],
-                                              "title": "AudioData",
-                                              "properties": {
-                                                "data": {
-                                                  "title": "Data",
-                                                  "type": "string"
-                                                },
-                                                "format": {
-                                                  "default": "wav",
-                                                  "title": "Format",
-                                                  "enum": [
-                                                    "wav",
-                                                    "mp3"
-                                                  ],
-                                                  "type": "string"
-                                                }
-                                              },
-                                              "type": "object"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        {
-                                          "required": [
-                                            "type",
-                                            "output_audio"
-                                          ],
-                                          "title": "ResponseOutputAudio",
-                                          "properties": {
-                                            "audio_transcript": {
-                                              "default": null,
-                                              "title": "Audio Transcript",
-                                              "anyOf": [
-                                                {
-                                                  "type": "string"
-                                                },
-                                                {
-                                                  "type": "null"
-                                                }
-                                              ]
-                                            },
-                                            "output_audio": {
-                                              "required": [
-                                                "data"
-                                              ],
-                                              "title": "AudioData",
-                                              "properties": {
-                                                "data": {
-                                                  "title": "Data",
-                                                  "type": "string"
-                                                },
-                                                "format": {
-                                                  "default": "wav",
-                                                  "title": "Format",
-                                                  "enum": [
-                                                    "wav",
-                                                    "mp3"
-                                                  ],
-                                                  "type": "string"
-                                                }
-                                              },
-                                              "type": "object"
-                                            },
-                                            "type": {
-                                              "const": "output_audio",
-                                              "title": "Type",
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        }
-                                      ]
-                                    },
-                                    "type": "array"
-                                  }
-                                ]
-                              },
-                              "role": {
-                                "title": "Role",
-                                "enum": [
-                                  "system",
-                                  "user",
-                                  "assistant",
-                                  "developer",
-                                  "tool",
-                                  "function"
-                                ],
-                                "type": "string"
-                              },
-                              "tool_calls": {
-                                "default": null,
-                                "title": "Tool Calls",
-                                "anyOf": [
-                                  {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "title": "FunctionCall",
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "title": "Function",
-                                          "properties": {
-                                            "return_value": {
-                                              "default": null,
-                                              "title": "Return Value",
-                                              "anyOf": [
-                                                {
-                                                  "type": "string"
-                                                },
-                                                {
-                                                  "type": "null"
-                                                }
-                                              ]
-                                            },
-                                            "name": {
-                                              "title": "Name",
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "title": "Arguments",
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "title": "Id",
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "const": "function",
-                                          "title": "Type",
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": "array"
-                                  },
-                                  {
-                                    "type": "null"
-                                  }
-                                ]
-                              },
-                              "reasoning_summary": {
-                                "default": null,
-                                "title": "Reasoning Summary",
-                                "anyOf": [
-                                  {
-                                    "type": "string"
-                                  },
-                                  {
-                                    "type": "null"
-                                  }
-                                ]
-                              },
-                              "name": {
-                                "default": null,
-                                "title": "Name",
-                                "anyOf": [
-                                  {
-                                    "type": "string"
-                                  },
-                                  {
-                                    "type": "null"
-                                  }
-                                ]
-                              },
-                              "function_call": {
-                                "default": null,
-                                "anyOf": [
-                                  {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "title": "Function",
-                                    "properties": {
-                                      "return_value": {
-                                        "default": null,
-                                        "title": "Return Value",
-                                        "anyOf": [
-                                          {
-                                            "type": "string"
-                                          },
-                                          {
-                                            "type": "null"
-                                          }
-                                        ]
-                                      },
-                                      "name": {
-                                        "title": "Name",
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "title": "Arguments",
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": "object"
-                                  },
-                                  {
-                                    "type": "null"
-                                  }
-                                ]
-                              },
-                              "refusal": {
-                                "default": null,
-                                "title": "Refusal",
-                                "anyOf": [
-                                  {
-                                    "type": "string"
-                                  },
-                                  {
-                                    "type": "null"
-                                  }
-                                ]
-                              },
-                              "trace_id": {
-                                "default": null,
-                                "title": "Trace Id",
-                                "anyOf": [
-                                  {
-                                    "type": "string"
-                                  },
-                                  {
-                                    "type": "null"
-                                  }
-                                ]
-                              }
-                            },
-                            "type": "object"
-                          },
-                          "title": "Input",
-                          "type": "array"
-                        }
-                      },
-                      "type": "object"
-                    },
-                    "sample": {
-                      "required": [
-                        "model",
-                        "choices"
-                      ],
-                      "properties": {
-                        "output_audio": {
-                          "type": [
-                            "object",
-                            "null"
-                          ]
-                        },
-                        "output_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "model": {
-                          "type": "string"
-                        },
-                        "input_tools": {
-                          "items": {
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "output_json": {
-                          "type": "object"
-                        },
-                        "output_reasoning_summary": {
-                          "type": [
-                            "string",
-                            "null"
-                          ]
-                        },
-                        "choices": {
-                          "items": {
-                            "required": [
-                              "index",
-                              "message",
-                              "finish_reason"
-                            ],
-                            "properties": {
-                              "message": {
-                                "required": [
-                                  "role"
-                                ],
-                                "properties": {
                                   "role": {
                                     "enum": [
                                       "assistant"
                                     ],
                                     "type": "string"
-                                  },
-                                  "function_call": {
-                                    "required": [
-                                      "name",
-                                      "arguments"
-                                    ],
-                                    "properties": {
-                                      "name": {
-                                        "type": "string"
-                                      },
-                                      "arguments": {
-                                        "type": "string"
-                                      }
-                                    },
-                                    "type": [
-                                      "object",
-                                      "null"
-                                    ]
-                                  },
-                                  "content": {
-                                    "type": [
-                                      "string",
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "tool_calls": {
-                                    "items": {
-                                      "required": [
-                                        "type",
-                                        "function",
-                                        "id"
-                                      ],
-                                      "properties": {
-                                        "function": {
-                                          "required": [
-                                            "name",
-                                            "arguments"
-                                          ],
-                                          "properties": {
-                                            "name": {
-                                              "type": "string"
-                                            },
-                                            "arguments": {
-                                              "type": "string"
-                                            }
-                                          },
-                                          "type": "object"
-                                        },
-                                        "id": {
-                                          "type": "string"
-                                        },
-                                        "type": {
-                                          "enum": [
-                                            "function"
-                                          ],
-                                          "type": "string"
-                                        }
-                                      },
-                                      "type": "object"
-                                    },
-                                    "type": [
-                                      "array",
-                                      "null"
-                                    ]
-                                  },
-                                  "refusal": {
-                                    "type": [
-                                      "boolean",
-                                      "null"
-                                    ]
                                   }
                                 },
                                 "type": "object"
@@ -5363,15 +237,12 @@ http_interactions:
                     }
                   },
                   "type": "object"
-                },
-                "metadata": {
-                  "usecase": "chatbot"
                 }
               },
               "name": "Sentiment Analysis",
               "testing_criteria": [
                 {
-                  "id": "Sentiment grader-8dee6df0-c9c3-4d4f-b500-c21b54aab4c9",
+                  "id": "Sentiment grader-88f5e332-d4f9-4843-aae1-918789dba587",
                   "type": "label_model",
                   "grdr_id": null,
                   "inactive_at": null,
@@ -5403,9 +274,9 @@ http_interactions:
               "metadata": {}
             }
           ],
-          "first_id": "eval_692886ca71948191a94a46ef2866fa38",
-          "has_more": true,
-          "last_id": "eval_69288313e4408191922bf1863f2ba432"
+          "first_id": "eval_69289e005d008191bc07606b2ceb522c",
+          "has_more": false,
+          "last_id": "eval_69289e005d008191bc07606b2ceb522c"
         }
-  recorded_at: Thu, 27 Nov 2025 17:37:45 GMT
+  recorded_at: Thu, 27 Nov 2025 18:53:28 GMT
 recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_list_setup.yml b/spec/fixtures/cassettes/evals_list_setup.yml
index 8d4d6769..36331d54 100644
--- a/spec/fixtures/cassettes/evals_list_setup.yml
+++ b/spec/fixtures/cassettes/evals_list_setup.yml
@@ -26,7 +26,7 @@ http_interactions:
       message: Created
     headers:
       Date:
-      - Thu, 27 Nov 2025 17:37:45 GMT
+      - Thu, 27 Nov 2025 18:52:48 GMT
       Content-Type:
       - application/json
       Transfer-Encoding:
@@ -38,13 +38,13 @@ http_interactions:
       Openai-Organization:
       - user-jxm65ijkzc1qrfhc0ij8moic
       X-Request-Id:
-      - req_ad59a4421109d03bf16f0dd5d568cb43
+      - req_60d32089805131c0071ce9eccb04177f
       Openai-Processing-Ms:
-      - '187'
+      - '550'
       Vary:
       - Accept-Encoding
       X-Envoy-Upstream-Service-Time:
-      - '189'
+      - '552'
       X-Openai-Proxy-Wasm:
       - v0.1
       Strict-Transport-Security:
@@ -52,26 +52,26 @@ http_interactions:
       Cf-Cache-Status:
       - DYNAMIC
       Set-Cookie:
-      - __cf_bm=2URRA0QzprNNuNe_uY_bAL3M.w.SeU2zUU2uu7Rgmz0-1764265065-1.0.1.1-vCfd9v8Qx1oKdvy8E2piuOxlGUK3rPotuNjTxl6IQYJ5WVaFqosWj4aLGxWLszSIoI0n04TnlJXIP.7QK4mv0e4P5vPOsuKUcXtA8Li09Yc;
-        path=/; expires=Thu, 27-Nov-25 18:07:45 GMT; domain=.api.openai.com; HttpOnly;
+      - __cf_bm=eFEINPHuc34CMCYL.SDiqLlyW4rJ.vafcOPwqC2BIDE-1764269568-1.0.1.1-CSW4VNVaa1sxqRqpfLPzzndPm4jSUTy_Rv8_rGTkh1X7cM1u91acPyUt_uboA44es5iyv.HJrprMsbi1okkD596sIKD189Iw3ijBcKj2i0A;
+        path=/; expires=Thu, 27-Nov-25 19:22:48 GMT; domain=.api.openai.com; HttpOnly;
         Secure; SameSite=None
-      - _cfuvid=YlC8_GZoyq1p_SeriLDaxuZ8sWYRFXIDpqQJzx6Rz.4-1764265065064-0.0.1.1-604800000;
+      - _cfuvid=1N0rpQDL2dtXgQFGNlVf.SVMbsPNPJg58wihf2nOrvM-1764269568498-0.0.1.1-604800000;
         path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
       X-Content-Type-Options:
       - nosniff
       Server:
       - cloudflare
       Cf-Ray:
-      - 9a53652e5cc9b11c-MAN
+      - 9a53d31ebd220764-MAN
       Alt-Svc:
       - h3=":443"; ma=86400
     body:
       encoding: ASCII-8BIT
       string: |-
         {
-          "id": "eval_69288c68ed50819185ed5845facf0be6",
+          "id": "eval_69289e005d008191bc07606b2ceb522c",
           "object": "eval",
-          "created_at": 1764265064,
+          "created_at": 1764269568,
           "data_source_config": {
             "type": "custom",
             "max_items": null,
@@ -242,7 +242,7 @@ http_interactions:
           "name": "Sentiment Analysis",
           "testing_criteria": [
             {
-              "id": "Sentiment grader-cd6a1097-7691-4502-a5f0-49c7e0043428",
+              "id": "Sentiment grader-88f5e332-d4f9-4843-aae1-918789dba587",
               "type": "label_model",
               "grdr_id": null,
               "inactive_at": null,
@@ -273,5 +273,5 @@ http_interactions:
           ],
           "metadata": {}
         }
-  recorded_at: Thu, 27 Nov 2025 17:37:45 GMT
+  recorded_at: Thu, 27 Nov 2025 18:52:48 GMT
 recorded_with: VCR 6.3.1

From b543a4ddfe9715cae826f1a94eafbecf40ab4f49 Mon Sep 17 00:00:00 2001
From: Juan Arboleda <35846576+alzeck@users.noreply.github.com>
Date: Thu, 27 Nov 2025 19:03:32 +0000
Subject: [PATCH 4/5] add supported endpoints

---
 README.md | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 570e8e36..3c5c4e37 100644
--- a/README.md
+++ b/README.md
@@ -1684,7 +1684,21 @@ client.messages.list(thread_id: thread_id)
 
 ### Evals
 
-Evals allow you to systematically evaluate the quality and performance of your AI models. You can create evaluations with specific testing criteria, run them against your models, and analyze the results.
+The [Evals Api](https://platform.openai.com/docs/api-reference/evals) allow you to systematically evaluate the quality and performance of your AI models.
+
+**Supported Endpoints:**
+- `POST /v1/evals` - Create an evaluation
+- `GET /v1/evals/{id}` - Retrieve an evaluation
+- `GET /v1/evals` - List evaluations
+- `POST /v1/evals/{id}` - Update an evaluation
+- `DELETE /v1/evals/{id}` - Delete an evaluation
+- `POST /v1/evals/{id}/runs` - Create an evaluation run
+- `GET /v1/evals/{id}/runs/{run_id}` - Retrieve an evaluation run
+- `GET /v1/evals/{id}/runs` - List evaluation runs
+- `POST /v1/evals/{id}/runs/{run_id}/cancel` - Cancel an evaluation run
+- `DELETE /v1/evals/{id}/runs/{run_id}` - Delete an evaluation run
+- `GET /v1/evals/{id}/runs/{run_id}/output_items` - List output items
+- `GET /v1/evals/{id}/runs/{run_id}/output_items/{item_id}` - Retrieve an output item
 
 #### Create an Eval
 

From dbabfd5496818e672a80ad37f0626043efca5fba Mon Sep 17 00:00:00 2001
From: Juan Arboleda <35846576+alzeck@users.noreply.github.com>
Date: Thu, 27 Nov 2025 19:18:14 +0000
Subject: [PATCH 5/5] update readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3c5c4e37..3a26fb41 100644
--- a/README.md
+++ b/README.md
@@ -1684,7 +1684,7 @@ client.messages.list(thread_id: thread_id)
 
 ### Evals
 
-The [Evals Api](https://platform.openai.com/docs/api-reference/evals) allow you to systematically evaluate the quality and performance of your AI models.
+The [Evals API](https://platform.openai.com/docs/api-reference/evals) allows you to systematically evaluate the quality and performance of your AI models.
 
 **Supported Endpoints:**
 - `POST /v1/evals` - Create an evaluation