diff --git a/README.md b/README.md
index a2021daf..3a26fb41 100644
--- a/README.md
+++ b/README.md
@@ -84,6 +84,19 @@ Stream GPT-5 chats with the Responses API, initiate Realtime WebRTC conversation
       - [Vision in a thread](#vision-in-a-thread)
       - [Runs involving function tools](#runs-involving-function-tools)
       - [Exploring chunks used in File Search](#exploring-chunks-used-in-file-search)
+    - [Evals](#evals)
+      - [Create an Eval](#create-an-eval)
+      - [Retrieve an Eval](#retrieve-an-eval)
+      - [List Evals](#list-evals)
+      - [Update an Eval](#update-an-eval)
+      - [Delete an Eval](#delete-an-eval)
+      - [Create an Eval Run](#create-an-eval-run)
+      - [List Eval Runs](#list-eval-runs)
+      - [Retrieve an Eval Run](#retrieve-an-eval-run)
+      - [Cancel an Eval Run](#cancel-an-eval-run)
+      - [Delete an Eval Run](#delete-an-eval-run)
+      - [List Output Items](#list-output-items)
+      - [Retrieve an Output Item](#retrieve-an-output-item)
     - [Image Generation](#image-generation)
       - [DALL·E 2](#dalle-2)
       - [DALL·E 3](#dalle-3)
@@ -1669,6 +1682,267 @@ end.compact
 client.messages.list(thread_id: thread_id)
 ```
 
+### Evals
+
+The [Evals API](https://platform.openai.com/docs/api-reference/evals) allows you to systematically evaluate the quality and performance of your AI models.
+
+**Supported Endpoints:**
+- `POST /v1/evals` - Create an evaluation
+- `GET /v1/evals/{id}` - Retrieve an evaluation
+- `GET /v1/evals` - List evaluations
+- `POST /v1/evals/{id}` - Update an evaluation
+- `DELETE /v1/evals/{id}` - Delete an evaluation
+- `POST /v1/evals/{id}/runs` - Create an evaluation run
+- `GET /v1/evals/{id}/runs/{run_id}` - Retrieve an evaluation run
+- `GET /v1/evals/{id}/runs` - List evaluation runs
+- `POST /v1/evals/{id}/runs/{run_id}/cancel` - Cancel an evaluation run
+- `DELETE /v1/evals/{id}/runs/{run_id}` - Delete an evaluation run
+- `GET /v1/evals/{id}/runs/{run_id}/output_items` - List output items
+- `GET /v1/evals/{id}/runs/{run_id}/output_items/{item_id}` - Retrieve an output item
+
+#### Create an Eval
+
+Create an evaluation with testing criteria to assess model outputs:
+
+```ruby
+response = client.evals.create(
+  parameters: {
+    name: "Sentiment Analysis Eval",
+    data_source_config: {
+      type: "stored_completions",
+      metadata: { usecase: "chatbot" }
+    },
+    testing_criteria: [
+      {
+        type: "label_model",
+        model: "o3-mini",
+        input: [
+          {
+            role: "developer",
+            content: "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+          },
+          {
+            role: "user",
+            content: "Statement: {{item.input}}"
+          }
+        ],
+        passing_labels: ["positive"],
+        labels: ["positive", "neutral", "negative"],
+        name: "Sentiment grader"
+      }
+    ],
+    metadata: { team: "product", version: "1.0" }
+  }
+)
+puts response["id"]
+# => "eval_abc123"
+```
+
+#### Retrieve an Eval
+
+Get details about a specific evaluation:
+
+```ruby
+eval_id = "eval_abc123"
+response = client.evals.retrieve(id: eval_id)
+puts response["name"]
+# => "Sentiment Analysis Eval"
+```
+
+#### List Evals
+
+List all evaluations with optional pagination:
+
+```ruby
+# List all evals
+response = client.evals.list
+
+# List with limit
+response = client.evals.list(parameters: { limit: 10 })
+
+# List with pagination
+response = client.evals.list(parameters: { after: "eval_abc123", limit: 20 })
+```
+
+#### Update an Eval
+
+Update an evaluation's properties, such as its name or metadata:
+
+```ruby
+response = client.evals.update(
+  id: eval_id,
+  parameters: {
+    metadata: { version: "2.0", updated: "true" }
+  }
+)
+```
+
+#### Delete an Eval
+
+Delete an evaluation:
+
+```ruby
+response = client.evals.delete(id: eval_id)
+puts response["deleted"]
+# => true
+```
+
+#### Create an Eval Run
+
+Run an evaluation against a model with test data:
+
+```ruby
+response = client.evals.runs.create(
+  eval_id: eval_id,
+  parameters: {
+    name: "gpt-4o-mini baseline",
+    data_source: {
+      type: "completions",
+      input_messages: {
+        type: "template",
+        template: [
+          {
+            role: "system",
+            content: "You are a sentiment analyzer. Respond with only: positive, neutral, or negative."
+          },
+          {
+            role: "user",
+            content: "{{item.input}}"
+          }
+        ]
+      },
+      sampling_params: {
+        temperature: 0.7,
+        max_completion_tokens: 50,
+        top_p: 1.0
+      },
+      model: "gpt-4o-mini",
+      source: {
+        type: "file_content",
+        content: [
+          {
+            item: {
+              input: "I absolutely love this product! Best purchase ever.",
+              ground_truth: "positive"
+            }
+          },
+          {
+            item: {
+              input: "This is terrible. Very disappointed.",
+              ground_truth: "negative"
+            }
+          },
+          {
+            item: {
+              input: "It's okay, nothing special.",
+              ground_truth: "neutral"
+            }
+          }
+        ]
+      }
+    },
+    metadata: { experiment: "baseline", date: "2024-01-15" }
+  }
+)
+puts response["id"]
+# => "evalrun_xyz789"
+```
+
+#### List Eval Runs
+
+List all runs for a specific evaluation:
+
+```ruby
+# List all runs
+response = client.evals.runs.list(eval_id: eval_id)
+
+# List with limit
+response = client.evals.runs.list(
+  eval_id: eval_id,
+  parameters: { limit: 10 }
+)
+
+# List with pagination
+response = client.evals.runs.list(
+  eval_id: eval_id,
+  parameters: { after: "evalrun_abc123", limit: 20 }
+)
+```
+
+#### Retrieve an Eval Run
+
+Get details about a specific evaluation run:
+
+```ruby
+run_id = "evalrun_xyz789"
+response = client.evals.runs.retrieve(
+  eval_id: eval_id,
+  id: run_id
+)
+puts response["status"]
+# => "completed"
+```
+
+#### Cancel an Eval Run
+
+Cancel an in-progress evaluation run:
+
+```ruby
+response = client.evals.runs.cancel(
+  eval_id: eval_id,
+  id: run_id
+)
+puts response["status"]
+# => "canceled"
+```
+
+#### Delete an Eval Run
+
+Delete an evaluation run:
+
+```ruby
+response = client.evals.runs.delete(
+  eval_id: eval_id,
+  id: run_id
+)
+puts response["deleted"]
+# => true
+```
+
+#### List Output Items
+
+List the output items produced by an evaluation run:
+
+```ruby
+# List all output items
+response = client.evals.runs.output_items.list(
+  eval_id: eval_id,
+  run_id: run_id
+)
+
+# List with pagination
+response = client.evals.runs.output_items.list(
+  eval_id: eval_id,
+  run_id: run_id,
+  parameters: { limit: 10, after: "item_abc123" }
+)
+```
+
+#### Retrieve an Output Item
+
+Get details about a specific output item:
+
+```ruby
+output_item_id = "item_abc123"
+response = client.evals.runs.output_items.retrieve(
+  eval_id: eval_id,
+  run_id: run_id,
+  id: output_item_id
+)
+puts response["status"]
+# => "pass"
+```
+
 ### Image Generation
 
 Generate images using DALL·E 2 or DALL·E 3!
diff --git a/lib/openai.rb b/lib/openai.rb
index d5880c90..9128bfaf 100644
--- a/lib/openai.rb
+++ b/lib/openai.rb
@@ -22,6 +22,7 @@
 require_relative "openai/batches"
 require_relative "openai/usage"
 require_relative "openai/conversations"
+require_relative "openai/evals"
 
 module OpenAI
   class Error < StandardError; end
diff --git a/lib/openai/client.rb b/lib/openai/client.rb
index 6054af0f..c9127861 100644
--- a/lib/openai/client.rb
+++ b/lib/openai/client.rb
@@ -109,6 +109,10 @@ def conversations
       @conversations ||= OpenAI::Conversations.new(client: self)
     end
 
+    def evals
+      @evals ||= OpenAI::Evals.new(client: self) # built on first call, then reused
+    end
+
     def azure?
       @api_type&.to_sym == :azure
     end
diff --git a/lib/openai/evals.rb b/lib/openai/evals.rb
new file mode 100644
index 00000000..cb6927e0
--- /dev/null
+++ b/lib/openai/evals.rb
@@ -0,0 +1,75 @@
+module OpenAI
+  class Evals
+    def initialize(client:)
+      @client = client # every call below is delegated to this object
+    end
+
+    def create(parameters: {})
+      @client.json_post(path: "/evals", parameters: parameters) # create an eval
+    end
+
+    def retrieve(id:)
+      @client.get(path: "/evals/#{id}") # fetch one eval by id
+    end
+
+    def update(id:, parameters: {})
+      @client.json_post(path: "/evals/#{id}", parameters: parameters) # update is also a POST
+    end
+
+    def delete(id:)
+      @client.delete(path: "/evals/#{id}") # delete one eval by id
+    end
+
+    def list(parameters: {})
+      @client.get(path: "/evals", parameters: parameters) # e.g. limit/after for paging
+    end
+
+    def runs
+      @runs ||= Runs.new(client: @client) # memoized; reuses this object's client
+    end
+
+    class Runs
+      def initialize(client:)
+        @client = client
+      end
+
+      def create(eval_id:, parameters: {})
+        @client.json_post(path: "/evals/#{eval_id}/runs", parameters: parameters) # new run
+      end
+
+      def retrieve(eval_id:, id:)
+        @client.get(path: "/evals/#{eval_id}/runs/#{id}") # id is the run id
+      end
+
+      def list(eval_id:, parameters: {})
+        @client.get(path: "/evals/#{eval_id}/runs", parameters: parameters) # runs of one eval
+      end
+
+      def cancel(eval_id:, id:)
+        @client.post(path: "/evals/#{eval_id}/runs/#{id}/cancel") # sent with no parameters
+      end
+
+      def delete(eval_id:, id:)
+        @client.delete(path: "/evals/#{eval_id}/runs/#{id}") # id is the run id
+      end
+
+      def output_items
+        @output_items ||= OutputItems.new(client: @client) # memoized; same client
+      end
+
+      class OutputItems
+        def initialize(client:)
+          @client = client
+        end
+
+        def list(eval_id:, run_id:, parameters: {})
+          @client.get(path: "/evals/#{eval_id}/runs/#{run_id}/output_items", parameters: parameters)
+        end
+
+        def retrieve(eval_id:, run_id:, id:)
+          @client.get(path: "/evals/#{eval_id}/runs/#{run_id}/output_items/#{id}") # one item
+        end
+      end
+    end
+  end
+end
diff --git a/spec/fixtures/cassettes/evals_create.yml b/spec/fixtures/cassettes/evals_create.yml
new file mode 100644
index 00000000..919537ef
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_create.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:43 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_ffb92a5fdd9ce2220567f011a43dba2c
+      Openai-Processing-Ms:
+      - '555'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '557'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=G4GcoNWM5rT0kajORKFsYNgcXAz8HMsb5rZ8Zzkw_LI-1764265063-1.0.1.1-Dp9PBpRTcl2U.a5OAYevFIabxr3OC6hE7.9O3KIiWz6tB1_9SQ1VMKna9wP6_3b8KkVh3uQtSLsQ0_BXDzRXOfZyUuFUaoabN67UbMP4q64;
+        path=/; expires=Thu, 27-Nov-25 18:07:43 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=x0rOeErtaRsMGn4UZqdIWs3b9skWEly1bRr442iHDDE-1764265063034-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53651eddc83c84-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c66d7bc8191a3a478e4e5b174f7",
+          "object": "eval",
+          "created_at": 1764265062,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-8c5ece96-5e5d-4051-87a1-9af9a668be70",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:43 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_delete.yml b/spec/fixtures/cassettes/evals_delete.yml
new file mode 100644
index 00000000..4760d4fd
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_delete.yml
@@ -0,0 +1,74 @@
+---
+http_interactions:
+- request:
+    method: delete
+    uri: https://api.openai.com/v1/evals/eval_69288c753da081918baccc440dfb3cea
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:57 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_5b2c7c66080f297227416bf472b45866
+      Openai-Processing-Ms:
+      - '267'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '269'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=FDivFwBSqSQaDjz.Sv2hSLAw1c.5lyvVl0nLrxBNXHo-1764265077-1.0.1.1-KkM8oLJRZq5PU2QFlnlZFTTWvrS77ZQkSbdDyJTCxO_5sOqJZ8VB8KXJw4lN0JxBa06O3AaCfEBncyzc78mA7scuegGDTEebpLCiXE4iF3U;
+        path=/; expires=Thu, 27-Nov-25 18:07:57 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=I7CzEHguum6GIZRgNj0yySn3bEm20mjveBzKgnyKSA0-1764265077911-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53657e4af94d65-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "eval.deleted",
+          "deleted": true,
+          "eval_id": "eval_69288c753da081918baccc440dfb3cea"
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:57 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_delete_setup.yml b/spec/fixtures/cassettes/evals_delete_setup.yml
new file mode 100644
index 00000000..2afd5f32
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_delete_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:57 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_a48649a5da6f373a7dd1c03ad7727a2d
+      Openai-Processing-Ms:
+      - '181'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '184'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=gaAv6K1dIpaWENMBQCC5oWFC7WYA15BUV5ufk1SWx_Q-1764265077-1.0.1.1-_Rc0A7giRZmUc2b5hF6pZCpHFLyZscP93icF_6Lp9NDWaJhiafMjpahB2ETzDa9c6vS1QBMbHTvgr04kIRJZpwg7DX8lHQ4Mwbm6d5z6S7E;
+        path=/; expires=Thu, 27-Nov-25 18:07:57 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=1rBJq8YPuzzVcpl.U_aK0HikFl_72LTQ.QmqttbpqPk-1764265077383-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53657b9e2fcc9e-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c753da081918baccc440dfb3cea",
+          "object": "eval",
+          "created_at": 1764265077,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-a4525dea-03ff-497b-bdd4-7d1d1bdbb3d1",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:57 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_list.yml b/spec/fixtures/cassettes/evals_list.yml
new file mode 100644
index 00000000..40a5f998
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_list.yml
@@ -0,0 +1,282 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 18:53:28 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_fb43ab73c5cedacfd3da4797f457f98f
+      Openai-Processing-Ms:
+      - '219'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '221'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=KRMsUubnpJCqt86K8dPwv6XLqF3.4SbEgig4dkt7RRI-1764269608-1.0.1.1-_G07YEtDCfSxGTL9A8U8sbKS89nOibkLGjMQX4Jldo0PVN8nGeQ2Zwi4BbbkQZeADFqI8PULxKJ4FjeGEqUXLVe4Aydb4r.pit5qX.WyHpc;
+        path=/; expires=Thu, 27-Nov-25 19:23:28 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=5YZOeZ8X7F73apsyWvgmXwsk44AQOl0d3728FzPciXA-1764269608650-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53d41a0bc70764-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "list",
+          "data": [
+            {
+              "id": "eval_69289e005d008191bc07606b2ceb522c",
+              "object": "eval",
+              "created_at": 1764269568,
+              "data_source_config": {
+                "type": "custom",
+                "max_items": null,
+                "schema": {
+                  "required": [
+                    "item",
+                    "sample"
+                  ],
+                  "properties": {
+                    "item": {
+                      "required": [
+                        "input"
+                      ],
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    },
+                    "sample": {
+                      "required": [
+                        "model",
+                        "choices"
+                      ],
+                      "properties": {
+                        "output_audio": {
+                          "type": [
+                            "object",
+                            "null"
+                          ]
+                        },
+                        "output_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "model": {
+                          "type": "string"
+                        },
+                        "input_tools": {
+                          "items": {
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_json": {
+                          "type": "object"
+                        },
+                        "output_reasoning_summary": {
+                          "type": [
+                            "string",
+                            "null"
+                          ]
+                        },
+                        "choices": {
+                          "items": {
+                            "required": [
+                              "index",
+                              "message",
+                              "finish_reason"
+                            ],
+                            "properties": {
+                              "message": {
+                                "required": [
+                                  "role"
+                                ],
+                                "properties": {
+                                  "tool_calls": {
+                                    "items": {
+                                      "required": [
+                                        "type",
+                                        "function",
+                                        "id"
+                                      ],
+                                      "properties": {
+                                        "function": {
+                                          "required": [
+                                            "name",
+                                            "arguments"
+                                          ],
+                                          "properties": {
+                                            "name": {
+                                              "type": "string"
+                                            },
+                                            "arguments": {
+                                              "type": "string"
+                                            }
+                                          },
+                                          "type": "object"
+                                        },
+                                        "id": {
+                                          "type": "string"
+                                        },
+                                        "type": {
+                                          "enum": [
+                                            "function"
+                                          ],
+                                          "type": "string"
+                                        }
+                                      },
+                                      "type": "object"
+                                    },
+                                    "type": [
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "function_call": {
+                                    "required": [
+                                      "name",
+                                      "arguments"
+                                    ],
+                                    "properties": {
+                                      "name": {
+                                        "type": "string"
+                                      },
+                                      "arguments": {
+                                        "type": "string"
+                                      }
+                                    },
+                                    "type": [
+                                      "object",
+                                      "null"
+                                    ]
+                                  },
+                                  "content": {
+                                    "type": [
+                                      "string",
+                                      "array",
+                                      "null"
+                                    ]
+                                  },
+                                  "refusal": {
+                                    "type": [
+                                      "boolean",
+                                      "null"
+                                    ]
+                                  },
+                                  "role": {
+                                    "enum": [
+                                      "assistant"
+                                    ],
+                                    "type": "string"
+                                  }
+                                },
+                                "type": "object"
+                              },
+                              "finish_reason": {
+                                "type": "string"
+                              }
+                            },
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "output_text": {
+                          "type": "string"
+                        }
+                      },
+                      "type": "object"
+                    }
+                  },
+                  "type": "object"
+                }
+              },
+              "name": "Sentiment Analysis",
+              "testing_criteria": [
+                {
+                  "id": "Sentiment grader-88f5e332-d4f9-4843-aae1-918789dba587",
+                  "type": "label_model",
+                  "grdr_id": null,
+                  "inactive_at": null,
+                  "input": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "Statement: {{item.input}}"
+                    }
+                  ],
+                  "labels": [
+                    "positive",
+                    "neutral",
+                    "negative"
+                  ],
+                  "model": "o3-mini",
+                  "name": "Sentiment grader",
+                  "passing_labels": [
+                    "positive"
+                  ],
+                  "sampling_params": null
+                }
+              ],
+              "metadata": {}
+            }
+          ],
+          "first_id": "eval_69289e005d008191bc07606b2ceb522c",
+          "has_more": false,
+          "last_id": "eval_69289e005d008191bc07606b2ceb522c"
+        }
+  recorded_at: Thu, 27 Nov 2025 18:53:28 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_list_setup.yml b/spec/fixtures/cassettes/evals_list_setup.yml
new file mode 100644
index 00000000..36331d54
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_list_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 18:52:48 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_60d32089805131c0071ce9eccb04177f
+      Openai-Processing-Ms:
+      - '550'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '552'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=eFEINPHuc34CMCYL.SDiqLlyW4rJ.vafcOPwqC2BIDE-1764269568-1.0.1.1-CSW4VNVaa1sxqRqpfLPzzndPm4jSUTy_Rv8_rGTkh1X7cM1u91acPyUt_uboA44es5iyv.HJrprMsbi1okkD596sIKD189Iw3ijBcKj2i0A;
+        path=/; expires=Thu, 27-Nov-25 19:22:48 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=1N0rpQDL2dtXgQFGNlVf.SVMbsPNPJg58wihf2nOrvM-1764269568498-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53d31ebd220764-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69289e005d008191bc07606b2ceb522c",
+          "object": "eval",
+          "created_at": 1764269568,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-88f5e332-d4f9-4843-aae1-918789dba587",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 18:52:48 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_retrieve.yml b/spec/fixtures/cassettes/evals_retrieve.yml
new file mode 100644
index 00000000..aef193a5
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_retrieve.yml
@@ -0,0 +1,274 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_69288c65203881918b2f062b30cd7aa9
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:42 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_34dbc89ee9874a57aa011f2e89ff09c1
+      Openai-Processing-Ms:
+      - '64'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '67'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=u9ZriP.qwIAjZx8EdcGA_lIdQSy3JoUCEUCsrPXIkpA-1764265062-1.0.1.1-LNP5FQOOpKBdaIc3C2ccuYy.iL1pp_bMijvzoBpBvlWXNbP0g8wxMdoCOzPwpuOjSF8v3iva6oKWgOm89yNOz6qPnPaZSgHd9uPKbeJYKP4;
+        path=/; expires=Thu, 27-Nov-25 18:07:42 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=VCPQsuc9dpzChvitHps0wIlQBQgFbIHfaNT3JZ0f3XA-1764265062088-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365199dad35c5-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c65203881918b2f062b30cd7aa9",
+          "object": "eval",
+          "created_at": 1764265061,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-4e90c6d8-f26c-4cb4-a339-41b1b1bb0532",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:42 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_retrieve_setup.yml b/spec/fixtures/cassettes/evals_retrieve_setup.yml
new file mode 100644
index 00000000..af74513e
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_retrieve_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:41 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_93497c36b5e84c57ad0a8e3d9de98b23
+      Openai-Processing-Ms:
+      - '560'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '563'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=pLXAPhTauwRvA5Vu6GbOt1q0_W6MjmZflR3Trnvy8Z4-1764265061-1.0.1.1-N39qqUPxahcowOUZhs4ckWv3aC_u8dg2t9h8L492QtXIcewUsoTNAwIACQ6T8z8JFW6AWgAWg0AfaTZC5aEnahw312LxhY4ItvhkktWXlxM;
+        path=/; expires=Thu, 27-Nov-25 18:07:41 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=GzrCuuyexD.2XVBCWz_QtDE76Mqhxn7C3kVHuxbsHNY-1764265061277-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365124ca72216-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c65203881918b2f062b30cd7aa9",
+          "object": "eval",
+          "created_at": 1764265061,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-4e90c6d8-f26c-4cb4-a339-41b1b1bb0532",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:41 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_cancel.yml b/spec/fixtures/cassettes/evals_runs_cancel.yml
new file mode 100644
index 00000000..7510d981
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_cancel.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c7132e48191ad7ff46c32cf1c46/runs/evalrun_69288c71b74c819183f7e7ed01b4d5ff/cancel
+    body:
+      encoding: UTF-8
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Content-Length:
+      - '0'
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:54 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_9ce39fdd8e538dc9a6878bf4d0daa643
+      Openai-Processing-Ms:
+      - '670'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '672'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=4b4Q3YWwvFU9VANtkY3qjcqR_gDeiqYZCwJIyxsrEjU-1764265074-1.0.1.1-ymldn.MFe8F6uLfJ4vp5MhutNsXSjTy.gBbXOCzxLpiVbDIzmkPqtJS6g8.yD86fizxh3Qf.31nb7jxqc0v62ntUCa8wtPt2laRT4wtO7SE;
+        path=/; expires=Thu, 27-Nov-25 18:07:54 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=kVuspCEagYsUYNL.F9tTWd5XOiXGUoSC6cFPRSIZe.s-1764265074927-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365693b274f47-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c71b74c819183f7e7ed01b4d5ff",
+          "object": "eval.run",
+          "created_at": 1764265073,
+          "status": "canceled",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c7132e48191ad7ff46c32cf1c46",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c7132e48191ad7ff46c32cf1c46?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c71b74c819183f7e7ed01b4d5ff",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:54 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_cancel_run_setup.yml b/spec/fixtures/cassettes/evals_runs_cancel_run_setup.yml
new file mode 100644
index 00000000..dba503e1
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_cancel_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c7132e48191ad7ff46c32cf1c46/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:54 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_0093cd2118771c8e402a60b5407cc55a
+      Openai-Processing-Ms:
+      - '406'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '408'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=uM0mV1hkYyLOKIK2GYHYsHFckJcIb2swTCF8MGpNWRQ-1764265074-1.0.1.1-MVHCmdcMYkqpRZoXUHf1Xaqx4uSf2gOzKUZef1C2cA11076.c5EnrNdYpkEC9Plv.aSuHmwDcU8ymp8rmkRfLcdBBpKEldY1LB4VhoKgS1A;
+        path=/; expires=Thu, 27-Nov-25 18:07:54 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=4CiPhk45QtL_dsOtL13kGKXPphdXg2Qyj1NKAWiyHTI-1764265074004-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a536564fcdcb0fb-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c71b74c819183f7e7ed01b4d5ff",
+          "object": "eval.run",
+          "created_at": 1764265073,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c7132e48191ad7ff46c32cf1c46",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c7132e48191ad7ff46c32cf1c46?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c71b74c819183f7e7ed01b4d5ff",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:53 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_cancel_setup.yml b/spec/fixtures/cassettes/evals_runs_cancel_setup.yml
new file mode 100644
index 00000000..a25921ad
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_cancel_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:53 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_95082c9c904d2480498f6f2999b250ac
+      Openai-Processing-Ms:
+      - '178'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '181'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=a7Te_D7MzgN1F9UQdvVgcdF66cnb72ZVkd7UpZRWGEE-1764265073-1.0.1.1-eYABxh1jILbTiLG6XrF.sWnR_vYqKaf7DPw91z9BFU_NJHkWacIIj5NEgutT3e0Rn.2VhJ1TXNP3vkFcu6rE1cqQsrOp_OwWQ6.JR4IOpdU;
+        path=/; expires=Thu, 27-Nov-25 18:07:53 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=.Ivd8N1lBfEG2ehGEYjU_vQM.irXnKU.iXYfQEgtkqU-1764265073337-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365625eb4a935-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c7132e48191ad7ff46c32cf1c46",
+          "object": "eval",
+          "created_at": 1764265073,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-27eb57ce-63cc-42fb-aa8b-6f3ab2cf73b8",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:53 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_create.yml b/spec/fixtures/cassettes/evals_runs_create.yml
new file mode 100644
index 00000000..906b6003
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_create.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c6ba2348191be05c2d02d0e0c0b/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:48 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_a227f8a26a2131d8473e1c92074eec46
+      Openai-Processing-Ms:
+      - '760'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '763'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=9ZswAkI3q8s5c6ThS5HjOpifSgedQWKidh4TOo7iv3w-1764265068-1.0.1.1-5Yims_ZHAyjNs7xrqmoYvMK3buvL3vGW1YIhz5oCF.FUvl1TnFPMg4zlbLZ7ScgFbFNlFTrwtzN8lCVxXd3.SjsaGt3sI00xabiVGrQdZfE;
+        path=/; expires=Thu, 27-Nov-25 18:07:48 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=WZSrX35UqN8Z.mHJ69EsSjq5J3..4kRCvd91oQHmxCI-1764265068789-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365422eff2210-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c6c25fc8191ac632b505a3ff1c9",
+          "object": "eval.run",
+          "created_at": 1764265068,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c6ba2348191be05c2d02d0e0c0b",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c6ba2348191be05c2d02d0e0c0b?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c6c25fc8191ac632b505a3ff1c9",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:48 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_create_setup.yml b/spec/fixtures/cassettes/evals_runs_create_setup.yml
new file mode 100644
index 00000000..a866fb21
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_create_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:47 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_96f911be32e1840672acd68fd0dea03f
+      Openai-Processing-Ms:
+      - '234'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '237'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=iSICdk.EdIDyle.RQysHEXmyES4qOD6qYZrmaELQLDA-1764265067-1.0.1.1-_E3nGyLYoebWy.Ed9MMjOgHi28ZepbGSTPvdl7w161O2ex_rTGDSlvf2p5c_uOnYMNQRWXYJ.HxGByHkU4miSAhurDxSKB.6hDN8ZIOX_mg;
+        path=/; expires=Thu, 27-Nov-25 18:07:47 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=vOcml4pSfdEN84KBG_RBgnPJ2nrh14amro7Q1K0d4xs-1764265067769-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53653f0c150c19-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c6ba2348191be05c2d02d0e0c0b",
+          "object": "eval",
+          "created_at": 1764265067,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-9474cccc-047e-4ddf-bb6f-b983ca6aef0d",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:47 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_delete.yml b/spec/fixtures/cassettes/evals_runs_delete.yml
new file mode 100644
index 00000000..45012dc0
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_delete.yml
@@ -0,0 +1,196 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288d369be48191a9dd469d6cb22c9f/runs/evalrun_69288d37912c81919e794ac84108363b/cancel
+    body:
+      encoding: UTF-8
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Content-Length:
+      - '0'
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:41:13 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_0e5f5722ba2a4e819f366b5b08f03d05
+      Openai-Processing-Ms:
+      - '555'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '558'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=HUQa2g4EML8HJ0fSYEyP8Y1LvFWAs0vMtx_2K2jXggA-1764265273-1.0.1.1-wLir27KcO3TJiEsJtTgjKpw02lSnGmQNyqlkZ4ljpvWaIsI5f4TrbxqT2Fi9klEzlfvGbpNyTaVJjcazBJ0Cu5Ezr_yl_Rns_F_9yxrGoFM;
+        path=/; expires=Thu, 27-Nov-25 18:11:13 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=VPa8xE7AHfpB9ZpDZZ8Tb0x8BiBplf_CE5ylsOX4FZE-1764265273069-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a536a403929eb17-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288d37912c81919e794ac84108363b",
+          "object": "eval.run",
+          "created_at": 1764265271,
+          "status": "canceled",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288d369be48191a9dd469d6cb22c9f",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288d369be48191a9dd469d6cb22c9f?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288d37912c81919e794ac84108363b",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:41:13 GMT
+- request:
+    method: delete
+    uri: https://api.openai.com/v1/evals/eval_69288d369be48191a9dd469d6cb22c9f/runs/evalrun_69288d37912c81919e794ac84108363b
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:41:14 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_f31add9c41781b9ef7f694152f59dadf
+      Openai-Processing-Ms:
+      - '763'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '767'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=f9nvHlPi3UV7AK.TJumxczUKk1dIQDhExsuDFexiC.o-1764265274-1.0.1.1-GRqhzByoSCF1KKz1tZXP23mtv9XboeAZfYge3kRkZ2xpVGDqAuxUOpqK8oxTahqMEYXBH1NvIDolFuhNGFmlJsbU2s_qGdYlnjxe5odg_Ss;
+        path=/; expires=Thu, 27-Nov-25 18:11:14 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=kt_WqBTaaNSKPkhG35J4JsD8KAwC3rcdQEFaxw2b_do-1764265274323-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a536a454a12b140-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "eval.run.deleted",
+          "deleted": true,
+          "run_id": "evalrun_69288d37912c81919e794ac84108363b"
+        }
+  recorded_at: Thu, 27 Nov 2025 17:41:14 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_delete_run_setup.yml b/spec/fixtures/cassettes/evals_runs_delete_run_setup.yml
new file mode 100644
index 00000000..827634fd
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_delete_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288d369be48191a9dd469d6cb22c9f/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:41:12 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_992f53ec833344ac9d252a854242aad9
+      Openai-Processing-Ms:
+      - '1074'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '1077'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=pDzZar7eZ1cWdnVwvzRNA5pYoyYTAiFvoayFkrLokkA-1764265272-1.0.1.1-kU3SDYb6dj0Qzau6KPvdiPo5Z47dqdwFaMyhjpDaOQTtoNYKjMiuP.KEH.2iKgH43P.beXjXno_HWnkeoNJl5zm4vVXQT15LoUKrnGq55YE;
+        path=/; expires=Thu, 27-Nov-25 18:11:12 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=ptb8p7NZ79WAj3ZgZcOYPNFxCtjgfJHstwiuvU1ZPrw-1764265272257-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a536a36bf98a935-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288d37912c81919e794ac84108363b",
+          "object": "eval.run",
+          "created_at": 1764265271,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288d369be48191a9dd469d6cb22c9f",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288d369be48191a9dd469d6cb22c9f?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288d37912c81919e794ac84108363b",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:41:12 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_delete_setup.yml b/spec/fixtures/cassettes/evals_runs_delete_setup.yml
new file mode 100644
index 00000000..88d8195d
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_delete_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:41:10 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_e6cd656f098a4c5383491d45df4c6bdc
+      Openai-Processing-Ms:
+      - '210'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '213'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=eoMCgwTjmGa8TpChmbY35gYtGNsdnvJDG9Oy_HucA3A-1764265270-1.0.1.1-nu7guHa3w50bOYH9x7yVao20MEVGw4A6WC6gnPKshlJDGsawMg9XLgkgmeG0jhE.kQIM6LLGI_Dsm56wwalEAdBPK5Y9RzVtmPsZ.3CjgDo;
+        path=/; expires=Thu, 27-Nov-25 18:11:10 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=wvlyhu5ZmXw1nXW0.HaQuloGBXKaYEUGpYRztDTf63U-1764265270748-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a536a330a47b367-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288d369be48191a9dd469d6cb22c9f",
+          "object": "eval",
+          "created_at": 1764265270,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-44de3f73-0711-4426-8b59-cb26720e53f5",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:41:10 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_list.yml b/spec/fixtures/cassettes/evals_runs_list.yml
new file mode 100644
index 00000000..5fdc435f
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_list.yml
@@ -0,0 +1,131 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_6928994e3c788191aa5575493ab58226/runs
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 18:32:48 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_4014e25ebde4430ae5ff17e80598937c
+      Openai-Processing-Ms:
+      - '310'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '313'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=RCxCb9hNoDNLsT8SSz.QWaZ16OE3wz0H2OOVgObF0Hg-1764268368-1.0.1.1-xbuw3j1YAdWfl2qFpzspmbbwU220pO9LL4W14d4GqWvMorzobwUPp373M6RGG4obrYtV.kSFHFeBERs2yiVxllmvySVXHgXIeF1WqLf99zc;
+        path=/; expires=Thu, 27-Nov-25 19:02:48 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=69e_irZTWWiJst8RToJInC_xiIGqs6FHMv9GIZeo2cA-1764268368400-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53b5d36820cd1a-LHR
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "list",
+          "data": [
+            {
+              "id": "evalrun_6928994f39d88191b9a47a69de5eda51",
+              "object": "eval.run",
+              "created_at": 1764268367,
+              "status": "in_progress",
+              "data_source": {
+                "type": "completions",
+                "source": {
+                  "type": "file_content",
+                  "content": [
+                    {
+                      "item": {
+                        "input": "I love this product!",
+                        "ground_truth": "positive"
+                      }
+                    }
+                  ]
+                },
+                "input_messages": {
+                  "type": "template",
+                  "template": [
+                    {
+                      "type": "message",
+                      "role": "developer",
+                      "content": "You are a helpful assistant."
+                    },
+                    {
+                      "type": "message",
+                      "role": "user",
+                      "content": "{{item.input}}"
+                    }
+                  ]
+                },
+                "model": "gpt-4o-mini",
+                "provider_credentials": null,
+                "modalities": null,
+                "sampling_params": null
+              },
+              "error": null,
+              "eval_id": "eval_6928994e3c788191aa5575493ab58226",
+              "model": "gpt-4o-mini",
+              "name": "Run 1",
+              "per_model_usage": null,
+              "per_testing_criteria_results": null,
+              "report_url": "https://platform.openai.com/evaluations/eval_6928994e3c788191aa5575493ab58226?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_6928994f39d88191b9a47a69de5eda51",
+              "result_counts": {
+                "errored": 0,
+                "failed": 0,
+                "passed": 0,
+                "total": 0
+              },
+              "shared_with_openai": false,
+              "metadata": {}
+            }
+          ],
+          "first_id": "evalrun_6928994f39d88191b9a47a69de5eda51",
+          "has_more": false,
+          "last_id": "evalrun_6928994f39d88191b9a47a69de5eda51"
+        }
+  recorded_at: Thu, 27 Nov 2025 18:32:48 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_list_run_setup.yml b/spec/fixtures/cassettes/evals_runs_list_run_setup.yml
new file mode 100644
index 00000000..8e749f19
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_list_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_6928994e3c788191aa5575493ab58226/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 18:32:47 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_21708b12e8268ffa47de3abd79ef2344
+      Openai-Processing-Ms:
+      - '1031'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '1034'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=i_CIB19VZtHAv_rk8skB4htrkC04jLpYcMScJYKzV_E-1764268367-1.0.1.1-y91vztSj2kUhklMWvqJgYdGsGk3Y.SfmrPoB7FAykNzgLIYqeZaZo12Df_dARe4utNy7jWI0novwYwAHqlaiVppfxpCHqlTThEom5TV3eOY;
+        path=/; expires=Thu, 27-Nov-25 19:02:47 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=h_NO79XlA6.gBFloHWFMOow1Kg0nS3yu.4GAJNFY6A0-1764268367770-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53b5caee33dfb4-LHR
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_6928994f39d88191b9a47a69de5eda51",
+          "object": "eval.run",
+          "created_at": 1764268367,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_6928994e3c788191aa5575493ab58226",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_6928994e3c788191aa5575493ab58226?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_6928994f39d88191b9a47a69de5eda51",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 18:32:47 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_list_setup.yml b/spec/fixtures/cassettes/evals_runs_list_setup.yml
new file mode 100644
index 00000000..d58f89ff
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_list_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 18:32:46 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_7761586e9b3dc966280851030e885ca3
+      Openai-Processing-Ms:
+      - '687'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '689'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=z3MpTJQQJDq8NpDy1Mgky8PcQ917uOEFzRSZzw_TX90-1764268366-1.0.1.1-2l4GXx9_Ul92nzE08ZGg4Re_dYImAYbYaL7O5z_qzmA067mtyGUhPYNQcLmINwRl76cW14HWWE9cMJheDF7xor28wYGbW1SjWY4D4_s0wxI;
+        path=/; expires=Thu, 27-Nov-25 19:02:46 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=qkT5usGvz0OmPbVRs3HPJ7EptZZee9PslFmbV9UvVac-1764268366427-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53b5c47d22f816-LHR
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_6928994e3c788191aa5575493ab58226",
+          "object": "eval",
+          "created_at": 1764268366,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-2e66f46a-0407-4cc5-bd42-a1c5cde44f6c",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 18:32:46 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_list.yml b/spec/fixtures/cassettes/evals_runs_output_items_list.yml
new file mode 100644
index 00000000..c0e38c0d
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_list.yml
@@ -0,0 +1,76 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_69288c6d54f48191bb18d415547ff09c/runs/evalrun_69288c6e2dac819181b017027cd1d2ba/output_items
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:51 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_044e8ae19b15a7101a63c51d97527437
+      Openai-Processing-Ms:
+      - '380'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '382'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=Ij1_ZQwKnjbo57fg2wgjLL6wo6ykZnDO32Ca0ushv18-1764265071-1.0.1.1-b0Zelat_yRyQHGzvXuBH7A62cjICDnwr8.KXH1ZNQ8nhk9qYmI4U3xE.6e6CojNe5CAODZVFAWymziugL4BG96hgHnpMUmgZzkh.0aCpR9M;
+        path=/; expires=Thu, 27-Nov-25 18:07:51 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=5kFavy3PInCs8S45ZxHlLdbcTlGBpxX80ap0mbCmaGc-1764265071138-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365532c7ab128-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "list",
+          "data": [],
+          "first_id": null,
+          "has_more": false,
+          "last_id": null
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:51 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_list_output_item_setup.yml b/spec/fixtures/cassettes/evals_runs_output_items_list_output_item_setup.yml
new file mode 100644
index 00000000..4c4d5055
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_list_output_item_setup.yml
@@ -0,0 +1,76 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_692886c4402c8191b16daf0a13927d55/runs/evalrun_692886c4cb7c8191a6d061543b6c0224/output_items
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:13:41 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_03cd0ef2eb6148bb9db8a92286faa23a
+      Openai-Processing-Ms:
+      - '224'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '228'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=6MUOh_RSBEwcm8ZA61n6mZa9_Wlfc.sKgtkfiFWcBjA-1764263621-1.0.1.1-itCbxTpT_mHZQI1ZXkaSUhKMXrMSXzYfYyQQ6PmQ56wgvn7uZ862F8GJ9vlxbxvngqovWvP8hgIFeb_iO5RRWbraUfIQSd4DVNPiQtwzIVI;
+        path=/; expires=Thu, 27-Nov-25 17:43:41 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=jphYlrigl5iW9Plhyq3CwLH4hYQY1Qp8MYaUJmIgoqc-1764263621721-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5341f08cb6220e-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "list",
+          "data": [],
+          "first_id": null,
+          "has_more": false,
+          "last_id": null
+        }
+  recorded_at: Thu, 27 Nov 2025 17:13:41 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_list_run_setup.yml b/spec/fixtures/cassettes/evals_runs_output_items_list_run_setup.yml
new file mode 100644
index 00000000..d75a849b
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_list_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c6d54f48191bb18d415547ff09c/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:50 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_88e93be9ef8d9e6a97cc9becd3e56a22
+      Openai-Processing-Ms:
+      - '441'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '444'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=yn01YpoTluz_g0dCfOJu029HxxIE0YxtsV_IDxPV0cA-1764265070-1.0.1.1-RGZeYvFOEMNrnyrsOeT2CRd0vOtSqedJVlTGy7CyAvS2Gc9xzTqqGHOlPIQnWUpK77v2rogZPxT1htz3zdV7c0ACVCmsva90aIUPOPh8sh0;
+        path=/; expires=Thu, 27-Nov-25 18:07:50 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=zo1jRkukrqcGsIqmCJTtvx_F..hdTyvBGAcHj4x.aEo-1764265070481-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53654ceffb2196-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c6e2dac819181b017027cd1d2ba",
+          "object": "eval.run",
+          "created_at": 1764265070,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c6d54f48191bb18d415547ff09c",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c6d54f48191bb18d415547ff09c?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c6e2dac819181b017027cd1d2ba",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:50 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_list_setup.yml b/spec/fixtures/cassettes/evals_runs_output_items_list_setup.yml
new file mode 100644
index 00000000..c1a0cf44
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_list_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:49 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_900410bfa9674eb2bb47cbf185769adc
+      Openai-Processing-Ms:
+      - '184'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '187'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=HULhDfLfjW5mhRIsax.joyijyBBd7EmO2Q3O_IVs7sE-1764265069-1.0.1.1-U_UGCd9.7V5Vj1C.0tk_ra2ZeJ8wT6gajpUlS1MtflDyxwh.9EhYL1aVQCE7pMrSuLNCSyFtkGCltY2l.WqoKyBI0udBVF18gFbumUpDkZk;
+        path=/; expires=Thu, 27-Nov-25 18:07:49 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=xXc2dl6GdscUZ1VH5HMyd84ZVjDXG6GEdzqczQKDTQA-1764265069472-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365488bf0cca5-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c6d54f48191bb18d415547ff09c",
+          "object": "eval",
+          "created_at": 1764265069,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-abb2f7c9-703e-4ea7-93fa-a96fa580dcd4",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:49 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_retrieve.yml b/spec/fixtures/cassettes/evals_runs_output_items_retrieve.yml
new file mode 100644
index 00000000..224a38d7
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_retrieve.yml
@@ -0,0 +1,303 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_69288c6f782c8191900a5cb6be3db61e/runs/evalrun_69288c70a03881919438f1de10070910/output_items
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:57:40 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_63f77af99d92f4ad3817b3439f12de1c
+      Openai-Processing-Ms:
+      - '386'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '388'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=3KuybbQ5ivkgrdoN8P1j2dPObVied6ifNmVh4kUsZbQ-1764266260-1.0.1.1-yfOh7gnUJL3FHVOt4IEgFaKUZldHA2wK6vJofU4dU.x11w9ng8sRVPOrbK4ASfW2RI3L86PErGFm7tv3qy9VUza6CNSyWLgJxcSynLbK9z0;
+        path=/; expires=Thu, 27-Nov-25 18:27:40 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=jA56LhejCounC.ZFpgofvJ2Nk9OTb5HVXFeLiqACL1U-1764266260747-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53825d6a22eb1d-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "object": "list",
+          "data": [
+            {
+              "id": "outputitem_69288c7485688191b3f81ee02a17a2b8",
+              "object": "eval.run.output_item",
+              "created_at": 1764265076,
+              "status": "pass",
+              "_datasource_item_content_hash": "07bd0d39b771a2e3976c536264799dc3f2b6e5e943a8d68dc3058bac176de445",
+              "available_includes": [],
+              "datasource_item": {
+                "input": "I love this product!",
+                "ground_truth": "positive"
+              },
+              "datasource_item_id": 0,
+              "eval_id": "eval_69288c6f782c8191900a5cb6be3db61e",
+              "results": [
+                {
+                  "name": "Sentiment grader-32bb5cf1-b6c0-4030-ba29-10a1dca004ac",
+                  "score": 1.0,
+                  "passed": true,
+                  "sample": {
+                    "input": [
+                      {
+                        "role": "developer",
+                        "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                      },
+                      {
+                        "role": "user",
+                        "content": "Statement: I love this product!"
+                      }
+                    ],
+                    "output": [
+                      {
+                        "role": "assistant",
+                        "content": "{\n  \"steps\": [\n    {\n      \"description\": \"The statement 'I love this product!' includes the word 'love,' indicating a strong positive emotion towards the product.\",\n      \"conclusion\": \"The sentiment is positive.\"\n    }\n  ],\n  \"result\": \"positive\"\n}"
+                      }
+                    ],
+                    "finish_reason": "stop",
+                    "model": "o3-mini-2025-01-31",
+                    "usage": {
+                      "total_tokens": 287,
+                      "completion_tokens": 137,
+                      "prompt_tokens": 150,
+                      "cached_tokens": 0
+                    },
+                    "error": null,
+                    "temperature": 1.0,
+                    "top_p": 1.0
+                  }
+                }
+              ],
+              "run_id": "evalrun_69288c70a03881919438f1de10070910",
+              "sample": {
+                "input": [
+                  {
+                    "role": "developer",
+                    "content": "You are a helpful assistant."
+                  },
+                  {
+                    "role": "user",
+                    "content": "I love this product!"
+                  }
+                ],
+                "output": [
+                  {
+                    "role": "assistant",
+                    "content": "That's great to hear! What product are you referring to? I'd love to know more about it and what you enjoy about it!"
+                  }
+                ],
+                "finish_reason": "stop",
+                "model": "gpt-4o-mini-2024-07-18",
+                "usage": {
+                  "total_tokens": 48,
+                  "completion_tokens": 26,
+                  "prompt_tokens": 22,
+                  "cached_tokens": 0
+                },
+                "error": null,
+                "temperature": 1.0,
+                "top_p": 1.0
+              }
+            }
+          ],
+          "first_id": "outputitem_69288c7485688191b3f81ee02a17a2b8",
+          "has_more": false,
+          "last_id": "outputitem_69288c7485688191b3f81ee02a17a2b8"
+        }
+  recorded_at: Thu, 27 Nov 2025 17:57:40 GMT
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_69288c6f782c8191900a5cb6be3db61e/runs/evalrun_69288c70a03881919438f1de10070910/output_items/outputitem_69288c7485688191b3f81ee02a17a2b8
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:57:41 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_96ab283a68914e5aae9e838825b84daa
+      Openai-Processing-Ms:
+      - '275'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '277'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=MpuILeElqrljrDl2kgh.q2CvWdew2aULy1VsYpIJLNw-1764266261-1.0.1.1-FOnzEQiiVXTlMCOaCE7fFSD2AiAu_M8r__8x6H8TAdj1_u0cNrAVEidH2LWeOVx022TfA4Qojh2ARuZcSxvIArc6GC0PpDUH_lV_7NpqB34;
+        path=/; expires=Thu, 27-Nov-25 18:27:41 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=qAK71GovjGWZA0BgqVhkKve3h_zFGkBk0nJgN3qfOzY-1764266261401-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5382624872b134-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "outputitem_69288c7485688191b3f81ee02a17a2b8",
+          "object": "eval.run.output_item",
+          "created_at": 1764265076,
+          "status": "pass",
+          "_datasource_item_content_hash": "07bd0d39b771a2e3976c536264799dc3f2b6e5e943a8d68dc3058bac176de445",
+          "available_includes": [],
+          "datasource_item": {
+            "input": "I love this product!",
+            "ground_truth": "positive"
+          },
+          "datasource_item_id": 0,
+          "eval_id": "eval_69288c6f782c8191900a5cb6be3db61e",
+          "results": [
+            {
+              "name": "Sentiment grader-32bb5cf1-b6c0-4030-ba29-10a1dca004ac",
+              "score": 1.0,
+              "passed": true,
+              "sample": {
+                "input": [
+                  {
+                    "role": "developer",
+                    "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                  },
+                  {
+                    "role": "user",
+                    "content": "Statement: I love this product!"
+                  }
+                ],
+                "output": [
+                  {
+                    "role": "assistant",
+                    "content": "{\n  \"steps\": [\n    {\n      \"description\": \"The statement 'I love this product!' includes the word 'love,' indicating a strong positive emotion towards the product.\",\n      \"conclusion\": \"The sentiment is positive.\"\n    }\n  ],\n  \"result\": \"positive\"\n}"
+                  }
+                ],
+                "finish_reason": "stop",
+                "model": "o3-mini-2025-01-31",
+                "usage": {
+                  "total_tokens": 287,
+                  "completion_tokens": 137,
+                  "prompt_tokens": 150,
+                  "cached_tokens": 0
+                },
+                "error": null,
+                "temperature": 1.0,
+                "top_p": 1.0
+              }
+            }
+          ],
+          "run_id": "evalrun_69288c70a03881919438f1de10070910",
+          "sample": {
+            "input": [
+              {
+                "role": "developer",
+                "content": "You are a helpful assistant."
+              },
+              {
+                "role": "user",
+                "content": "I love this product!"
+              }
+            ],
+            "output": [
+              {
+                "role": "assistant",
+                "content": "That's great to hear! What product are you referring to? I'd love to know more about it and what you enjoy about it!"
+              }
+            ],
+            "finish_reason": "stop",
+            "model": "gpt-4o-mini-2024-07-18",
+            "usage": {
+              "total_tokens": 48,
+              "completion_tokens": 26,
+              "prompt_tokens": 22,
+              "cached_tokens": 0
+            },
+            "error": null,
+            "temperature": 1.0,
+            "top_p": 1.0
+          }
+        }
+  recorded_at: Thu, 27 Nov 2025 17:57:41 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_retrieve_run_setup.yml b/spec/fixtures/cassettes/evals_runs_output_items_retrieve_run_setup.yml
new file mode 100644
index 00000000..b0641740
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_retrieve_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c6f782c8191900a5cb6be3db61e/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:52 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_8024ef0cea996ec159db9512d79643bb
+      Openai-Processing-Ms:
+      - '888'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '891'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=OG6k0pEHTzDFHWR2hZstktrhqxb5xEApbyEXWLX4J8w-1764265072-1.0.1.1-0aVXMbN5qlCvwfOOYv.im0ZQN7B3cb4W52hJd2LtyueNQyeuwczOl6AHFZ5gaPajOy28Kn.BYIMKh1RqUcZZm7mEq_1KRSOaywG2rSXPviQ;
+        path=/; expires=Thu, 27-Nov-25 18:07:52 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=zX24zGeI3V58S_T1gJDemI.15.Ut328syKiyJ8nXK54-1764265072902-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53655af947ba0d-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c70a03881919438f1de10070910",
+          "object": "eval.run",
+          "created_at": 1764265072,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c6f782c8191900a5cb6be3db61e",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c6f782c8191900a5cb6be3db61e?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c70a03881919438f1de10070910",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:52 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_output_items_retrieve_setup.yml b/spec/fixtures/cassettes/evals_runs_output_items_retrieve_setup.yml
new file mode 100644
index 00000000..ac064fd8
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_output_items_retrieve_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:51 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_db025b54c4df6310390ca1dd18fd378a
+      Openai-Processing-Ms:
+      - '317'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '320'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=udxiDFrwu5WKkGQ9Wvj3P_U_AnoaYhAp9zKt1nDyASU-1764265071-1.0.1.1-r03vlkmGOoO_2v_HrjcqpRTyPZ6rGYjbwYzO.KNtyhCU1Ku4ZRHVSF8mczGOzNlJMakxHihZ3jobRlOUXanSLN18EwHvSiQlZ_b27rr5e.o;
+        path=/; expires=Thu, 27-Nov-25 18:07:51 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=qk9zb8mfB6JGfHeQNy2ogyookxBtkWpGrh_Uhe_b8zA-1764265071717-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365574b9eb3ae-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c6f782c8191900a5cb6be3db61e",
+          "object": "eval",
+          "created_at": 1764265071,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-32bb5cf1-b6c0-4030-ba29-10a1dca004ac",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:51 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_retrieve.yml b/spec/fixtures/cassettes/evals_runs_retrieve.yml
new file mode 100644
index 00000000..2e930997
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_retrieve.yml
@@ -0,0 +1,123 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.openai.com/v1/evals/eval_69288c6a0c488191ac6adc60180c4d03/runs/evalrun_69288c6a90708191b86c7e82b893c846
+    body:
+      encoding: US-ASCII
+      string: ''
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:47 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_46323b933a8c92d4fb5a19503306f7e9
+      Openai-Processing-Ms:
+      - '135'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '138'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=qweBroIdUWsc2kwidjmrBoBjh0AgfmD9HIlYa7dr9U8-1764265067-1.0.1.1-s4MB7.JR0s1NVmhpFZtvnXP.ZjY7LBRRIGftgzUzTdfSaoEYyOXOl5b46LNRRoz_RSSRyhL_aRcnfjC259bzh4VuzydeG3pQrpbPzJ4pImQ;
+        path=/; expires=Thu, 27-Nov-25 18:07:47 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=tN3KJHPGcEeisrKyUp1v_Qxgl0niwbz0WWiaLrsuvfI-1764265067259-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53653c9a130d8b-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c6a90708191b86c7e82b893c846",
+          "object": "eval.run",
+          "created_at": 1764265066,
+          "status": "in_progress",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c6a0c488191ac6adc60180c4d03",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c6a0c488191ac6adc60180c4d03?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c6a90708191b86c7e82b893c846",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:47 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_retrieve_run_setup.yml b/spec/fixtures/cassettes/evals_runs_retrieve_run_setup.yml
new file mode 100644
index 00000000..82827f0e
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_retrieve_run_setup.yml
@@ -0,0 +1,125 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c6a0c488191ac6adc60180c4d03/runs
+    body:
+      encoding: UTF-8
+      string: '{"name":"Run 1","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"You
+        are a helpful assistant."},{"role":"user","content":"{{item.input}}"}]},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"I
+        love this product!","ground_truth":"positive"}}]}}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:46 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_7376dfd4e698e39760ab9f9e861b62ba
+      Openai-Processing-Ms:
+      - '430'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '433'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=Ob0N0Dhz8GBj8HxJrxf5mkfWFLAmmMZQhy1uzMJu0ho-1764265066-1.0.1.1-VrQ2qANnuSoglCiTa7t5h0DFmGk93wFj.RDjRQoZio10n7A0..tRymAKWzWbH0LhYlfiCIMRePofe78ZAi4rtW1D.Pno7zI6O3cB2MDW_2U;
+        path=/; expires=Thu, 27-Nov-25 18:07:46 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=Vtmm6kZeYPTgBS0vw15wIJboU1CXLC3c04o.l7r55W4-1764265066871-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365385d0e89d9-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "evalrun_69288c6a90708191b86c7e82b893c846",
+          "object": "eval.run",
+          "created_at": 1764265066,
+          "status": "queued",
+          "data_source": {
+            "type": "completions",
+            "source": {
+              "type": "file_content",
+              "content": [
+                {
+                  "item": {
+                    "input": "I love this product!",
+                    "ground_truth": "positive"
+                  }
+                }
+              ]
+            },
+            "input_messages": {
+              "type": "template",
+              "template": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "{{item.input}}"
+                }
+              ]
+            },
+            "model": "gpt-4o-mini",
+            "provider_credentials": null,
+            "modalities": null,
+            "sampling_params": null
+          },
+          "error": null,
+          "eval_id": "eval_69288c6a0c488191ac6adc60180c4d03",
+          "model": "gpt-4o-mini",
+          "name": "Run 1",
+          "per_model_usage": null,
+          "per_testing_criteria_results": null,
+          "report_url": "https://platform.openai.com/evaluations/eval_69288c6a0c488191ac6adc60180c4d03?project_id=proj_0h5pObirNvBYj1ZWydWLSS04&run_id=evalrun_69288c6a90708191b86c7e82b893c846",
+          "result_counts": {
+            "errored": 0,
+            "failed": 0,
+            "passed": 0,
+            "total": 0
+          },
+          "shared_with_openai": false,
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:46 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_runs_retrieve_setup.yml b/spec/fixtures/cassettes/evals_runs_retrieve_setup.yml
new file mode 100644
index 00000000..ef5954b9
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_runs_retrieve_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:46 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_327229bac7ba039a82b7a1f8e5e9115a
+      Openai-Processing-Ms:
+      - '215'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '218'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=_zFDCgf3us1YhS..PbKumYZeV7ao_jczl7G5ViGrC70-1764265066-1.0.1.1-ugnL3IdDBgW7rjNvXjez6DPCmq0TSqhMfUqfmLGMSE.0rtQu9qKKpxPaGTFdthGabPTwXNlOsfcDSAPRKhUGODnxYZwkKwbdQiOu3xwKaDQ;
+        path=/; expires=Thu, 27-Nov-25 18:07:46 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=hU4FpeK1yL8DXLn7h76pYGgqJNJBbeYLu8_.K2R6ifw-1764265066186-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53653538ffeb1a-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c6a0c488191ac6adc60180c4d03",
+          "object": "eval",
+          "created_at": 1764265066,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-05bd0d3d-fee4-42ce-a2ad-6c9a8ad3d6a1",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:46 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_update.yml b/spec/fixtures/cassettes/evals_update.yml
new file mode 100644
index 00000000..c44a3a0c
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_update.yml
@@ -0,0 +1,276 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals/eval_69288c67f948819192f601708f5599d9
+    body:
+      encoding: UTF-8
+      string: '{"metadata":{"modified":"true"}}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:44 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_76577432037fb0cc3a49b85a478dd4ea
+      Openai-Processing-Ms:
+      - '250'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '253'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=LquyUkXNYgQfoMPEMVOLWFRNC_qD2hsiye2hDetrVF8-1764265064-1.0.1.1-9VDp9tH4CN_s_mg8W6Vfs8hkXfGh1fQy4keP3IgG719WIFoAGCpfkiaqn4PbLZHYnPziTRgbhsWHEqDbHepqoAmFdKvAx4jPAb.6gBLYRHk;
+        path=/; expires=Thu, 27-Nov-25 18:07:44 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=lZ7wtHiLT.0eAO_IUzVM9XwnK4S.DoHF2kk1gOfdPaE-1764265064596-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a53652b4a49d85e-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c67f948819192f601708f5599d9",
+          "object": "eval",
+          "created_at": 1764265063,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-49de7e11-beb1-436c-92da-bc8611135e48",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {
+            "modified": "true"
+          }
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:44 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/cassettes/evals_update_setup.yml b/spec/fixtures/cassettes/evals_update_setup.yml
new file mode 100644
index 00000000..26fec873
--- /dev/null
+++ b/spec/fixtures/cassettes/evals_update_setup.yml
@@ -0,0 +1,277 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/evals
+    body:
+      encoding: UTF-8
+      string: '{"name":"Sentiment Analysis","data_source_config":{"type":"custom","item_schema":{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]},"include_sample_schema":true},"testing_criteria":[{"type":"label_model","model":"o3-mini","input":[{"role":"developer","content":"Classify
+        the sentiment of the following statement as one of ''positive'', ''neutral'',
+        or ''negative''"},{"role":"user","content":"Statement: {{item.input}}"}],"passing_labels":["positive"],"labels":["positive","neutral","negative"],"name":"Sentiment
+        grader"}]}'
+    headers:
+      Content-Type:
+      - application/json
+      Authorization:
+      - Bearer <OPENAI_ACCESS_TOKEN>
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+  response:
+    status:
+      code: 201
+      message: Created
+    headers:
+      Date:
+      - Thu, 27 Nov 2025 17:37:44 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Openai-Version:
+      - '2020-10-01'
+      Openai-Organization:
+      - user-jxm65ijkzc1qrfhc0ij8moic
+      X-Request-Id:
+      - req_74c5589fb7844e62af40aaf6e3f72341
+      Openai-Processing-Ms:
+      - '681'
+      Vary:
+      - Accept-Encoding
+      X-Envoy-Upstream-Service-Time:
+      - '684'
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=VzK9NnpeChQPlG5syc9jH_ssnhnA1FT8uXdbcVhOGBs-1764265064-1.0.1.1-fsILAJMqlR.D5mGnLKpZkjt7NNjmn4tH0JS_Ln1ekBktkhX1ALQtvJL0rP9KGDaqARVgN_dT8bW9IpRvpp1ItulJC16qsrGkuW.8wQKwiCw;
+        path=/; expires=Thu, 27-Nov-25 18:07:44 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=M8ILSssH1N4.yHGKflKGVTVsLIwgAvMibM.p7ZJROas-1764265064110-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 9a5365249faaa62f-MAN
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |-
+        {
+          "id": "eval_69288c67f948819192f601708f5599d9",
+          "object": "eval",
+          "created_at": 1764265063,
+          "data_source_config": {
+            "type": "custom",
+            "max_items": null,
+            "schema": {
+              "type": "object",
+              "properties": {
+                "item": {
+                  "type": "object",
+                  "properties": {
+                    "input": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "input"
+                  ]
+                },
+                "sample": {
+                  "type": "object",
+                  "properties": {
+                    "model": {
+                      "type": "string"
+                    },
+                    "choices": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "message": {
+                            "type": "object",
+                            "properties": {
+                              "role": {
+                                "type": "string",
+                                "enum": [
+                                  "assistant"
+                                ]
+                              },
+                              "content": {
+                                "type": [
+                                  "string",
+                                  "array",
+                                  "null"
+                                ]
+                              },
+                              "refusal": {
+                                "type": [
+                                  "boolean",
+                                  "null"
+                                ]
+                              },
+                              "tool_calls": {
+                                "type": [
+                                  "array",
+                                  "null"
+                                ],
+                                "items": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "enum": [
+                                        "function"
+                                      ]
+                                    },
+                                    "function": {
+                                      "type": "object",
+                                      "properties": {
+                                        "name": {
+                                          "type": "string"
+                                        },
+                                        "arguments": {
+                                          "type": "string"
+                                        }
+                                      },
+                                      "required": [
+                                        "name",
+                                        "arguments"
+                                      ]
+                                    },
+                                    "id": {
+                                      "type": "string"
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "function",
+                                    "id"
+                                  ]
+                                }
+                              },
+                              "function_call": {
+                                "type": [
+                                  "object",
+                                  "null"
+                                ],
+                                "properties": {
+                                  "name": {
+                                    "type": "string"
+                                  },
+                                  "arguments": {
+                                    "type": "string"
+                                  }
+                                },
+                                "required": [
+                                  "name",
+                                  "arguments"
+                                ]
+                              }
+                            },
+                            "required": [
+                              "role"
+                            ]
+                          },
+                          "finish_reason": {
+                            "type": "string"
+                          }
+                        },
+                        "required": [
+                          "index",
+                          "message",
+                          "finish_reason"
+                        ]
+                      }
+                    },
+                    "output_text": {
+                      "type": "string"
+                    },
+                    "output_json": {
+                      "type": "object"
+                    },
+                    "output_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    },
+                    "output_reasoning_summary": {
+                      "type": [
+                        "string",
+                        "null"
+                      ]
+                    },
+                    "output_audio": {
+                      "type": [
+                        "object",
+                        "null"
+                      ]
+                    },
+                    "input_tools": {
+                      "type": "array",
+                      "items": {
+                        "type": "object"
+                      }
+                    }
+                  },
+                  "required": [
+                    "model",
+                    "choices"
+                  ]
+                }
+              },
+              "required": [
+                "item",
+                "sample"
+              ]
+            }
+          },
+          "name": "Sentiment Analysis",
+          "testing_criteria": [
+            {
+              "id": "Sentiment grader-49de7e11-beb1-436c-92da-bc8611135e48",
+              "type": "label_model",
+              "grdr_id": null,
+              "inactive_at": null,
+              "input": [
+                {
+                  "type": "message",
+                  "role": "developer",
+                  "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'"
+                },
+                {
+                  "type": "message",
+                  "role": "user",
+                  "content": "Statement: {{item.input}}"
+                }
+              ],
+              "labels": [
+                "positive",
+                "neutral",
+                "negative"
+              ],
+              "model": "o3-mini",
+              "name": "Sentiment grader",
+              "passing_labels": [
+                "positive"
+              ],
+              "sampling_params": null
+            }
+          ],
+          "metadata": {}
+        }
+  recorded_at: Thu, 27 Nov 2025 17:37:44 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/openai/client/evals_spec.rb b/spec/openai/client/evals_spec.rb
new file mode 100644
index 00000000..d79289c0
--- /dev/null
+++ b/spec/openai/client/evals_spec.rb
@@ -0,0 +1,308 @@
+RSpec.describe OpenAI::Client do
+  # Exercises the Evals endpoints (evals, runs and run output items) against VCR
+  # cassettes. Every nested group defines its own `cassette` name; the shared
+  # fixtures below record their setup requests under "<cassette> setup" and
+  # "<cassette> run setup".
+  describe "#evals" do
+    # Payload for creating a sentiment-classification eval graded by a single
+    # label_model criterion.
+    let(:eval_params) do
+      {
+        name: "Sentiment Analysis",
+        data_source_config: {
+          type: "custom",
+          item_schema: {
+            type: "object",
+            properties: {
+              input: { type: "string" }
+            },
+            required: ["input"]
+          },
+          include_sample_schema: true
+        },
+        testing_criteria: [
+          {
+            type: "label_model",
+            model: "o3-mini",
+            input: [
+              { role: "developer",
+                content: "Classify the sentiment of the following statement " \
+                         "as one of 'positive', 'neutral', or 'negative'" },
+              { role: "user", content: "Statement: {{item.input}}" }
+            ],
+            passing_labels: ["positive"],
+            labels: %w[positive neutral negative],
+            name: "Sentiment grader"
+          }
+        ]
+      }
+    end
+
+    # Id of an eval created inside the "<cassette> setup" cassette.
+    let(:eval_id) do
+      VCR.use_cassette("#{cassette} setup") do
+        OpenAI::Client.new.evals.create(
+          parameters: eval_params
+        )["id"]
+      end
+    end
+
+    # Payload for a completions run over one inline sample item.
+    let(:run_params) do
+      {
+        name: "Run 1",
+        data_source: {
+          type: "completions",
+          input_messages: {
+            type: "template",
+            template: [
+              {
+                role: "developer",
+                content: "You are a helpful assistant."
+              },
+              {
+                role: "user",
+                content: "{{item.input}}"
+              }
+            ]
+          },
+          model: "gpt-4o-mini",
+          source: {
+            type: "file_content",
+            content: [
+              {
+                item: {
+                  input: "I love this product!",
+                  ground_truth: "positive"
+                }
+              }
+            ]
+          }
+        }
+      }
+    end
+
+    # Id of a run created for eval_id inside the "<cassette> run setup" cassette.
+    let(:run_id) do
+      VCR.use_cassette("#{cassette} run setup") do
+        OpenAI::Client.new.evals.runs.create(
+          eval_id: eval_id,
+          parameters: run_params
+        )["id"]
+      end
+    end
+
+    describe "#retrieve" do
+      let(:cassette) { "evals retrieve" }
+      let(:response) { OpenAI::Client.new.evals.retrieve(id: eval_id) }
+
+      it "succeeds" do
+        VCR.use_cassette(cassette) do
+          expect(response["object"]).to eq("eval")
+          expect(response["id"]).to eq(eval_id)
+        end
+      end
+    end
+
+    describe "#create" do
+      let(:cassette) { "evals create" }
+      let(:response) do
+        OpenAI::Client.new.evals.create(
+          parameters: eval_params
+        )
+      end
+
+      it "succeeds" do
+        VCR.use_cassette(cassette) do
+          expect(response["object"]).to eq("eval")
+          expect(response["name"]).to eq("Sentiment Analysis")
+        end
+      end
+    end
+
+    describe "#update" do
+      let(:cassette) { "evals update" }
+      let(:response) do
+        OpenAI::Client.new.evals.update(
+          id: eval_id,
+          parameters: { metadata: { modified: "true" } }
+        )
+      end
+
+      it "succeeds" do
+        VCR.use_cassette(cassette) do
+          expect(response["object"]).to eq("eval")
+          # Confirm the update was applied, not merely that an eval came back.
+          expect(response.dig("metadata", "modified")).to eq("true")
+        end
+      end
+    end
+
+    describe "#list", :vcr do
+      let(:cassette) { "evals list" }
+      let(:response) { OpenAI::Client.new.evals.list }
+
+      # Force eval creation before listing.
+      before { eval_id }
+
+      it "succeeds" do
+        VCR.use_cassette(cassette) do
+          expect(response["object"]).to eq("list")
+          expect(response["data"]).to be_an(Array)
+          expect(response.dig("data", 0, "object")).to eq("eval") if response["data"].any?
+        end
+      end
+    end
+
+    describe "#runs" do
+      describe "#list", :vcr do
+        let(:cassette) { "evals runs list" }
+        let(:response) { OpenAI::Client.new.evals.runs.list(eval_id: eval_id) }
+
+        # Force run creation (and, through it, eval creation) before listing.
+        before { run_id }
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            expect(response["object"]).to eq("list")
+            expect(response["data"]).to be_an(Array)
+            expect(response.dig("data", 0, "object")).to eq("eval.run") if response["data"].any?
+          end
+        end
+      end
+
+      describe "#retrieve" do
+        let(:cassette) { "evals runs retrieve" }
+        let(:response) do
+          OpenAI::Client.new.evals.runs.retrieve(
+            eval_id: eval_id,
+            id: run_id
+          )
+        end
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            expect(response["object"]).to eq("eval.run")
+            expect(response["id"]).to eq(run_id)
+            expect(response["eval_id"]).to eq(eval_id)
+          end
+        end
+      end
+
+      describe "#create" do
+        let(:cassette) { "evals runs create" }
+        let(:response) do
+          OpenAI::Client.new.evals.runs.create(
+            eval_id: eval_id,
+            parameters: run_params
+          )
+        end
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            expect(response["object"]).to eq("eval.run")
+            expect(response["eval_id"]).to eq(eval_id)
+            expect(response["name"]).to eq("Run 1")
+          end
+        end
+      end
+
+      describe "#output_items" do
+        describe "#list", :vcr do
+          let(:cassette) { "evals runs output_items list" }
+          let(:response) do
+            OpenAI::Client.new.evals.runs.output_items.list(
+              eval_id: eval_id,
+              run_id: run_id
+            )
+          end
+
+          it "succeeds" do
+            VCR.use_cassette(cassette) do
+              expect(response["object"]).to eq("list")
+              expect(response["data"]).to be_an(Array)
+            end
+          end
+        end
+
+        describe "#retrieve" do
+          let(:cassette) { "evals runs output_items retrieve" }
+          # Id of the first listed output item. Evaluated lazily: the list request
+          # is issued when `response` is first built inside the example's cassette.
+          let(:output_item_id) do
+            OpenAI::Client.new.evals.runs.output_items.list(
+              eval_id: eval_id,
+              run_id: run_id
+            )["data"].first["id"]
+          end
+          let(:response) do
+            OpenAI::Client.new.evals.runs.output_items.retrieve(
+              eval_id: eval_id,
+              run_id: run_id,
+              id: output_item_id
+            )
+          end
+
+          it "succeeds" do
+            VCR.use_cassette(cassette) do
+              expect(response["object"]).to eq("eval.run.output_item")
+              expect(response["id"]).to eq(output_item_id)
+            end
+          end
+        end
+      end
+
+      describe "#cancel" do
+        let(:cassette) { "evals runs cancel" }
+        let(:response) do
+          OpenAI::Client.new.evals.runs.cancel(
+            eval_id: eval_id,
+            id: run_id
+          )
+        end
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            expect(response["object"]).to eq("eval.run")
+            expect(response["status"]).to eq("canceled")
+          end
+        end
+      end
+
+      describe "#delete" do
+        let(:cassette) { "evals runs delete" }
+        let(:response) do
+          # The run is cancelled first; the block returns the delete response.
+          OpenAI::Client.new.evals.runs.cancel(
+            eval_id: eval_id,
+            id: run_id
+          )
+
+          OpenAI::Client.new.evals.runs.delete(
+            eval_id: eval_id,
+            id: run_id
+          )
+        end
+
+        it "succeeds" do
+          VCR.use_cassette(cassette) do
+            expect(response["object"]).to eq("eval.run.deleted")
+          end
+        end
+      end
+    end
+
+    describe "#delete" do
+      let(:cassette) { "evals delete" }
+      let(:response) do
+        OpenAI::Client.new.evals.delete(id: eval_id)
+      end
+
+      it "succeeds" do
+        VCR.use_cassette(cassette) do
+          expect(response["object"]).to eq("eval.deleted")
+        end
+      end
+    end
+  end
+end