Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Offline learning tweak; adding gradients for tool_choice #113

Merged
merged 5 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 39 additions & 22 deletions ldp/alg/runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,30 +306,47 @@ def __init__(
async def train(self) -> None:
random.shuffle(self.train_trajectories)

for training_step, i_batch_start in enumerate(
trange(
0,
len(self.train_trajectories),
self.config.batch_size,
desc="Training iterations",
ncols=0,
)
):
batch = self.train_trajectories[
i_batch_start : i_batch_start + self.config.batch_size
]

# Only show the progress bar if we are doing full-batch optimization
self.optimizer.aggregate(
batch, show_pbar=len(self.train_trajectories) <= self.config.batch_size
)
full_batch = len(self.train_trajectories) <= self.config.batch_size
sidnarayanan marked this conversation as resolved.
Show resolved Hide resolved

if (training_step + 1) % self.config.update_every == 0:
await self.optimizer.update()
await asyncio.gather(*[
callback.after_update() for callback in self.callbacks
])
if full_batch:
# Separating out the full batch case lets the user run a single update()
# step even if train_trajectories is empty. This can be useful if the
# optimizer is pre-populated with offline training data, for example.
batch = self.train_trajectories

self.optimizer.aggregate(batch, show_pbar=True)
await self.optimizer.update()

await asyncio.gather(*[
callback.after_update() for callback in self.callbacks
])

await asyncio.gather(*[
callback.after_train_step(batch) for callback in self.callbacks
])

else:
for training_step, i_batch_start in enumerate(
trange(
0,
len(self.train_trajectories),
self.config.batch_size,
desc="Training Iterations",
ncols=0,
)
):
batch = self.train_trajectories[
i_batch_start : i_batch_start + self.config.batch_size
]

self.optimizer.aggregate(batch)

if (training_step + 1) % self.config.update_every == 0:
await self.optimizer.update()
await asyncio.gather(*[
callback.after_update() for callback in self.callbacks
])

await asyncio.gather(*[
callback.after_train_step(batch) for callback in self.callbacks
])
10 changes: 3 additions & 7 deletions ldp/graph/common_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,6 @@ class LLMCallOp(Op[Message]):

def __init__(self, num_samples_logprob_estimate: int = 0) -> None:
super().__init__()
# Trainable is metadata that an optimizer can use. It enables things
# like (remote) fine-tuning with OpenAI
self.trainable: bool = False
Comment on lines -212 to -214
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot to flag this earlier - removing unused flag.

self.num_samples_partition_estimate = num_samples_logprob_estimate

@overload
Expand Down Expand Up @@ -322,10 +319,9 @@ def backward(
# tree.map_structure allows us to assign a gradient of 0 to all fields of config
grad_config = tree.map_structure(lambda _: 0.0, input_kwargs["config"])
grad_kwargs = {"config": grad_config}
if "msgs" in input_kwargs:
grad_kwargs["msgs"] = 0.0
if "tools" in input_kwargs:
grad_kwargs["tools"] = 0.0
for arg in ("msgs", "tools", "tool_choice"):
if arg in input_kwargs:
grad_kwargs[arg] = 0.0

return [], grad_kwargs

Expand Down
Loading