[CLEANUP]

The-Swarm-Corporation · Jan 29, 2025 · 748f41b · 748f41b
1 parent 51a0b5b
commit 748f41b
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -19,6 +19,7 @@ eggs/
 lib/
 lib64/
 parts/
+wandb/
 .ruff_cache/
 sdist/
 var/

diff --git a/grpo_example_two.py b/grpo_example_two.py
@@ -0,0 +1,18 @@
+# grpo_example_two.py
+from datasets import load_dataset
+from trl import GRPOConfig, GRPOTrainer
+
+dataset = load_dataset("trl-lib/tldr", split="train")
+
+# Reward function: negative distance of each completion's length from 20 characters (0 is best), so closer lengths score higher
+def reward_len(completions, **kwargs):
+    return [-abs(20 - len(completion)) for completion in completions]
+
+training_args = GRPOConfig(output_dir="Qwen2-0.5B-GRPO", logging_steps=10)
+trainer = GRPOTrainer(
+    model="Qwen/Qwen2-0.5B-Instruct",
+    reward_funcs=reward_len,
+    args=training_args,
+    train_dataset=dataset,
+)
+trainer.train()
-Original file line number
+Diff line change
@@ Expand Up / @@ -19,6 +19,7 @@ eggs/ @@
     lib/
     lib64/
     parts/
+    wandb/
     .ruff_cache/
     sdist/
     var/
@@ Expand Down @@