GRPO/GSPO

mydatascience · mydatascience · commit 12ddad64e257 · 2025-11-12T22:26:55.000+04:00
Signed-off-by: Vladimir Suvorov <suvorovv@google.com>
diff --git a/src/MaxText/examples/grpo_llama3_1_8b_demo.ipynb b/src/MaxText/examples/grpo_llama3_1_8b_demo.ipynb
@@ -37,8 +37,8 @@
    "outputs": [],
    "source": [
     "# Clone MaxText repository\n",
-    "!git clone https://github.com/AI-Hypercomputer/maxtext.git\n",
-    "%cd maxtext"
+    "!git clone https://github.com/AI-Hypercomputer/maxtext\n",
+    "%cd maxtext/src"
    ]
   },
   {
@@ -47,19 +47,57 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Install dependencies\n",
-    "!chmod +x setup.sh\n",
-    "!./setup.sh\n",
+    "!bash tools/setup/setup.sh\n",
+    "%pip uninstall -y jax jaxlib libtpu\n",
     "\n",
-    "# Install GRPO-specific dependencies\n",
-    "!./src/MaxText/examples/install_tunix_vllm_requirement.sh\n",
+    "%pip install aiohttp==3.12.15\n",
+    "\n",
+    "# Install Python packages that enable pip to authenticate with Google Artifact Registry automatically.\n",
+    "%pip install keyring keyrings.google-artifactregistry-auth\n",
+    "\n",
+    "# Install vLLM for Jax and TPUs from the artifact registry\n",
+    "!VLLM_TARGET_DEVICE=\"tpu\" pip install --no-cache-dir --pre \\\n",
+    "    --index-url https://us-python.pkg.dev/cloud-tpu-images/maxtext-rl/simple/ \\\n",
+    "    --extra-index-url https://pypi.org/simple/ \\\n",
+    "    --extra-index-url https://us-python.pkg.dev/ml-oss-artifacts-published/jax/simple/ \\\n",
+    "    --extra-index-url https://download.pytorch.org/whl/nightly/cpu \\\n",
+    "    --find-links https://storage.googleapis.com/jax-releases/libtpu_releases.html \\\n",
+    "    --find-links https://storage.googleapis.com/libtpu-wheels/index.html \\\n",
+    "    --find-links https://storage.googleapis.com/libtpu-releases/index.html \\\n",
+    "    --find-links https://storage.googleapis.com/jax-releases/jax_nightly_releases.html \\\n",
+    "    --find-links https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html \\\n",
+    "    vllm==0.11.1rc1.dev292+g1b86bd8e1.tpu\n",
+    "\n",
+    "# Install tpu-commons from the artifact registry\n",
+    "%pip install --no-cache-dir --pre \\\n",
+    "    --index-url https://us-python.pkg.dev/cloud-tpu-images/maxtext-rl/simple/ \\\n",
+    "    --extra-index-url https://pypi.org/simple/ \\\n",
+    "    --extra-index-url https://us-python.pkg.dev/ml-oss-artifacts-published/jax/simple/ \\\n",
+    "    --find-links https://storage.googleapis.com/jax-releases/libtpu_releases.html \\\n",
+    "    tpu-commons==0.1.2\n",
+    "\n",
+    "%pip install numba==0.61.2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "\n",
-    "# Install additional requirements\n",
-    "%pip install --force-reinstall numpy==2.1.2\n",
     "%pip install nest_asyncio\n",
     "\n",
     "import nest_asyncio\n",
-    "nest_asyncio.apply()  # Fix for Colab event loop"
+    "nest_asyncio.apply()  # Fix for Colab event loop\n",
+    "\n",
+    "%cd maxtext/src/\n",
+    "\n",
+    "# Reinstall flax and qwix to fix nnx problems (-y skips pip's confirmation prompt, which a notebook cell cannot answer)\n",
+    "!pip uninstall -y flax\n",
+    "!pip uninstall -y qwix\n",
+    "!pip install flax\n",
+    "!pip install qwix"
    ]
   },
   {
@@ -97,19 +135,21 @@
    "source": [
     "# Configuration for GRPO training\n",
     "import os\n",
+    "import MaxText\n",
     "\n",
     "# Set up paths (adjust if needed)\n",
-    "MAXTEXT_REPO_ROOT = os.path.expanduser(\"~\") + \"/maxtext\"\n",
-    "\n",
+    "MAXTEXT_REPO_ROOT = os.path.dirname(MaxText.__file__)\n",
+    "RUN_NAME=\"grpo_test\"\n",
     "# Hardcoded defaults for Llama3.1-8B\n",
     "MODEL_NAME = \"llama3.1-8b\"\n",
     "HF_REPO_ID = \"meta-llama/Llama-3.1-8B-Instruct\"\n",
-    "CHAT_TEMPLATE_PATH = \"src/MaxText/examples/chat_templates/gsm8k_rl.json\"\n",
+    "CHAT_TEMPLATE_PATH = f\"{MAXTEXT_REPO_ROOT}/examples/chat_templates/gsm8k_rl.json\"\n",
+    "LOSS_ALGO=\"gspo-token\"\n",
     "\n",
     "# Required: Set these before running\n",
-    "MODEL_CHECKPOINT_PATH = \"gs://maxtext-model-checkpoints/llama3.1-8b/2025-01-23-19-04/scanned/0/items\"  # Update this!\n",
-    "OUTPUT_DIRECTORY = \"/tmp/grpo_output\"  # Update this!\n",
-    "HF_TOKEN = os.environ.get(\"HF_TOKEN\", \"\")  # Set HF_TOKEN environment variable\n",
+    "MODEL_CHECKPOINT_PATH = \"\"  # Update this!\n",
+    "OUTPUT_DIRECTORY = \"/tmp/gpo_output\"  # Update this!\n",
+    "HF_TOKEN = os.environ.get(\"HF_TOKEN\", \"\")  # Read from the HF_TOKEN environment variable\n",
     "\n",
     "# Optional: Override training parameters\n",
     "STEPS = 10  # Reduced for demo purposes\n",
@@ -118,14 +158,15 @@
     "NUM_GENERATIONS = 2\n",
     "GRPO_BETA = 0.08\n",
     "GRPO_EPSILON = 0.2\n",
-    "CHIPS_PER_VM = 4\n",
+    "CHIPS_PER_VM = 1\n",
     "\n",
     "print(f\"📁 MaxText Home: {MAXTEXT_REPO_ROOT}\")\n",
     "print(f\"🤖 Model: {MODEL_NAME}\")\n",
     "print(f\"📦 Checkpoint: {MODEL_CHECKPOINT_PATH}\")\n",
     "print(f\"💾 Output: {OUTPUT_DIRECTORY}\")\n",
     "print(f\"🔑 HF Token: {'✅ Set' if HF_TOKEN else '❌ Missing - set HF_TOKEN env var'}\")\n",
-    "print(f\"📊 Steps: {STEPS}\")"
+    "print(f\"📊 Steps: {STEPS}\")\n",
+    "print(f\"Loss Algorithm : {LOSS_ALGO}\")"
    ]
   },
   {
@@ -140,7 +181,7 @@
     "from pathlib import Path\n",
     "\n",
     "# Add MaxText to Python path\n",
-    "maxtext_path = Path(MAXTEXT_REPO_ROOT) / \"src\" / \"MaxText\"\n",
+    "maxtext_path = Path(MAXTEXT_REPO_ROOT) \n",
     "sys.path.insert(0, str(maxtext_path))\n",
     "\n",
     "from MaxText import pyconfig, max_utils\n",
@@ -163,6 +204,51 @@
     "print(f\"📁 MaxText path: {maxtext_path}\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build configuration for GRPO training\n",
+    "config_file = os.path.join(MAXTEXT_REPO_ROOT, \"configs/rl.yml\")\n",
+    "\n",
+    "# Verify chat template exists (CHAT_TEMPLATE_PATH is already rooted at MAXTEXT_REPO_ROOT; check the exact path passed to the config)\n",
+    "if not os.path.exists(CHAT_TEMPLATE_PATH):\n",
+    "    raise FileNotFoundError(f\"Chat template not found: {CHAT_TEMPLATE_PATH}\")\n",
+    "\n",
+    "# Build argv list for pyconfig.initialize()\n",
+    "config_argv = [\n",
+    "    \"\",  # argv[0] placeholder\n",
+    "    config_file,\n",
+    "    f\"model_name={MODEL_NAME}\",\n",
+    "    f\"tokenizer_path={HF_REPO_ID}\",\n",
+    "    f\"run_name={RUN_NAME}\",\n",
+    "    f\"chat_template_path={CHAT_TEMPLATE_PATH}\",\n",
+    "    f\"load_parameters_path={MODEL_CHECKPOINT_PATH}\",\n",
+    "    f\"base_output_directory={OUTPUT_DIRECTORY}\",\n",
+    "    f\"hf_access_token={HF_TOKEN}\",\n",
+    "    f\"steps={STEPS}\",\n",
+    "    f\"per_device_batch_size={PER_DEVICE_BATCH_SIZE}\",\n",
+    "    f\"learning_rate={LEARNING_RATE}\",\n",
+    "    f\"num_generations={NUM_GENERATIONS}\",\n",
+    "    f\"grpo_beta={GRPO_BETA}\",\n",
+    "    f\"grpo_epsilon={GRPO_EPSILON}\",\n",
+    "    f\"chips_per_vm={CHIPS_PER_VM}\",\n",
+    "    f\"loss_algo={LOSS_ALGO}\"\n",
+    "]\n",
+    "\n",
+    "# Initialize configuration\n",
+    "print(f\"🔧 Initializing configuration from: {config_file}\")\n",
+    "config = pyconfig.initialize(config_argv)\n",
+    "max_utils.print_system_information()\n",
+    "\n",
+    "print(\"\\n✅ Configuration initialized successfully\")\n",
+    "print(f\"📊 Training steps: {config.steps}\")\n",
+    "print(f\"📁 Output directory: {config.base_output_directory}\")\n",
+    "print(f\"🤖 Model: {config.model_name}\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -214,25 +300,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Execute GRPO training\n",
+    "# Execute GRPO/GSPO training\n",
     "print(\"\\n\" + \"=\"*80)\n",
-    "print(\"🚀 Starting GRPO Training...\")\n",
+    "print(\"🚀 Starting Training...\")\n",
     "print(\"=\"*80)\n",
-    "\n",
+    "print(1)\n",
     "try:\n",
     "    # Call the rl_train function (it handles everything internally)\n",
     "    rl_train(config)\n",
     "    \n",
     "    print(\"\\n\" + \"=\"*80)\n",
-    "    print(\"✅ GRPO Training Completed Successfully!\")\n",
+    "    print(\"✅ Training Completed Successfully!\")\n",
     "    print(\"=\"*80)\n",
     "    print(f\"📁 Checkpoints saved to: {config.checkpoint_dir}\")\n",
     "    print(f\"📊 TensorBoard logs: {config.tensorboard_dir}\")\n",
     "    print(f\"🎯 Model ready for inference!\")\n",
     "    \n",
     "except Exception as e:\n",
     "    print(\"\\n\" + \"=\"*80)\n",
-    "    print(\"❌ GRPO Training Failed!\")\n",
+    "    print(\"❌ Training Failed!\")\n",
     "    print(\"=\"*80)\n",
     "    print(f\"Error: {str(e)}\")\n",
     "    import traceback\n",