From b7066c5eb8d4ec5643ae6d219da8c71b98b92ffe Mon Sep 17 00:00:00 2001 From: MengAiDev <3463526515@qq.com> Date: Thu, 28 Aug 2025 08:11:35 +0800 Subject: [PATCH 1/2] add glm4 9b model for reasoning conversational --- README.md | 270 +++++++++--------- nb/GLM4_(9B)-Reasoning-Conversational.ipynb | 232 +++++++++++++++ ...e-GLM4_(9B)-Reasoning-Conversational.ipynb | 232 +++++++++++++++ .../GLM4_(9B)-Reasoning-Conversational.ipynb | 180 ++++++++++++ 4 files changed, 780 insertions(+), 134 deletions(-) create mode 100644 nb/GLM4_(9B)-Reasoning-Conversational.ipynb create mode 100644 nb/Kaggle-GLM4_(9B)-Reasoning-Conversational.ipynb create mode 100644 original_template/GLM4_(9B)-Reasoning-Conversational.ipynb diff --git a/README.md b/README.md index dff3011a..42e42d26 100644 --- a/README.md +++ b/README.md @@ -78,121 +78,122 @@ Below are our notebooks for Google Colab categorized by model. You can view our ### GRPO Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Phi 4** **(14B)** | GRPO | Open In Colab | -| **Llama3.1** **(8B)** | GRPO | Open In Colab | -| **Meta Synthetic Data Llama3.1** **(8B)** | GRPO | Open In Colab | -| **Qwen3** **(4B)** | GRPO | Open In Colab | -| **Meta Synthetic Data Llama3 2** **(3B)** | GRPO | Open In Colab | -| **Gemma3** **(1B)** | GRPO | Open In Colab | -| **Qwen2.5** **(3B)** | GRPO | Open In Colab | -| **DeepSeek R1 0528 Qwen3** **(8B)** | GRPO | Open In Colab | -| **Mistral v0.3** **(7B)** | GRPO | Open In Colab | +| **Phi 4** **(14B)** | GRPO | Open In Colab | +| **Llama3.1** **(8B)** | GRPO | Open In Colab | +| **Meta Synthetic Data Llama3.1** **(8B)** | GRPO | Open In Colab | +| **Gemma3** **(1B)** | GRPO | Open In Colab | +| **Meta Synthetic Data Llama3 2** **(3B)** | GRPO | Open In Colab | +| **Qwen3** **(4B)** | GRPO | Open In Colab | +| **Qwen2.5** **(3B)** | GRPO | Open In Colab | +| **DeepSeek R1 0528 Qwen3** **(8B)** | GRPO | Open In Colab | +| **Mistral v0.3** **(7B)** | GRPO | Open In Colab | ### GPT-OSS Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **gpt oss** **(20B)** | | Open In Colab | -| **GPT OSS BNB** **(20B)** | Inference | Open In Colab | -| **GPT OSS MXFP4** **(20B)** | Inference | Open In Colab | +| **GPT OSS BNB** **(20B)** | Inference | Open In Colab | +| **gpt oss** **(20B)** | | Open In Colab | +| **GPT OSS MXFP4** **(20B)** | Inference | Open In Colab | ### Gemma Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **CodeGemma** **(7B)** | Conversational | Open In Colab | -| **Gemma3** **(4B)** | Vision | Open In Colab | -| **Gemma3** **(4B)** | | Open In Colab | -| **Gemma3N** **(4B)** | Vision | Open In Colab | -| **Gemma3** **(270M)** | | Open In Colab | -| **Gemma3N** **(2B)** | Inference | Open In Colab | -| **Gemma3N** **(4B)** | Multimodal | Open In Colab | -| **Gemma3N** **(4B)** | Audio | Open In Colab | -| **Gemma2** **(9B)** | Alpaca | Open In Colab | -| **Gemma2** **(2B)** | Alpaca | Open In Colab | +| **CodeGemma** **(7B)** | Conversational | Open In Colab | +| **Gemma3N** **(2B)** | Inference | Open In Colab | +| **Gemma3N** **(4B)** | Audio | Open In Colab | +| **Gemma3N** **(4B)** | Multimodal | Open In Colab | +| **Gemma3N** **(4B)** | Vision | Open In Colab | +| **Gemma3** **(270M)** | | Open In Colab | +| **Gemma3** **(4B)** | Vision | Open In Colab | +| **Gemma3** **(4B)** | | Open In Colab | +| **Gemma2** **(2B)** | Alpaca | Open In Colab | +| **Gemma2** **(9B)** | Alpaca | Open In Colab | ### Linear Attention Notebooks | Model | Type | Notebook Link | | --- | 
--- | --- | -| **Liquid LFM2** **(1.2B)** | Conversational | Open In Colab | -| **Falcon H1** **(0.5B)** | Alpaca | Open In Colab | +| **Liquid LFM2** **(1.2B)** | Conversational | Open In Colab | +| **Falcon H1** **(0.5B)** | Alpaca | Open In Colab | ### Llama Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Llama3.2** **(11B)** | Vision | Open In Colab | -| **Llama3.2** **(1B and 3B)** | Conversational | Open In Colab | -| **Llama3.2** **(1B)** | RAFT | Open In Colab | -| **Llama3.1** **(8B)** | Alpaca | Open In Colab | -| **Llama3.1** **(8B)** | Inference | Open In Colab | -| **Llasa TTS** **(3B)** | TTS | Open In Colab | -| **Llama3** **(8B)** | ORPO | Open In Colab | -| **Llama3** **(8B)** | Alpaca | Open In Colab | -| **Llama3** **(8B)** | Conversational | Open In Colab | -| **Llama3** **(8B)** | Ollama | Open In Colab | -| **TinyLlama** **(1.1B)** | Alpaca | Open In Colab | -| **Llasa TTS** **(1B)** | TTS | Open In Colab | +| **Llama3.2** **(11B)** | Vision | Open In Colab | +| **Llama3.2** **(1B)** | RAFT | Open In Colab | +| **Llama3.2** **(1B and 3B)** | Conversational | Open In Colab | +| **Llama3.1** **(8B)** | Alpaca | Open In Colab | +| **Llama3.1** **(8B)** | Inference | Open In Colab | +| **Llasa TTS** **(3B)** | TTS | Open In Colab | +| **Llama3** **(8B)** | Alpaca | Open In Colab | +| **Llama3** **(8B)** | Conversational | Open In Colab | +| **Llama3** **(8B)** | ORPO | Open In Colab | +| **Llama3** **(8B)** | Ollama | Open In Colab | +| **TinyLlama** **(1.1B)** | Alpaca | Open In Colab | +| **Llasa TTS** **(1B)** | TTS | Open In Colab | ### Mistral Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Mistral Small** **(22B)** | Alpaca | Open In Colab | -| **Mistral Nemo** **(12B)** | Alpaca | Open In Colab | -| **Pixtral** **(12B)** | Vision | Open In Colab | -| **Mistral** **(7B)** | Text Completion | Open In Colab | -| **Zephyr** **(7B)** | DPO | Open In Colab | -| **Mistral v0.3** **(7B)** | Alpaca | Open In Colab | -| **Mistral v0.3** **(7B)** | CPT | Open In Colab | -| **Mistral v0.3** **(7B)** | Conversational | Open In Colab | +| **Mistral Small** **(22B)** | Alpaca | Open In Colab | +| **Mistral Nemo** **(12B)** | Alpaca | Open In Colab | +| **Pixtral** **(12B)** | Vision | Open In Colab | +| **Mistral** **(7B)** | Text Completion | Open In Colab | +| **Zephyr** **(7B)** | DPO | Open In Colab | +| **Mistral v0.3** **(7B)** | Alpaca | Open In Colab | +| **Mistral v0.3** **(7B)** | CPT | Open In Colab | +| **Mistral v0.3** **(7B)** | Conversational | Open In Colab | ### Orpheus Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Orpheus** **(3B)** | TTS | Open In Colab | +| **Orpheus** **(3B)** | TTS | Open In Colab | ### Oute Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Oute TTS** **(1B)** | TTS | Open In Colab | +| **Oute TTS** **(1B)** | TTS | Open In Colab | ### Phi Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Phi 4** | Conversational | Open In Colab | -| **Phi 3.5 Mini** | Conversational | Open In Colab | -| **Phi 3 Medium** | Conversational | Open In Colab | +| **Phi 4** | Conversational | Open In Colab | +| **Phi 3.5 Mini** | Conversational | Open In Colab | +| **Phi 3 Medium** | Conversational | Open In Colab | ### Qwen Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Qwen3** **(4B)** | | Open In Colab | -| **Qwen3** **(4B)** | | Open In Colab | -| **Qwen3** **(14B)** | Reasoning Conversational | Open In Colab | -| **Qwen3** **(14B)** | | Open 
In Colab | -| **Qwen3** **(14B)** | Alpaca | Open In Colab | -| **Qwen2.5 Coder** **(1.5B)** | Tool Calling | Open In Colab | -| **Qwen2.5** **(7B)** | Alpaca | Open In Colab | -| **Qwen2.5 Coder** **(14B)** | Conversational | Open In Colab | -| **Qwen2.5 VL** **(7B)** | Vision | Open In Colab | -| **Qwen2 VL** **(7B)** | Vision | Open In Colab | -| **Qwen2** **(7B)** | Alpaca | Open In Colab | +| **Qwen3** **(14B)** | Alpaca | Open In Colab | +| **Qwen3** **(14B)** | Reasoning Conversational | Open In Colab | +| **Qwen3** **(14B)** | | Open In Colab | +| **Qwen3** **(4B)** | | Open In Colab | +| **Qwen3** **(4B)** | | Open In Colab | +| **Qwen2.5** **(7B)** | Alpaca | Open In Colab | +| **Qwen2.5 Coder** **(1.5B)** | Tool Calling | Open In Colab | +| **Qwen2.5 Coder** **(14B)** | Conversational | Open In Colab | +| **Qwen2.5 VL** **(7B)** | Vision | Open In Colab | +| **Qwen2** **(7B)** | Alpaca | Open In Colab | +| **Qwen2 VL** **(7B)** | Vision | Open In Colab | ### Spark Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Spark TTS** **(0 5B)** | TTS | Open In Colab | +| **Spark TTS** **(0 5B)** | TTS | Open In Colab | ### Whisper Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Whisper** | | Open In Colab | +| **Whisper** | | Open In Colab | ### Other Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Magistral** **(24B)** | Reasoning Conversational | Open In Colab | -| **Sesame CSM** **(1B)** | TTS | Open In Colab | -| **bert classification** | | Open In Colab | -| **Unsloth** | Studio | Open In Colab | -| **CodeForces cot Finetune for Reasoning on CodeForces** | Reasoning | Open In Colab | +| **Magistral** **(24B)** | Reasoning Conversational | Open In Colab | +| **GLM4** **(9B)** | Reasoning Conversational | Open In Colab | +| **Sesame CSM** **(1B)** | TTS | Open In Colab | +| **CodeForces cot Finetune for Reasoning on CodeForces** | Reasoning | Open In Colab | +| **Unsloth** | Studio | Open In Colab | +| **bert classification** | | Open In Colab | # 📒 Kaggle Notebooks
@@ -203,121 +204,122 @@ Below are our notebooks for Google Colab categorized by model. You can view our ### GRPO Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Phi 4** **(14B)** | GRPO | Open in Kaggle | -| **Meta Synthetic Data Llama3.1** **(8B)** | GRPO | Open in Kaggle | -| **Llama3.1** **(8B)** | GRPO | Open in Kaggle | -| **Gemma3** **(1B)** | GRPO | Open in Kaggle | -| **Meta Synthetic Data Llama3 2** **(3B)** | GRPO | Open in Kaggle | -| **Qwen3** **(4B)** | GRPO | Open in Kaggle | -| **Qwen2.5** **(3B)** | GRPO | Open in Kaggle | -| **DeepSeek R1 0528 Qwen3** **(8B)** | GRPO | Open in Kaggle | -| **Mistral v0.3** **(7B)** | GRPO | Open in Kaggle | +| **Phi 4** **(14B)** | GRPO | Open in Kaggle | +| **Llama3.1** **(8B)** | GRPO | Open in Kaggle | +| **Meta Synthetic Data Llama3.1** **(8B)** | GRPO | Open in Kaggle | +| **Gemma3** **(1B)** | GRPO | Open in Kaggle | +| **Meta Synthetic Data Llama3 2** **(3B)** | GRPO | Open in Kaggle | +| **Qwen3** **(4B)** | GRPO | Open in Kaggle | +| **Qwen2.5** **(3B)** | GRPO | Open in Kaggle | +| **DeepSeek R1 0528 Qwen3** **(8B)** | GRPO | Open in Kaggle | +| **Mistral v0.3** **(7B)** | GRPO | Open in Kaggle | ### GPT-OSS Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **GPT OSS BNB** **(20B)** | Inference | Open in Kaggle | -| **gpt oss** **(20B)** | | Open in Kaggle | -| **GPT OSS MXFP4** **(20B)** | Inference | Open in Kaggle | +| **GPT OSS BNB** **(20B)** | Inference | Open in Kaggle | +| **gpt oss** **(20B)** | | Open in Kaggle | +| **GPT OSS MXFP4** **(20B)** | Inference | Open in Kaggle | ### Gemma Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **CodeGemma** **(7B)** | Conversational | Open in Kaggle | -| **Gemma3** **(4B)** | | Open in Kaggle | -| **Gemma3N** **(4B)** | Audio | Open in Kaggle | -| **Gemma3N** **(2B)** | Inference | Open in Kaggle | -| **Gemma3N** **(4B)** | Vision | Open in Kaggle | -| **Gemma3** **(4B)** | Vision | Open in Kaggle | -| **Gemma3N** **(4B)** | Multimodal | Open in Kaggle | -| **Gemma3** **(270M)** | | Open in Kaggle | -| **Gemma2** **(2B)** | Alpaca | Open in Kaggle | -| **Gemma2** **(9B)** | Alpaca | Open in Kaggle | +| **CodeGemma** **(7B)** | Conversational | Open in Kaggle | +| **Gemma3N** **(2B)** | Inference | Open in Kaggle | +| **Gemma3N** **(4B)** | Audio | Open in Kaggle | +| **Gemma3N** **(4B)** | Multimodal | Open in Kaggle | +| **Gemma3N** **(4B)** | Vision | Open in Kaggle | +| **Gemma3** **(270M)** | | Open in Kaggle | +| **Gemma3** **(4B)** | Vision | Open in Kaggle | +| **Gemma3** **(4B)** | | Open in Kaggle | +| **Gemma2** **(2B)** | Alpaca | Open in Kaggle | +| **Gemma2** **(9B)** | Alpaca | Open in Kaggle | ### Linear Attention Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Liquid LFM2** **(1.2B)** | Conversational | Open in Kaggle | -| **Falcon H1** **(0.5B)** | Alpaca | Open in Kaggle | +| **Liquid LFM2** **(1.2B)** | Conversational | Open in Kaggle | +| **Falcon H1** **(0.5B)** | Alpaca | Open in Kaggle | ### Llama Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Llama3.2** **(1B and 3B)** | Conversational | Open in Kaggle | -| **Llama3.2** **(11B)** | Vision | Open in Kaggle | -| **Llama3.2** **(1B)** | RAFT | Open in Kaggle | -| **Llama3.1** **(8B)** | Inference | Open in Kaggle | -| **Llama3.1** **(8B)** | Alpaca | Open in Kaggle | -| **Llasa TTS** **(3B)** | TTS | Open in Kaggle | -| **Llama3** **(8B)** | Ollama | Open in Kaggle | -| **Llama3** **(8B)** | Conversational | Open in 
Kaggle | -| **Llama3** **(8B)** | ORPO | Open in Kaggle | -| **Llama3** **(8B)** | Alpaca | Open in Kaggle | -| **TinyLlama** **(1.1B)** | Alpaca | Open in Kaggle | -| **Llasa TTS** **(1B)** | TTS | Open in Kaggle | +| **Llama3.2** **(11B)** | Vision | Open in Kaggle | +| **Llama3.2** **(1B)** | RAFT | Open in Kaggle | +| **Llama3.2** **(1B and 3B)** | Conversational | Open in Kaggle | +| **Llama3.1** **(8B)** | Alpaca | Open in Kaggle | +| **Llama3.1** **(8B)** | Inference | Open in Kaggle | +| **Llasa TTS** **(3B)** | TTS | Open in Kaggle | +| **Llama3** **(8B)** | Alpaca | Open in Kaggle | +| **Llama3** **(8B)** | Conversational | Open in Kaggle | +| **Llama3** **(8B)** | ORPO | Open in Kaggle | +| **Llama3** **(8B)** | Ollama | Open in Kaggle | +| **TinyLlama** **(1.1B)** | Alpaca | Open in Kaggle | +| **Llasa TTS** **(1B)** | TTS | Open in Kaggle | ### Mistral Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Mistral Small** **(22B)** | Alpaca | Open in Kaggle | -| **Mistral Nemo** **(12B)** | Alpaca | Open in Kaggle | -| **Pixtral** **(12B)** | Vision | Open in Kaggle | -| **Mistral** **(7B)** | Text Completion | Open in Kaggle | -| **Zephyr** **(7B)** | DPO | Open in Kaggle | -| **Mistral v0.3** **(7B)** | CPT | Open in Kaggle | -| **Mistral v0.3** **(7B)** | Alpaca | Open in Kaggle | -| **Mistral v0.3** **(7B)** | Conversational | Open in Kaggle | +| **Mistral Small** **(22B)** | Alpaca | Open in Kaggle | +| **Mistral Nemo** **(12B)** | Alpaca | Open in Kaggle | +| **Pixtral** **(12B)** | Vision | Open in Kaggle | +| **Mistral** **(7B)** | Text Completion | Open in Kaggle | +| **Zephyr** **(7B)** | DPO | Open in Kaggle | +| **Mistral v0.3** **(7B)** | Alpaca | Open in Kaggle | +| **Mistral v0.3** **(7B)** | CPT | Open in Kaggle | +| **Mistral v0.3** **(7B)** | Conversational | Open in Kaggle | ### Orpheus Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Orpheus** **(3B)** | TTS | Open in Kaggle | +| **Orpheus** **(3B)** | TTS | Open in Kaggle | ### Oute Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Oute TTS** **(1B)** | TTS | Open in Kaggle | +| **Oute TTS** **(1B)** | TTS | Open in Kaggle | ### Phi Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Phi 4** | Conversational | Open in Kaggle | -| **Phi 3.5 Mini** | Conversational | Open in Kaggle | -| **Phi 3 Medium** | Conversational | Open in Kaggle | +| **Phi 4** | Conversational | Open in Kaggle | +| **Phi 3.5 Mini** | Conversational | Open in Kaggle | +| **Phi 3 Medium** | Conversational | Open in Kaggle | ### Qwen Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Qwen3** **(14B)** | Alpaca | Open in Kaggle | -| **Qwen3** **(4B)** | | Open in Kaggle | -| **Qwen3** **(4B)** | | Open in Kaggle | -| **Qwen3** **(14B)** | | Open in Kaggle | -| **Qwen3** **(14B)** | Reasoning Conversational | Open in Kaggle | -| **Qwen2.5 Coder** **(14B)** | Conversational | Open in Kaggle | -| **Qwen2.5** **(7B)** | Alpaca | Open in Kaggle | -| **Qwen2.5 Coder** **(1.5B)** | Tool Calling | Open in Kaggle | -| **Qwen2.5 VL** **(7B)** | Vision | Open in Kaggle | -| **Qwen2 VL** **(7B)** | Vision | Open in Kaggle | -| **Qwen2** **(7B)** | Alpaca | Open in Kaggle | +| **Qwen3** **(14B)** | Alpaca | Open in Kaggle | +| **Qwen3** **(14B)** | Reasoning Conversational | Open in Kaggle | +| **Qwen3** **(14B)** | | Open in Kaggle | +| **Qwen3** **(4B)** | | Open in Kaggle | +| **Qwen3** **(4B)** | | Open in Kaggle | +| **Qwen2.5** **(7B)** | Alpaca | Open in Kaggle 
| +| **Qwen2.5 Coder** **(1.5B)** | Tool Calling | Open in Kaggle | +| **Qwen2.5 Coder** **(14B)** | Conversational | Open in Kaggle | +| **Qwen2.5 VL** **(7B)** | Vision | Open in Kaggle | +| **Qwen2** **(7B)** | Alpaca | Open in Kaggle | +| **Qwen2 VL** **(7B)** | Vision | Open in Kaggle | ### Spark Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Spark TTS** **(0 5B)** | TTS | Open in Kaggle | +| **Spark TTS** **(0 5B)** | TTS | Open in Kaggle | ### Whisper Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Whisper** | | Open in Kaggle | +| **Whisper** | | Open in Kaggle | ### Other Notebooks | Model | Type | Notebook Link | | --- | --- | --- | -| **Magistral** **(24B)** | Reasoning Conversational | Open in Kaggle | -| **Sesame CSM** **(1B)** | TTS | Open in Kaggle | -| **CodeForces cot Finetune for Reasoning on CodeForces** | Reasoning | Open in Kaggle | -| **Unsloth** | Studio | Open in Kaggle | -| **bert classification** | | Open in Kaggle | +| **Magistral** **(24B)** | Reasoning Conversational | Open in Kaggle | +| **GLM4** **(9B)** | Reasoning Conversational | Open in Kaggle | +| **Sesame CSM** **(1B)** | TTS | Open in Kaggle | +| **CodeForces cot Finetune for Reasoning on CodeForces** | Reasoning | Open in Kaggle | +| **Unsloth** | Studio | Open in Kaggle | +| **bert classification** | | Open in Kaggle |
diff --git a/nb/GLM4_(9B)-Reasoning-Conversational.ipynb b/nb/GLM4_(9B)-Reasoning-Conversational.ipynb new file mode 100644 index 00000000..d634c686 --- /dev/null +++ b/nb/GLM4_(9B)-Reasoning-Conversational.ipynb @@ -0,0 +1,232 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To run this, press \"*Runtime*\" and press \"*Run all*\" on a **free** Tesla T4 Google Colab instance!\n", + "
\n", + "\n", + "\n", + " Join Discord if you need help + \u2b50 Star us on Github \u2b50\n", + "
\n", + "\n", + "To install Unsloth on your own computer, follow the installation instructions on our Github page [here](https://docs.unsloth.ai/get-started/installing-+-updating).\n", + "\n", + "You will learn how to do [data prep](#Data), how to [train](#Train), how to [run the model](#Inference), & [how to save it](#Save)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### News" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**NEW** Unsloth now supports training the new **gpt-oss** model from OpenAI! You can start finetune gpt-oss for free with our **[Colab notebook](https://x.com/UnslothAI/status/1953896997867729075)**!\n", + "\n", + "Unsloth now supports Text-to-Speech (TTS) models. Read our [guide here](https://docs.unsloth.ai/basics/text-to-speech-tts-fine-tuning).\n", + "\n", + "Read our **[Gemma 3N Guide](https://docs.unsloth.ai/basics/gemma-3n-how-to-run-and-fine-tune)** and check out our new **[Dynamic 2.0](https://docs.unsloth.ai/basics/unsloth-dynamic-2.0-ggufs)** quants which outperforms other quantization methods!\n", + "\n", + "Visit our docs for all our [model uploads](https://docs.unsloth.ai/get-started/all-our-models) and [notebooks](https://docs.unsloth.ai/get-started/unsloth-notebooks).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "%%capture\nimport os, re\nif \"COLAB_\" not in \"\".join(os.environ.keys()):\n !pip install unsloth\nelse:\n # Do this only in Colab notebooks! Otherwise use pip install unsloth\n import torch; v = re.match(r\"[0-9\\.]{3,}\", str(torch.__version__)).group(0)\n xformers = \"xformers==\" + (\"0.0.32.post2\" if v == \"2.8.0\" else \"0.0.29.post3\")\n !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo\n !pip install sentencepiece protobuf \"datasets>=3.4.1,<4.0.0\" \"huggingface_hub>=0.34.0\" hf_transfer\n !pip install --no-deps unsloth\n!pip install transformers==4.55.4" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Unsloth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datasets import load_dataset\n", + "\n", + "dataset = load_dataset(\"open-r1/OpenR1-Math-220k\")\n", + "\n", + "print(dataset)\n", + "\n", + "# \u67e5\u770b\u7b2c\u4e00\u4e2a\u8bad\u7ec3\u6837\u672c\n", + "sample = dataset['train'][0]\n", + "print(\"\\n--- Problem ---\")\n", + "print(sample['problem'])\n", + "print(\"\\n--- Solution (The Chain-of-Thought) ---\")\n", + "print(sample['solution'])\n", + "print(\"\\n--- Answer ---\")\n", + "print(sample['answer'])\n", + "\n", + "def format_function(sample):\n", + " \"\"\"\n", + " \u5c06\u539f\u59cb\u6570\u636e\u6837\u672c\u8f6c\u6362\u4e3a\u7528\u4e8e\u5fae\u8c03\u7684 prompt-completion \u5bf9\u3002\n", + " \"\"\"\n", + " prompt_text = f\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", + "You are an expert mathematician. Solve the following problem. Think step by step. 
End your response with a final answer in the format \\\\boxed{{answer}}.<|eot_id|>\n", + "<|start_header_id|>user<|end_header_id|>\n", + "{sample['problem']}<|eot_id|>\n", + "<|start_header_id|>assistant<|end_header_id|>\n", + "\"\"\"\n", + "    \n", + "    solution_text = sample['solution'].strip()\n", + "    final_answer = sample['answer'].strip()\n", + "    \n", + "    if f\"\\\\boxed{{{final_answer}}}\" not in solution_text:\n", + "        completion_text = f\"{solution_text}\\n\\\\boxed{{{final_answer}}}\"\n", + "    else:\n", + "        completion_text = solution_text\n", + "\n", + "    return {\"prompt\": prompt_text, \"completion\": completion_text}\n", + "\n", + "formatted_dataset = dataset.map(format_function)\n", + "\n", + "train_data = formatted_dataset['train']\n", + "\n", + "train_data.to_json(\"openr1_math_cot_formatted.jsonl\", orient=\"records\", lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from unsloth import FastLanguageModel\n", + "import torch\n", + "from datasets import load_dataset\n", + "from transformers import TrainingArguments\n", + "from trl import SFTTrainer\n", + "\n", + "model, tokenizer = FastLanguageModel.from_pretrained(\n", + "    model_name = \"glm-4-9b-chat-hf\",\n", + "    max_seq_length = 4096,\n", + "    dtype = None,\n", + "    load_in_4bit = True,\n", + ")\n", + "\n", + "\n", + "model = FastLanguageModel.get_peft_model(\n", + "    model,\n", + "    r = 16,\n", + "    lora_alpha = 32,\n", + "    lora_dropout = 0,\n", + ")\n", + "\n", + "dataset = load_dataset(\"json\", data_files={\"train\": \"openr1_math_cot_formatted.jsonl\"}, split=\"train\")\n", + "\n", + "def preprocess_function(examples):\n", + "    \"\"\"\n", + "    Concatenate the prompt and completion into a single 'text' field before training.\n", + "    \"\"\"\n", + "    texts = [p + c for p, c in zip(examples['prompt'], examples['completion'])]\n", + "    return {\"text\": texts}\n", + "\n", + "dataset = dataset.map(preprocess_function, batched=True)\n", + "\n", + "args = TrainingArguments(\n", + "    output_dir = \"glm4-openr1-math\",\n", + "    per_device_train_batch_size = 2,\n", + "    gradient_accumulation_steps = 4,\n", + "    learning_rate = 2e-5,\n", + "    num_train_epochs = 2,\n", + "    logging_steps = 50,\n", + "    save_strategy = \"steps\",\n", + "    save_steps = 1000,\n", + "    optim = \"adamw_8bit\",\n", + "    weight_decay = 0.01,\n", + "    lr_scheduler_type = \"linear\",\n", + "    seed = 3407,\n", + "    fp16 = not torch.cuda.is_bf16_supported(),\n", + "    bf16 = torch.cuda.is_bf16_supported(),\n", + "    max_grad_norm = 1.0,\n", + ")\n", + "\n", + "\n", + "trainer = SFTTrainer(\n", + "    model = model,\n", + "    tokenizer = tokenizer,\n", + "    train_dataset = dataset,\n", + "    dataset_text_field = \"text\",\n", + "    max_seq_length = 4096,\n", + "    args = args,\n", + ")\n", + "\n", + "trainer.train()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs, want to keep up to date with the latest LLM news, need help, or want to join projects, feel free to join our Discord!\n", + "\n", + "Some other links:\n", + "1. Train your own reasoning model - Llama GRPO notebook [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-GRPO.ipynb)\n", + "2. Saving finetunes to Ollama. 
[Free notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3_(8B)-Ollama.ipynb)\n", + "3. Llama 3.2 Vision finetuning - Radiography use case. [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb)\n", + "4. See notebooks for DPO, ORPO, Continued pretraining, conversational finetuning and more on our [documentation](https://docs.unsloth.ai/get-started/unsloth-notebooks)!\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " Join Discord if you need help + \u2b50\ufe0f Star us on Github \u2b50\ufe0f\n", + "
\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + }, + "accelerator": "GPU", + "colab": { + "provenance": [], + "gpuType": "T4", + "include_colab_link": true + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {} + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/nb/Kaggle-GLM4_(9B)-Reasoning-Conversational.ipynb b/nb/Kaggle-GLM4_(9B)-Reasoning-Conversational.ipynb new file mode 100644 index 00000000..f75f39b5 --- /dev/null +++ b/nb/Kaggle-GLM4_(9B)-Reasoning-Conversational.ipynb @@ -0,0 +1,232 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To run this, press \"*Runtime*\" and press \"*Run all*\" on a **free** Tesla T4 Google Colab instance!\n", + "
\n", + "\n", + "\n", + " Join Discord if you need help + \u2b50 Star us on Github \u2b50\n", + "
\n", + "\n", + "To install Unsloth on your own computer, follow the installation instructions on our Github page [here](https://docs.unsloth.ai/get-started/installing-+-updating).\n", + "\n", + "You will learn how to do [data prep](#Data), how to [train](#Train), how to [run the model](#Inference), & [how to save it](#Save)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### News" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**NEW** Unsloth now supports training the new **gpt-oss** model from OpenAI! You can start finetune gpt-oss for free with our **[Colab notebook](https://x.com/UnslothAI/status/1953896997867729075)**!\n", + "\n", + "Unsloth now supports Text-to-Speech (TTS) models. Read our [guide here](https://docs.unsloth.ai/basics/text-to-speech-tts-fine-tuning).\n", + "\n", + "Read our **[Gemma 3N Guide](https://docs.unsloth.ai/basics/gemma-3n-how-to-run-and-fine-tune)** and check out our new **[Dynamic 2.0](https://docs.unsloth.ai/basics/unsloth-dynamic-2.0-ggufs)** quants which outperforms other quantization methods!\n", + "\n", + "Visit our docs for all our [model uploads](https://docs.unsloth.ai/get-started/all-our-models) and [notebooks](https://docs.unsloth.ai/get-started/unsloth-notebooks).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "%%capture\nimport os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\n!pip install pip3-autoremove\n!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu128\n!pip install unsloth\n!pip install transformers==4.55.4\n" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Unsloth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datasets import load_dataset\n", + "\n", + "dataset = load_dataset(\"open-r1/OpenR1-Math-220k\")\n", + "\n", + "print(dataset)\n", + "\n", + "# \u67e5\u770b\u7b2c\u4e00\u4e2a\u8bad\u7ec3\u6837\u672c\n", + "sample = dataset['train'][0]\n", + "print(\"\\n--- Problem ---\")\n", + "print(sample['problem'])\n", + "print(\"\\n--- Solution (The Chain-of-Thought) ---\")\n", + "print(sample['solution'])\n", + "print(\"\\n--- Answer ---\")\n", + "print(sample['answer'])\n", + "\n", + "def format_function(sample):\n", + " \"\"\"\n", + " \u5c06\u539f\u59cb\u6570\u636e\u6837\u672c\u8f6c\u6362\u4e3a\u7528\u4e8e\u5fae\u8c03\u7684 prompt-completion \u5bf9\u3002\n", + " \"\"\"\n", + " prompt_text = f\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", + "You are an expert mathematician. Solve the following problem. Think step by step. 
End your response with a final answer in the format \\\\boxed{{answer}}.<|eot_id|>\n", + "<|start_header_id|>user<|end_header_id|>\n", + "{sample['problem']}<|eot_id|>\n", + "<|start_header_id|>assistant<|end_header_id|>\n", + "\"\"\"\n", + "    \n", + "    solution_text = sample['solution'].strip()\n", + "    final_answer = sample['answer'].strip()\n", + "    \n", + "    if f\"\\\\boxed{{{final_answer}}}\" not in solution_text:\n", + "        completion_text = f\"{solution_text}\\n\\\\boxed{{{final_answer}}}\"\n", + "    else:\n", + "        completion_text = solution_text\n", + "\n", + "    return {\"prompt\": prompt_text, \"completion\": completion_text}\n", + "\n", + "formatted_dataset = dataset.map(format_function)\n", + "\n", + "train_data = formatted_dataset['train']\n", + "\n", + "train_data.to_json(\"openr1_math_cot_formatted.jsonl\", orient=\"records\", lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from unsloth import FastLanguageModel\n", + "import torch\n", + "from datasets import load_dataset\n", + "from transformers import TrainingArguments\n", + "from trl import SFTTrainer\n", + "\n", + "model, tokenizer = FastLanguageModel.from_pretrained(\n", + "    model_name = \"glm-4-9b-chat-hf\",\n", + "    max_seq_length = 4096,\n", + "    dtype = None,\n", + "    load_in_4bit = True,\n", + ")\n", + "\n", + "\n", + "model = FastLanguageModel.get_peft_model(\n", + "    model,\n", + "    r = 16,\n", + "    lora_alpha = 32,\n", + "    lora_dropout = 0,\n", + ")\n", + "\n", + "dataset = load_dataset(\"json\", data_files={\"train\": \"openr1_math_cot_formatted.jsonl\"}, split=\"train\")\n", + "\n", + "def preprocess_function(examples):\n", + "    \"\"\"\n", + "    Concatenate the prompt and completion into a single 'text' field before training.\n", + "    \"\"\"\n", + "    texts = [p + c for p, c in zip(examples['prompt'], examples['completion'])]\n", + "    return {\"text\": texts}\n", + "\n", + "dataset = dataset.map(preprocess_function, batched=True)\n", + "\n", + "args = TrainingArguments(\n", + "    output_dir = \"glm4-openr1-math\",\n", + "    per_device_train_batch_size = 2,\n", + "    gradient_accumulation_steps = 4,\n", + "    learning_rate = 2e-5,\n", + "    num_train_epochs = 2,\n", + "    logging_steps = 50,\n", + "    save_strategy = \"steps\",\n", + "    save_steps = 1000,\n", + "    optim = \"adamw_8bit\",\n", + "    weight_decay = 0.01,\n", + "    lr_scheduler_type = \"linear\",\n", + "    seed = 3407,\n", + "    fp16 = not torch.cuda.is_bf16_supported(),\n", + "    bf16 = torch.cuda.is_bf16_supported(),\n", + "    max_grad_norm = 1.0,\n", + ")\n", + "\n", + "\n", + "trainer = SFTTrainer(\n", + "    model = model,\n", + "    tokenizer = tokenizer,\n", + "    train_dataset = dataset,\n", + "    dataset_text_field = \"text\",\n", + "    max_seq_length = 4096,\n", + "    args = args,\n", + ")\n", + "\n", + "trainer.train()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs, want to keep up to date with the latest LLM news, need help, or want to join projects, feel free to join our Discord!\n", + "\n", + "Some other links:\n", + "1. Train your own reasoning model - Llama GRPO notebook [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-GRPO.ipynb)\n", + "2. Saving finetunes to Ollama. 
[Free notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3_(8B)-Ollama.ipynb)\n", + "3. Llama 3.2 Vision finetuning - Radiography use case. [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb)\n", + "4. See notebooks for DPO, ORPO, Continued pretraining, conversational finetuning and more on our [documentation](https://docs.unsloth.ai/get-started/unsloth-notebooks)!\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " Join Discord if you need help + \u2b50\ufe0f Star us on Github \u2b50\ufe0f\n", + "
\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + }, + "accelerator": "GPU", + "colab": { + "provenance": [], + "gpuType": "T4", + "include_colab_link": true + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {} + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/original_template/GLM4_(9B)-Reasoning-Conversational.ipynb b/original_template/GLM4_(9B)-Reasoning-Conversational.ipynb new file mode 100644 index 00000000..44028c1b --- /dev/null +++ b/original_template/GLM4_(9B)-Reasoning-Conversational.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### News" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We use unsloth to finetune the glm4-9b-chat-hf model with OpenR1-Math-220k dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install install unsloth" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Unsloth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datasets import load_dataset\n", + "\n", + "dataset = load_dataset(\"open-r1/OpenR1-Math-220k\")\n", + "\n", + "print(dataset)\n", + "\n", + "# 查看第一个训练样本\n", + "sample = dataset['train'][0]\n", + "print(\"\\n--- Problem ---\")\n", + "print(sample['problem'])\n", + "print(\"\\n--- Solution (The Chain-of-Thought) ---\")\n", + "print(sample['solution'])\n", + "print(\"\\n--- Answer ---\")\n", + "print(sample['answer'])\n", + "\n", + "def format_function(sample):\n", + " \"\"\"\n", + " 将原始数据样本转换为用于微调的 prompt-completion 对。\n", + " \"\"\"\n", + " prompt_text = f\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", + "You are an expert mathematician. Solve the following problem. Think step by step. 
End your response with a final answer in the format \\\\boxed{{answer}}.<|eot_id|>\n", + "<|start_header_id|>user<|end_header_id|>\n", + "{sample['problem']}<|eot_id|>\n", + "<|start_header_id|>assistant<|end_header_id|>\n", + "\"\"\"\n", + "    \n", + "    solution_text = sample['solution'].strip()\n", + "    final_answer = sample['answer'].strip()\n", + "    \n", + "    if f\"\\\\boxed{{{final_answer}}}\" not in solution_text:\n", + "        completion_text = f\"{solution_text}\\n\\\\boxed{{{final_answer}}}\"\n", + "    else:\n", + "        completion_text = solution_text\n", + "\n", + "    return {\"prompt\": prompt_text, \"completion\": completion_text}\n", + "\n", + "formatted_dataset = dataset.map(format_function)\n", + "\n", + "train_data = formatted_dataset['train']\n", + "\n", + "train_data.to_json(\"openr1_math_cot_formatted.jsonl\", orient=\"records\", lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from unsloth import FastLanguageModel\n", + "import torch\n", + "from datasets import load_dataset\n", + "from transformers import TrainingArguments\n", + "from trl import SFTTrainer\n", + "\n", + "model, tokenizer = FastLanguageModel.from_pretrained(\n", + "    model_name = \"glm-4-9b-chat-hf\",\n", + "    max_seq_length = 4096,\n", + "    dtype = None,\n", + "    load_in_4bit = True,\n", + ")\n", + "\n", + "\n", + "model = FastLanguageModel.get_peft_model(\n", + "    model,\n", + "    r = 16,\n", + "    lora_alpha = 32,\n", + "    lora_dropout = 0,\n", + ")\n", + "\n", + "dataset = load_dataset(\"json\", data_files={\"train\": \"openr1_math_cot_formatted.jsonl\"}, split=\"train\")\n", + "\n", + "def preprocess_function(examples):\n", + "    \"\"\"\n", + "    Concatenate the prompt and completion into a single 'text' field before training.\n", + "    \"\"\"\n", + "    texts = [p + c for p, c in zip(examples['prompt'], examples['completion'])]\n", + "    return {\"text\": texts}\n", + "\n", + "dataset = dataset.map(preprocess_function, batched=True)\n", + "\n", + "args = TrainingArguments(\n", + "    output_dir = \"glm4-openr1-math\",\n", + "    per_device_train_batch_size = 2,\n", + "    gradient_accumulation_steps = 4,\n", + "    learning_rate = 2e-5,\n", + "    num_train_epochs = 2,\n", + "    logging_steps = 50,\n", + "    save_strategy = \"steps\",\n", + "    save_steps = 1000,\n", + "    optim = \"adamw_8bit\",\n", + "    weight_decay = 0.01,\n", + "    lr_scheduler_type = \"linear\",\n", + "    seed = 3407,\n", + "    fp16 = not torch.cuda.is_bf16_supported(),\n", + "    bf16 = torch.cuda.is_bf16_supported(),\n", + "    max_grad_norm = 1.0,\n", + ")\n", + "\n", + "\n", + "trainer = SFTTrainer(\n", + "    model = model,\n", + "    tokenizer = tokenizer,\n", + "    train_dataset = dataset,\n", + "    dataset_text_field = \"text\",\n", + "    max_seq_length = 4096,\n", + "    args = args,\n", + ")\n", + "\n", + "trainer.train()" + ] + } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 4 +} From 9fa0a5c3034b56ea498ae7050123cd946540b095 Mon Sep 17 00:00:00 2001 From: MengAiDev <3463526515@qq.com> Date: Fri, 29 Aug 2025 08:12:11 +0800 Subject: [PATCH 2/2] update notebook --- nb/GLM4_(9B)-Reasoning-Conversational.ipynb | 129 ++++++++++++++++-- ...e-GLM4_(9B)-Reasoning-Conversational.ipynb | 129 ++++++++++++++++--
.../GLM4_(9B)-Reasoning-Conversational.ipynb | 129 ++++++++++++++++-- 3 files changed, 363 insertions(+), 24 deletions(-) diff --git a/nb/GLM4_(9B)-Reasoning-Conversational.ipynb b/nb/GLM4_(9B)-Reasoning-Conversational.ipynb index d634c686..ba77b39f 100644 --- a/nb/GLM4_(9B)-Reasoning-Conversational.ipynb +++ b/nb/GLM4_(9B)-Reasoning-Conversational.ipynb @@ -57,6 +57,13 @@ "### Unsloth" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We use the open-r1/OpenR1-Math-220k dataset, so we need to format the data into `prompt-completion` pairs." + ] + }, { "cell_type": "code", "execution_count": null, @@ -69,7 +76,7 @@ "\n", "print(dataset)\n", "\n", - "# View the first training sample\n", + "# See the first sample\n", "sample = dataset['train'][0]\n", "print(\"\\n--- Problem ---\")\n", "print(sample['problem'])\n", @@ -79,9 +86,6 @@ "print(sample['answer'])\n", "\n", "def format_function(sample):\n", - "    \"\"\"\n", - "    Convert a raw data sample into a prompt-completion pair for fine-tuning.\n", - "    \"\"\"\n", "    prompt_text = f\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", "You are an expert mathematician. Solve the following problem. Think step by step. End your response with a final answer in the format \\\\boxed{{answer}}.<|eot_id|>\n", "<|start_header_id|>user<|end_header_id|>\n", @@ -106,6 +110,13 @@ "train_data.to_json(\"openr1_math_cot_formatted.jsonl\", orient=\"records\", lines=True)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we have the formatted data, we can use it to train the model." + ] + }, { "cell_type": "code", "execution_count": null, @@ -119,7 +130,7 @@ "from trl import SFTTrainer\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", - "    model_name = \"glm-4-9b-chat-hf\",\n", + "    model_name = \"zai-org/glm-4-9b-chat-hf\",\n", "    max_seq_length = 4096,\n", "    dtype = None,\n", "    load_in_4bit = True,\n", @@ -136,9 +147,6 @@ "dataset = load_dataset(\"json\", data_files={\"train\": \"openr1_math_cot_formatted.jsonl\"}, split=\"train\")\n", "\n", "def preprocess_function(examples):\n", - "    \"\"\"\n", - "    Concatenate the prompt and completion into a single 'text' field before training.\n", - "    \"\"\"\n", "    texts = [p + c for p, c in zip(examples['prompt'], examples['completion'])]\n", "    return {\"text\": texts}\n", "\n", @@ -175,6 +183,111 @@ "trainer.train()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great, now the model is ready. Let's see how it works. If you don't want to train the model from scratch, you can run:\n", + "\n", + "```bash\n", + "pip install modelscope\n", + "modelscope download MengAiDev/GLM4-OpenR1 --local_dir GLM4-OpenR1\n", + "```\n", + "\n", + "This model was trained for only 6,250 steps because of the time limit, so it may not be as good as the model I had expected to train for 20,000 steps."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoModelForCausalLM, AutoTokenizer\n", + "from peft import PeftModel\n", + "import torch\n", + "\n", + "class MathProblemSolver:\n", + "    def __init__(self, base_model_path: str, lora_path: str):\n", + "        self.device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "        \n", + "        self.tokenizer = AutoTokenizer.from_pretrained(\n", + "            base_model_path,\n", + "            trust_remote_code=True\n", + "        )\n", + "        \n", + "        if self.tokenizer.pad_token is None:\n", + "            self.tokenizer.pad_token = self.tokenizer.eos_token\n", + "        \n", + "        self.model = AutoModelForCausalLM.from_pretrained(\n", + "            base_model_path,\n", + "            torch_dtype=torch.float16,\n", + "            device_map=\"auto\",\n", + "            trust_remote_code=True\n", + "        )\n", + "        \n", + "        # Attach the fine-tuned LoRA adapter to the base model\n", + "        self.model = PeftModel.from_pretrained(self.model, lora_path)\n", + "        self.model.eval()\n", + "        \n", + "        print(\"Model loaded successfully with LoRA adapter!\")\n", + "\n", + "    def create_prompt(self, problem: str) -> str:\n", + "        return f\"\"\"Solve this math problem carefully and step by step. \n", + "Provide detailed reasoning and put your final answer in \\\\boxed{{}}.\n", + "\n", + "Problem: {problem}\n", + "\n", + "Solution:\"\"\"\n", + "\n", + "    def solve(self, problem: str) -> str:\n", + "        prompt = self.create_prompt(problem)\n", + "        \n", + "        inputs = self.tokenizer(\n", + "            prompt, \n", + "            return_tensors=\"pt\",\n", + "            truncation=True,\n", + "            max_length=1024\n", + "        ).to(self.device)\n", + "        \n", + "        with torch.no_grad():\n", + "            outputs = self.model.generate(\n", + "                **inputs,\n", + "                max_new_tokens=512,\n", + "                temperature=0.1,\n", + "                do_sample=True,\n", + "                pad_token_id=self.tokenizer.eos_token_id,\n", + "                repetition_penalty=1.1\n", + "            )\n", + "        \n", + "        full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + "        return full_response[len(prompt):].strip()\n", + "\n", + "def main():\n", + "    BASE_MODEL_PATH = \"zai-org/glm-4-9b-chat-hf\"\n", + "    LORA_PATH = \"GLM4-OpenR1\" \n", + "    \n", + "    solver = MathProblemSolver(BASE_MODEL_PATH, LORA_PATH)\n", + "\n", + "    problems = [\n", + "        \"If x + y = 10 and x - y = 4, what is the value of x?\",\n", + "        \"Find the sum of the roots of the equation x^2 - 5x + 6 = 0.\",\n", + "    ]\n", + "    \n", + "    for i, problem in enumerate(problems, 1):\n", + "        print(f\"\\n{'='*50}\")\n", + "        print(f\"Problem #{i}: {problem}\")\n", + "        print(f\"{'='*50}\")\n", + "        \n", + "        solution = solver.solve(problem)\n", + "        print(\"Model's Solution:\")\n", + "        print(solution)\n", + "        print(f\"{'='*50}\")\n", + "\n", + "if __name__ == \"__main__\":\n", + "    main()" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/nb/Kaggle-GLM4_(9B)-Reasoning-Conversational.ipynb b/nb/Kaggle-GLM4_(9B)-Reasoning-Conversational.ipynb index f75f39b5..d5cd9ee9 100644 --- a/nb/Kaggle-GLM4_(9B)-Reasoning-Conversational.ipynb +++ b/nb/Kaggle-GLM4_(9B)-Reasoning-Conversational.ipynb @@ -57,6 +57,13 @@ "### Unsloth" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We use the open-r1/OpenR1-Math-220k dataset, so we need to format the data into `prompt-completion` pairs."
+ ] + }, { "cell_type": "code", "execution_count": null, @@ -69,7 +76,7 @@ "\n", "print(dataset)\n", "\n", - "# View the first training sample\n", + "# See the first sample\n", "sample = dataset['train'][0]\n", "print(\"\\n--- Problem ---\")\n", "print(sample['problem'])\n", @@ -79,9 +86,6 @@ "print(sample['answer'])\n", "\n", "def format_function(sample):\n", - "    \"\"\"\n", - "    Convert a raw data sample into a prompt-completion pair for fine-tuning.\n", - "    \"\"\"\n", "    prompt_text = f\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", "You are an expert mathematician. Solve the following problem. Think step by step. End your response with a final answer in the format \\\\boxed{{answer}}.<|eot_id|>\n", "<|start_header_id|>user<|end_header_id|>\n", @@ -106,6 +110,13 @@ "train_data.to_json(\"openr1_math_cot_formatted.jsonl\", orient=\"records\", lines=True)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we have the formatted data, we can use it to train the model." + ] + }, { "cell_type": "code", "execution_count": null, @@ -119,7 +130,7 @@ "from trl import SFTTrainer\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", - "    model_name = \"glm-4-9b-chat-hf\",\n", + "    model_name = \"zai-org/glm-4-9b-chat-hf\",\n", "    max_seq_length = 4096,\n", "    dtype = None,\n", "    load_in_4bit = True,\n", @@ -136,9 +147,6 @@ "dataset = load_dataset(\"json\", data_files={\"train\": \"openr1_math_cot_formatted.jsonl\"}, split=\"train\")\n", "\n", "def preprocess_function(examples):\n", - "    \"\"\"\n", - "    Concatenate the prompt and completion into a single 'text' field before training.\n", - "    \"\"\"\n", "    texts = [p + c for p, c in zip(examples['prompt'], examples['completion'])]\n", "    return {\"text\": texts}\n", "\n", @@ -175,6 +183,111 @@ "trainer.train()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great, now the model is ready. Let's see how it works. If you don't want to train the model from scratch, you can run:\n", + "\n", + "```bash\n", + "pip install modelscope\n", + "modelscope download MengAiDev/GLM4-OpenR1 --local_dir GLM4-OpenR1\n", + "```\n", + "\n", + "This model was trained for only 6,250 steps because of the time limit, so it may not be as good as the model I had expected to train for 20,000 steps."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoModelForCausalLM, AutoTokenizer\n", + "from peft import PeftModel\n", + "import torch\n", + "\n", + "class MathProblemSolver:\n", + "    def __init__(self, base_model_path: str, lora_path: str):\n", + "        self.device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "        \n", + "        self.tokenizer = AutoTokenizer.from_pretrained(\n", + "            base_model_path,\n", + "            trust_remote_code=True\n", + "        )\n", + "        \n", + "        if self.tokenizer.pad_token is None:\n", + "            self.tokenizer.pad_token = self.tokenizer.eos_token\n", + "        \n", + "        self.model = AutoModelForCausalLM.from_pretrained(\n", + "            base_model_path,\n", + "            torch_dtype=torch.float16,\n", + "            device_map=\"auto\",\n", + "            trust_remote_code=True\n", + "        )\n", + "        \n", + "        # Attach the fine-tuned LoRA adapter to the base model\n", + "        self.model = PeftModel.from_pretrained(self.model, lora_path)\n", + "        self.model.eval()\n", + "        \n", + "        print(\"Model loaded successfully with LoRA adapter!\")\n", + "\n", + "    def create_prompt(self, problem: str) -> str:\n", + "        return f\"\"\"Solve this math problem carefully and step by step. \n", + "Provide detailed reasoning and put your final answer in \\\\boxed{{}}.\n", + "\n", + "Problem: {problem}\n", + "\n", + "Solution:\"\"\"\n", + "\n", + "    def solve(self, problem: str) -> str:\n", + "        prompt = self.create_prompt(problem)\n", + "        \n", + "        inputs = self.tokenizer(\n", + "            prompt, \n", + "            return_tensors=\"pt\",\n", + "            truncation=True,\n", + "            max_length=1024\n", + "        ).to(self.device)\n", + "        \n", + "        with torch.no_grad():\n", + "            outputs = self.model.generate(\n", + "                **inputs,\n", + "                max_new_tokens=512,\n", + "                temperature=0.1,\n", + "                do_sample=True,\n", + "                pad_token_id=self.tokenizer.eos_token_id,\n", + "                repetition_penalty=1.1\n", + "            )\n", + "        \n", + "        full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + "        return full_response[len(prompt):].strip()\n", + "\n", + "def main():\n", + "    BASE_MODEL_PATH = \"zai-org/glm-4-9b-chat-hf\"\n", + "    LORA_PATH = \"GLM4-OpenR1\" \n", + "    \n", + "    solver = MathProblemSolver(BASE_MODEL_PATH, LORA_PATH)\n", + "\n", + "    problems = [\n", + "        \"If x + y = 10 and x - y = 4, what is the value of x?\",\n", + "        \"Find the sum of the roots of the equation x^2 - 5x + 6 = 0.\",\n", + "    ]\n", + "    \n", + "    for i, problem in enumerate(problems, 1):\n", + "        print(f\"\\n{'='*50}\")\n", + "        print(f\"Problem #{i}: {problem}\")\n", + "        print(f\"{'='*50}\")\n", + "        \n", + "        solution = solver.solve(problem)\n", + "        print(\"Model's Solution:\")\n", + "        print(solution)\n", + "        print(f\"{'='*50}\")\n", + "\n", + "if __name__ == \"__main__\":\n", + "    main()" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/original_template/GLM4_(9B)-Reasoning-Conversational.ipynb b/original_template/GLM4_(9B)-Reasoning-Conversational.ipynb index 44028c1b..1b47f002 100644 --- a/original_template/GLM4_(9B)-Reasoning-Conversational.ipynb +++ b/original_template/GLM4_(9B)-Reasoning-Conversational.ipynb @@ -37,6 +37,13 @@ "### Unsloth" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We use the open-r1/OpenR1-Math-220k dataset, so we need to format the data into `prompt-completion` pairs."
+ ] + }, { "cell_type": "code", "execution_count": null, @@ -49,7 +56,7 @@ "\n", "print(dataset)\n", "\n", - "# View the first training sample\n", + "# See the first sample\n", "sample = dataset['train'][0]\n", "print(\"\\n--- Problem ---\")\n", "print(sample['problem'])\n", @@ -59,9 +66,6 @@ "print(sample['answer'])\n", "\n", "def format_function(sample):\n", - "    \"\"\"\n", - "    Convert a raw data sample into a prompt-completion pair for fine-tuning.\n", - "    \"\"\"\n", "    prompt_text = f\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", "You are an expert mathematician. Solve the following problem. Think step by step. End your response with a final answer in the format \\\\boxed{{answer}}.<|eot_id|>\n", "<|start_header_id|>user<|end_header_id|>\n", @@ -86,6 +90,13 @@ "train_data.to_json(\"openr1_math_cot_formatted.jsonl\", orient=\"records\", lines=True)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we have the formatted data, we can use it to train the model." + ] + }, { "cell_type": "code", "execution_count": null, @@ -99,7 +110,7 @@ "from trl import SFTTrainer\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", - "    model_name = \"glm-4-9b-chat-hf\",\n", + "    model_name = \"zai-org/glm-4-9b-chat-hf\",\n", "    max_seq_length = 4096,\n", "    dtype = None,\n", "    load_in_4bit = True,\n", @@ -116,9 +127,6 @@ "dataset = load_dataset(\"json\", data_files={\"train\": \"openr1_math_cot_formatted.jsonl\"}, split=\"train\")\n", "\n", "def preprocess_function(examples):\n", - "    \"\"\"\n", - "    Concatenate the prompt and completion into a single 'text' field before training.\n", - "    \"\"\"\n", "    texts = [p + c for p, c in zip(examples['prompt'], examples['completion'])]\n", "    return {\"text\": texts}\n", "\n", @@ -154,6 +162,111 @@ "\n", "trainer.train()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great, now the model is ready. Let's see how it works. If you don't want to train the model from scratch, you can run:\n", + "\n", + "```bash\n", + "pip install modelscope\n", + "modelscope download MengAiDev/GLM4-OpenR1 --local_dir GLM4-OpenR1\n", + "```\n", + "\n", + "This model was trained for only 6,250 steps because of the time limit, so it may not be as good as the model I had expected to train for 20,000 steps." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoModelForCausalLM, AutoTokenizer\n", + "from peft import PeftModel\n", + "import torch\n", + "\n", + "class MathProblemSolver:\n", + "    def __init__(self, base_model_path: str, lora_path: str):\n", + "        self.device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "        \n", + "        self.tokenizer = AutoTokenizer.from_pretrained(\n", + "            base_model_path,\n", + "            trust_remote_code=True\n", + "        )\n", + "        \n", + "        if self.tokenizer.pad_token is None:\n", + "            self.tokenizer.pad_token = self.tokenizer.eos_token\n", + "        \n", + "        self.model = AutoModelForCausalLM.from_pretrained(\n", + "            base_model_path,\n", + "            torch_dtype=torch.float16,\n", + "            device_map=\"auto\",\n", + "            trust_remote_code=True\n", + "        )\n", + "        \n", + "        # Attach the fine-tuned LoRA adapter to the base model\n", + "        self.model = PeftModel.from_pretrained(self.model, lora_path)\n", + "        self.model.eval()\n", + "        \n", + "        print(\"Model loaded successfully with LoRA adapter!\")\n", + "\n", + "    def create_prompt(self, problem: str) -> str:\n", + "        return f\"\"\"Solve this math problem carefully and step by step. 
\n", + "Provide detailed reasoning and put your final answer in \\\\boxed{{}}.\n", + "\n", + "Problem: {problem}\n", + "\n", + "Solution:\"\"\"\n", + "\n", + " def solve(self, problem: str) -> str:\n", + " prompt = self.create_prompt(problem)\n", + " \n", + " inputs = self.tokenizer(\n", + " prompt, \n", + " return_tensors=\"pt\",\n", + " truncation=True,\n", + " max_length=1024\n", + " ).to(self.device)\n", + " \n", + " with torch.no_grad():\n", + " outputs = self.model.generate(\n", + " **inputs,\n", + " max_new_tokens=512,\n", + " temperature=0.1,\n", + " do_sample=True,\n", + " pad_token_id=self.tokenizer.eos_token_id,\n", + " repetition_penalty=1.1\n", + " )\n", + " \n", + " full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + " return full_response[len(prompt):].strip()\n", + "\n", + "def main():\n", + " BASE_MODEL_PATH = \"zai-org/glm-4-9b-chat-hf\"\n", + " LORA_PATH = \"GLM4-OpenR1\" \n", + " \n", + " solver = MathProblemSolver(BASE_MODEL_PATH, LORA_PATH)\n", + "\n", + " problems = [\n", + " \"If x + y = 10 and x - y = 4, what is the value of x?\",\n", + " \"Find the sum of the roots of the equation x^2 - 5x + 6 = 0.\",\n", + " ]\n", + " \n", + " for i, problem in enumerate(problems, 1):\n", + " print(f\"\\n{'='*50}\")\n", + " print(f\"Problem #{i}: {problem}\")\n", + " print(f\"{'='*50}\")\n", + " \n", + " solution = solver.solve(problem)\n", + " print(\"Model's Solution:\")\n", + " print(solution)\n", + " print(f\"{'='*50}\")\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()" + ] } ], "metadata": {