
Commit 6b03b21

phi-4
1 parent 8db3ef5 commit 6b03b21

File tree

5 files changed (+29 -1 lines)

README.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -135,6 +135,7 @@ Every model is written from scratch to maximize performance and remove layers of
 | OpenLLaMA | 3B, 7B, 13B | OpenLM Research | [Geng & Liu 2023](https://github.com/openlm-research/open_llama) |
 | Phi 1.5 & 2 | 1.3B, 2.7B | Microsoft Research | [Li et al. 2023](https://arxiv.org/abs/2309.05463) |
 | Phi 3 | 3.8B | Microsoft Research | [Abdin et al. 2024](https://arxiv.org/abs/2404.14219) |
+| Phi 4 | 14B | Microsoft Research | [Abdin et al. 2024](https://arxiv.org/abs/2412.08905) |
 | Platypus | 7B, 13B, 70B | Lee et al. | [Lee, Hunter, and Ruiz 2023](https://arxiv.org/abs/2308.07317) |
 | Pythia | {14,31,70,160,410}M, {1,1.4,2.8,6.9,12}B | EleutherAI | [Biderman et al. 2023](https://arxiv.org/abs/2304.01373) |
 | Qwen2.5 | 0.5B, 1.5B, 3B, 7B, 14B, 32B, 72B | Alibaba Group | [Qwen Team 2024](https://qwenlm.github.io/blog/qwen2.5/) |
```

litgpt/config.py

Lines changed: 19 additions & 0 deletions
```diff
@@ -1575,6 +1575,25 @@ def norm_class(self) -> Type:
         mlp_class_name="LLaMAMLP",
         parallel_residual=False,
     ),
+    # https://huggingface.co/microsoft/phi-4/blob/main/config.json
+    dict(
+        name="phi-4",
+        hf_config=dict(org="microsoft", name="phi-4"),
+        vocab_size=100257,
+        padded_vocab_size=100352,
+        block_size=16384,
+        n_embd=5120,
+        n_layer=40,
+        n_head=40,
+        n_query_groups=10,
+        rotary_percentage=1.0,
+        bias=False,
+        norm_class_name="RMSNorm",
+        intermediate_size=17920,
+        rope_base=250000,
+        mlp_class_name="LLaMAMLP",
+        parallel_residual=False,
+    ),
 ]
 configs.extend(phi)
```
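The new entry configures grouped-query attention: `n_head=40` query heads share `n_query_groups=10` key/value heads. A minimal sketch of the shapes this implies, assuming LitGPT's usual default of `head_size = n_embd // n_head` (the variable names below are illustrative, not LitGPT internals):

```python
# Illustrative sketch: attention shapes implied by the phi-4 config above,
# assuming head_size defaults to n_embd // n_head as in LitGPT.
n_embd, n_head, n_query_groups = 5120, 40, 10

head_size = n_embd // n_head              # 128
q_per_kv = n_head // n_query_groups       # 4 query heads share each KV head
kv_proj_dim = n_query_groups * head_size  # K and V projections are 1280-dim

print(head_size, q_per_kv, kv_proj_dim)   # 128 4 1280
```

With 10 KV heads instead of 40, the KV cache is 4x smaller than full multi-head attention would be at the same hidden size.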

litgpt/prompts.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -268,6 +268,9 @@ class Phi3(PromptStyle):
     def apply(self, prompt: str, **kwargs: str) -> str:
         return f'<|system|>\nYou are a helpful assistant.<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>\n'
 
+class Phi4(PromptStyle):
+    def apply(self, prompt: str, **kwargs: str) -> str:
+        return f'<|im_start|>user<|im_sep|>{prompt}<|im_end|><|im_start|>assistant<|im_sep|>'
 
 class TinyLlama(PromptStyle):
     def apply(self, prompt: str, **kwargs: str) -> str:
@@ -337,6 +340,7 @@ def __init__(self):
     "phi-1": Phi1,
     "phi-2": Phi2,
     "phi-3": Phi3,
+    "phi-4": Phi4,
     "tinyllama": TinyLlama,
     "gemma": Gemma,
     "llama3": Llama3,
@@ -380,6 +384,8 @@ def model_name_to_prompt_style(model_name: str) -> PromptStyle:
         return Phi2()
     if re.search("Phi-3", model_name):
         return Phi3()
+    if re.search("phi-4", model_name):
+        return Phi4()
     if re.search(r"tiny-llama.*chat", model_name):
         return TinyLlama()
     if re.search(r"(Code)?Gemma.*-it", model_name):
```
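Phi 4 drops Phi 3's `<|user|>`/`<|assistant|>` tags in favor of ChatML-style `<|im_start|>`/`<|im_sep|>`/`<|im_end|>` tokens. A self-contained copy of the new style, runnable without LitGPT installed:

```python
# Standalone copy of the Phi4 prompt style added above; in litgpt/prompts.py
# it subclasses PromptStyle, omitted here to keep the sketch self-contained.
class Phi4:
    def apply(self, prompt: str, **kwargs: str) -> str:
        return f"<|im_start|>user<|im_sep|>{prompt}<|im_end|><|im_start|>assistant<|im_sep|>"

print(Phi4().apply("What is 2 + 2?"))
# <|im_start|>user<|im_sep|>What is 2 + 2?<|im_end|><|im_start|>assistant<|im_sep|>
```

Note that `model_name_to_prompt_style` matches the lowercase `phi-4`, unlike the capitalized `Phi-3` pattern, mirroring how the checkpoints are named on Hugging Face (`microsoft/phi-4` vs. `microsoft/Phi-3-mini-4k-instruct`).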

tests/test_model.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -329,7 +329,7 @@ def test_against_hf_phi(model_name, device, dtype):
 
 
 @torch.inference_mode()
-@pytest.mark.parametrize("model_name", ("Phi-3-mini-4k-instruct", "Phi-3-mini-128k-instruct", "Phi-3.5-mini-instruct"))
+@pytest.mark.parametrize("model_name", ("Phi-3-mini-4k-instruct", "Phi-3-mini-128k-instruct", "Phi-3.5-mini-instruct", "phi-4"))
 @pytest.mark.parametrize(
     ("device", "dtype"),
     [
```
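Adding `phi-4` to the parametrization reuses the existing Phi 3 equivalence test against the Hugging Face implementation across the same device/dtype combinations. To run only the new cases locally, one option is pytest's `-k` filter, sketched here via `pytest.main` (which accepts the same arguments as the CLI):

```python
# Sketch: select only the phi-4 variants of the parametrized tests,
# matching "phi-4" against the generated test ids.
import pytest

raise SystemExit(pytest.main(["tests/test_model.py", "-k", "phi-4"]))
```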

tutorials/download_model_weights.md

Lines changed: 2 additions & 0 deletions
```diff
@@ -34,6 +34,7 @@ LitGPT supports a variety of LLM architectures with publicly available weights.
 | OpenLLaMA | 3B, 7B, 13B | OpenLM Research | [Geng & Liu 2023](https://github.com/openlm-research/open_llama) |
 | Phi 1.5 & 2 | 1.3B, 2.7B | Microsoft Research | [Li et al. 2023](https://arxiv.org/abs/2309.05463) |
 | Phi 3 & 3.5 | 3.8B | Microsoft Research | [Abdin et al. 2024](https://arxiv.org/abs/2404.14219) |
+| Phi 4 | 14B | Microsoft Research | [Abdin et al. 2024](https://arxiv.org/abs/2412.08905) |
 | Platypus | 7B, 13B, 70B | Lee et al. | [Lee, Hunter, and Ruiz 2023](https://arxiv.org/abs/2308.07317) |
 | Pythia | {14,31,70,160,410}M, {1,1.4,2.8,6.9,12}B | EleutherAI | [Biderman et al. 2023](https://arxiv.org/abs/2304.01373) |
 | Qwen2.5 | 0.5B, 1.5B, 3B, 7B, 14B, 32B, 72B | Alibaba Group | [Qwen Team 2024](https://qwenlm.github.io/blog/qwen2.5/) |
@@ -165,6 +166,7 @@ microsoft/phi-2
 microsoft/Phi-3-mini-128k-instruct
 microsoft/Phi-3-mini-4k-instruct
 microsoft/Phi-3.5-mini-instruct
+microsoft/phi-4
 mistralai/mathstral-7B-v0.1
 mistralai/Mistral-7B-Instruct-v0.1
 mistralai/Mistral-7B-Instruct-v0.2
```
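With the id listed, the weights download like any other supported checkpoint, e.g. `litgpt download microsoft/phi-4` per this tutorial. A hedged sketch using the high-level Python API instead (`LLM.load` fetches and converts the weights on first use; the prompt and `max_new_tokens` value are arbitrary):

```python
# Sketch using LitGPT's high-level Python API; LLM.load resolves the
# Hugging Face repo id and downloads the checkpoint if it is not cached.
from litgpt import LLM

llm = LLM.load("microsoft/phi-4")
print(llm.generate("What food do llamas eat?", max_new_tokens=50))
```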
