@@ -403,6 +403,12 @@ def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrai
403403 "{% if not has_system %}"
404404 "{{ '<|im_start|>system\n You are Olmo, a helpful AI assistant built by Ai2. Your date cutoff is December 2024, and your model weights are available at https://huggingface.co/allenai.<|im_end|>\n ' }}"
405405 "{% endif %}"
406+ "{% set last_user_index = -1 %}"
407+ "{% for message in messages %}"
408+ "{% if message['role'] == 'user' %}"
409+ "{% set last_user_index = loop.index0 %}"
410+ "{% endif %}"
411+ "{% endfor %}"
406412 "{% for message in messages %}"
407413 "{% if message['role'] == 'system' %}"
408414 "{{ '<|im_start|>system\n ' + message['content'] }}"
@@ -418,10 +424,18 @@ def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrai
418424 "{{ '<|im_start|>user\n ' + message['content'] + '<|im_end|>\n ' }}"
419425 "{% endif %}"
420426 "{% elif message['role'] == 'assistant' %}"
427+ "{% set assistant_content = message.get('content', '') %}"
428+ "{% set reasoning_content = '' %}"
429+ "{% if '</think>' in assistant_content %}"
430+ "{% set think_split = assistant_content.split('</think>') %}"
431+ "{% set reasoning_content = think_split[0].rstrip('\\ n').split('<think>')[-1].lstrip('\\ n') %}"
432+ "{% set assistant_content = think_split[-1].lstrip('\\ n') %}"
433+ "{% endif %}"
421434 "{{ '<|im_start|>assistant\n ' }}"
422- "{% if message.get('content', none) is not none %}"
423- "{{ message['content'] }}"
435+ "{% if loop.index0 > last_user_index and reasoning_content.strip() %}"
436+ "{{ '<think> \\ n' + reasoning_content.strip(' \\ n') + ' \\ n</think> \\ n \\ n' }}"
424437 "{% endif %}"
438+ "{{ assistant_content }}"
425439 "{% if message.get('function_calls', none) is not none %}"
426440 "{{ '<function_calls>' + message['function_calls'] + '</function_calls>' }}"
427441 "{% endif %}"
0 commit comments