No eval loss and eval accuracy when calling trainer.evaluate() #542

Closed
songmh99 opened this issue May 5, 2023 · 4 comments
Labels
question (Further information is requested), Stale

Comments

@songmh99

songmh99 commented May 5, 2023

Environment info

  • adapter-transformers version:
  • Platform: linux
  • Python version: 3.9
  • PyTorch version (GPU?): 1.12.1 gpu

Details

I want to use knowledge distillation between two adapter models.
The problem occurs when the teacher model and the student model are both adapter models.
When the teacher model is an adapter model and the student model is a plain Hugging Face model, training and evaluation work normally.

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import Trainer, TrainingArguments

class DistillationTrainingArguments(TrainingArguments):
    def __init__(self, *args, alpha=0.5, temperature=2.0, **kwargs):
        super().__init__(*args, **kwargs)

        self.alpha = alpha
        self.temperature = temperature

class DistillationTrainer(Trainer):
    def __init__(self, *args, teacher_model=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.teacher = teacher_model
        # place teacher on same device as student
        self._move_model_to_device(self.teacher,self.model.device)
        self.teacher.eval()
        # self.teacher.train()
  
        
    def compute_loss(self, model, inputs, return_outputs=False):
        # compute student output
        # print('comput distill loss')
        outputs_student = model(**inputs)
        
        student_loss=outputs_student.loss
        
        # compute teacher output
        # self.teacher.train()
        with torch.no_grad():
            outputs_teacher = self.teacher(**inputs)

        # assert size
        assert outputs_student.logits.size() == outputs_teacher.logits.size()

        # Soften probabilities and compute distillation loss
        loss_function = nn.KLDivLoss(reduction="batchmean")
        loss_logits = (loss_function(
            F.log_softmax(outputs_student.logits / self.args.temperature, dim=-1),
            F.softmax(outputs_teacher.logits / self.args.temperature, dim=-1)) * (self.args.temperature ** 2))
        # Return weighted student loss
        loss = self.args.alpha * student_loss + (1. - self.args.alpha) * loss_logits
        return (loss, outputs_student) if return_outputs else loss
teacher_model3 = bert_adapter_model
student_model3 = adapter_distillBert_model

repo_name = "KD-try3"
training_args3 = DistillationTrainingArguments(
    output_dir="./saved/KD3/",
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    fp16=True,
    learning_rate=5e-5,
    seed=32,
    overwrite_output_dir=True,
    
    # logging & evaluation strategies
    # logging_dir=f"./saved/KD3/logs",
    # logging_strategy="epoch", # to get more information to TB
    # evaluation_strategy="epoch",
    # save_strategy="epoch",
    # save_total_limit=2,
    # load_best_model_at_end=True,
    # metric_for_best_model="accuracy",
    
    # distillation parameters
    alpha=0.5,
    temperature=1.0,
    remove_unused_columns=False,
    logging_steps=200,
    )


trainer3 = DistillationTrainer(
    student_model3,
    training_args3,
    teacher_model=teacher_model3,
    # labels=torch.nn.functional.one_hot(dataset["train"]['labels']),
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    # test_dataset=dataset["test"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    # compute_metrics=compute_metrics,
    compute_metrics=compute_accuracy_adapter,
)

When I run the code, it only reports the training loss (no validation loss). And when I run "trainer3.evaluate()", there is no "eval_loss" or "eval_accuracy" in the result, only "eval_runtime" and similar entries.

When I uncomment these lines in the training arguments:

# logging & evaluation strategies
logging_dir=f"./saved/KD3/logs",
logging_strategy="epoch", # to get more information to TB
evaluation_strategy="epoch",
save_strategy="epoch",
save_total_limit=2,
load_best_model_at_end=True,
metric_for_best_model="accuracy",

It will report the following error:

KeyError                                  Traceback (most recent call last)
Cell In[73], line 1
----> 1 trainer3.train()

File ~/anaconda3/envs/adapt/lib/python3.9/site-packages/transformers/trainer.py:1543, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1538 self.model_wrapped = self.model
   1540 inner_training_loop = find_executable_batch_size(
   1541     self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
   1542 )
-> 1543 return inner_training_loop(
   1544     args=args,
   1545     resume_from_checkpoint=resume_from_checkpoint,
   1546     trial=trial,
   1547     ignore_keys_for_eval=ignore_keys_for_eval,
   1548 )

File ~/anaconda3/envs/adapt/lib/python3.9/site-packages/transformers/trainer.py:1883, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   1880     self.control.should_training_stop = True
   1882 self.control = self.callback_handler.on_epoch_end(args, self.state, self.control)
-> 1883 self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
   1885 if DebugOption.TPU_METRICS_DEBUG in self.args.debug:
   1886     if is_torch_tpu_available():
   1887         # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)

File ~/anaconda3/envs/adapt/lib/python3.9/site-packages/transformers/trainer.py:2135, in Trainer._maybe_log_save_evaluate(self, tr_loss, model, trial, epoch, ignore_keys_for_eval)
   2132     self._report_to_hp_search(trial, self.state.global_step, metrics)
   2134 if self.control.should_save:
-> 2135     self._save_checkpoint(model, trial, metrics=metrics)
   2136     self.control = self.callback_handler.on_save(self.args, self.state, self.control)

File ~/anaconda3/envs/adapt/lib/python3.9/site-packages/transformers/trainer.py:2238, in Trainer._save_checkpoint(self, model, trial, metrics)
   2236 if not metric_to_check.startswith("eval_"):
   2237     metric_to_check = f"eval_{metric_to_check}"
-> 2238 metric_value = metrics[metric_to_check]
   2240 operator = np.greater if self.args.greater_is_better else np.less
   2241 if (
   2242     self.state.best_metric is None
   2243     or self.state.best_model_checkpoint is None
   2244     or operator(metric_value, self.state.best_metric)
   2245 ):

KeyError: 'eval_accuracy'

Thanks for your help!

songmh99 added the question label on May 5, 2023
@hSterz
Member

hSterz commented May 5, 2023

Hey @songmh99, one possible reason for this could be that the trainer does not get the labels in the format it expects. For example, if the labels are not named labels, you have to specify label_names (see here) so that the model picks the correct labels. So I would recommend first checking that the labels are found by the trainer.
If this does not help, please provide the compute_accuracy_adapter method as well.
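A minimal sketch of what that could look like in this thread's setup, assuming the dataset's label column is called "label" instead of "labels" (the column name is only an example):

training_args3 = DistillationTrainingArguments(
    output_dir="./saved/KD3/",
    evaluation_strategy="epoch",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    remove_unused_columns=False,
    alpha=0.5,
    temperature=1.0,
    # Hypothetical column name: replace "label" with the actual name of the
    # label column in your dataset so the Trainer can compute loss and metrics.
    label_names=["label"],
)

With this set, trainer3.evaluate() should return eval_loss together with the metrics from compute_metrics.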

@songmh99
Author

songmh99 commented May 6, 2023


Thanks a lot! It works! :)
I have another question.
The code above is the knowledge distillation setup:
I want to train the teacher model (bert_adapter_model) and the student model (adapter_distillBert_model) together. During training, only the two adapters should be trainable (one in the teacher model, the other in the student model).
When I run trainer3.train(), the log reports (with distillation):

Number of trainable parameters = 1040163

However, when I train only the adapter_distillBert_model (without distillation), the log reports:

Number of trainable parameters = 1040163

The number of trainable parameters without distillation is the same as in the distillation training process.
Does this mean that in my distillation process the adapter in the teacher model is frozen?

Details:

The teacher model and student model are as follows:

# Adapter BERT model (used as the teacher)
from transformers import BertConfig, BertModelWithHeads
bert_adapter_config = BertConfig.from_pretrained('bert-base-uncased', id2label=id2label)
bert_adapter_model = BertModelWithHeads.from_pretrained('bert-base-uncased', config=bert_adapter_config)

bert_adapter_model.load_adapter('nli/qnli@ukp', with_head=False)
bert_adapter_model.add_classification_head('cb', num_labels=len(id2label), overwrite_ok=True)
bert_adapter_model.train_adapter('qnli')

# DistilBertAdapterModel (used as the student)
from transformers import DistilBertConfig, DistilBertAdapterModel
adapter_distillBert_config = DistilBertConfig.from_pretrained('distilbert-base-uncased', id2label=id2label)
adapter_distillBert_model = DistilBertAdapterModel.from_pretrained('distilbert-base-uncased', config=adapter_distillBert_config)

adapter_distillBert_model.load_adapter('nli/qnli@ukp', with_head=False)
adapter_distillBert_model.add_classification_head('cb', num_labels=len(id2label), overwrite_ok=True)
adapter_distillBert_model.train_adapter('qnli')
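
One way to check which weights are actually trainable is to count them per model; a minimal sketch, assuming the teacher and student models are defined as above:

def count_trainable(model):
    # Sum over parameters that still require gradients, i.e. are not frozen.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print("teacher trainable params:", count_trainable(bert_adapter_model))
print("student trainable params:", count_trainable(adapter_distillBert_model))

If I read the DistillationTrainer above correctly, the number printed by the Trainer refers only to the model passed as model (the student); the teacher is only called under torch.no_grad() in compute_loss and is not added to the optimizer, so its adapter is not updated by this trainer.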

The code for training only the adapter_distillBert_model (without distillation) is as follows:

import numpy as np
from transformers import TrainingArguments, AdapterTrainer, EvalPrediction
adapter_distillBert_training_args = TrainingArguments(
    learning_rate=5e-5,
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    logging_steps=200,
    output_dir='./saved/distilladapter-bert/',
    overwrite_output_dir=True,
    remove_unused_columns=False,
)
def compute_accuracy_adapter(p: EvalPrediction):
    preds = np.argmax(p.predictions, axis=1)
    return {"acc": (preds == p.label_ids).mean()}

adapter_distillBert_trainer = AdapterTrainer(
    model=adapter_distillBert_model,
    args=adapter_distillBert_training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    compute_metrics=compute_accuracy_adapter,
)
adapter_distillBert_trainer.train()
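
For reference, a minimal sketch of inspecting the metrics this setup returns (the keys in the comment follow the Trainer's usual naming; the values are whatever your run produces):

metrics = adapter_distillBert_trainer.evaluate()
print(metrics)
# The Trainer prefixes metric names with "eval_", so the "acc" key returned by
# compute_accuracy_adapter shows up as "eval_acc" next to "eval_loss",
# "eval_runtime", etc. Any metric_for_best_model setting has to match one of
# these keys (e.g. "acc"), otherwise checkpoint saving raises a KeyError.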

@adapter-hub-bert
Member

This issue has been automatically marked as stale because it has been without activity for 90 days. This issue will be closed in 14 days unless you comment or remove the stale label.

@adapter-hub-bert
Member

This issue was closed because it was stale for 14 days without any activity.

adapter-hub-bert closed this as not planned on Aug 19, 2023