From 55ba643158c76cb412a74bfc941ac5469cbc4fae Mon Sep 17 00:00:00 2001 From: YunLiu <55491388+KumoLiu@users.noreply.github.com> Date: Fri, 4 Oct 2024 22:00:08 +0800 Subject: [PATCH] Try to fix multi-gpu training issue in tumor-detection (#686) Fixes #685 ### Status **Ready/Work in progress/Hold** ### Please ensure all the checkboxes: - [x] Codeformat tests passed locally by running `./runtests.sh --codeformat`. - [ ] In-line docstrings updated. - [ ] Update `version` and `changelog` in `metadata.json` if changing an existing bundle. - [ ] Please ensure the naming rules in config files meet our requirements (please refer to: `CONTRIBUTING.md`). - [ ] Ensure versions of packages such as `monai`, `pytorch` and `numpy` are correct in `metadata.json`. - [ ] Descriptions should be consistent with the content, such as `eval_metrics` of the provided weights and TorchScript modules. - [ ] Files larger than 25MB are excluded and replaced by providing download links in `large_file.yml`. - [ ] Avoid using paths that contain personal information within config files (for example, use `/home/your_name/` for `"bundle_root"`). 
--------- Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com> --- models/pathology_tumor_detection/configs/metadata.json | 3 ++- models/pathology_tumor_detection/configs/train.json | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/models/pathology_tumor_detection/configs/metadata.json b/models/pathology_tumor_detection/configs/metadata.json index f392e318..e3c7cfa0 100644 --- a/models/pathology_tumor_detection/configs/metadata.json +++ b/models/pathology_tumor_detection/configs/metadata.json @@ -1,7 +1,8 @@ { "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json", - "version": "0.6.0", + "version": "0.6.1", "changelog": { + "0.6.1": "fix multi-gpu issue", "0.6.0": "use monai 1.4 and update large files", "0.5.9": "update to use monai 1.3.1", "0.5.8": "update readme to add memory warning", diff --git a/models/pathology_tumor_detection/configs/train.json b/models/pathology_tumor_detection/configs/train.json index c44a8926..52344bad 100644 --- a/models/pathology_tumor_detection/configs/train.json +++ b/models/pathology_tumor_detection/configs/train.json @@ -196,6 +196,11 @@ ] }, "handlers": [ + { + "_target_": "LrScheduleHandler", + "lr_scheduler": "@lr_scheduler", + "print_lr": true + }, { "_target_": "ValidationHandler", "validator": "@validate#evaluator", @@ -207,11 +212,6 @@ "tag_name": "train_loss", "output_transform": "$monai.handlers.from_engine(['loss'], first=True)" }, - { - "_target_": "LrScheduleHandler", - "lr_scheduler": "@lr_scheduler", - "print_lr": true - }, { "_target_": "TensorBoardStatsHandler", "log_dir": "@output_dir",