From 5faa9df6f842d7488e391f06c28cecc60532bd53 Mon Sep 17 00:00:00 2001 From: junghye01 Date: Wed, 20 Dec 2023 20:39:13 +0000 Subject: [PATCH 01/10] add sdd300 code --- ssd300/config.py | 13 +++ ssd300/inference.py | 173 +++++++++++++++++++++++++++++++++++++ ssd300/train.py | 205 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 391 insertions(+) create mode 100644 ssd300/config.py create mode 100644 ssd300/inference.py create mode 100644 ssd300/train.py diff --git a/ssd300/config.py b/ssd300/config.py new file mode 100644 index 0000000..6e5ce74 --- /dev/null +++ b/ssd300/config.py @@ -0,0 +1,13 @@ +Config=dict( + EPOCHS=150, + LR=0.0001, + DR_RATE=0.35, + NUM_CLASSES=3, + TRAIN_BS=4, + VALID_BS=2, + NUM_WORKERS=4, + WEIGHT_DECAY=0.0005, + CONTRAST='AFTER' + +) + diff --git a/ssd300/inference.py b/ssd300/inference.py new file mode 100644 index 0000000..14bc200 --- /dev/null +++ b/ssd300/inference.py @@ -0,0 +1,173 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') +import os +import numpy as np +import pandas as pd + +# for ignoring warnings +import warnings +warnings.filterwarnings('ignore') + +import cv2 + +import torch +import torchvision +from torchvision import transforms as torchtrans +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.ops import nms + + +from tqdm import tqdm +import utils +import transforms as T +from dataset import RT_Dataset +from config import Config +from augment import get_transform + +from torchvision.models.detection.ssd import SSDClassificationHead +from torchvision.models.detection import _utils +from torchvision.models.detection import SSD300_VGG16_Weights + + +# model +def get_object_detection_model(num_classes=4,size=300): + # Load the Torchvision pretrained model. + model = torchvision.models.detection.ssd300_vgg16( + weights=SSD300_VGG16_Weights.COCO_V1 + ) + # Retrieve the list of input channels. + in_channels = _utils.retrieve_out_channels(model.backbone, (size, size)) + # List containing number of anchors based on aspect ratios. + num_anchors = model.anchor_generator.num_anchors_per_location() + # The classification head. + model.head.classification_head = SSDClassificationHead( + in_channels=in_channels, + num_anchors=num_anchors, + num_classes=num_classes, + ) + # Image size for transforms. + model.transform.min_size = (size,) + model.transform.max_size = size + return model + +# valid function +def valid_fn(val_data_loader, model, device): + model.eval() + outputs = [] + ground_truths=[] + + for images,targets in tqdm(val_data_loader): + + images=list(img.to(device) for img in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + output=model(images) + + #print(f'target : {targets[0]}') + for out,target in zip(output,targets): + scores=out['scores'].detach().cpu().numpy() + boxes=out['boxes'].detach().cpu().numpy() + labels=out['labels'].detach().cpu().numpy() + + #keep_idx=nms(boxes,scores,iou_threshold=0.1) + + #boxes=boxes[keep_idx] + #scores=scores[keep_idx] + #labels=labels[keep_idx] + + + outputs.append({'boxes': boxes, # 2중 리스트일 수도 + 'scores': scores, + 'labels': labels}) + + # ground truth 에 label 추가 + gt_boxes=target['boxes'].cpu().numpy() + gt_labels=target['labels'].cpu().numpy() + + ground_truths.append(list(zip(gt_labels,gt_boxes))) + + #ground_truths.append(target['boxes'].cpu().numpy()) # 이중 리스트일 수도.. + + + return outputs,ground_truths + + +def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2): + global best_recall + outputs, ground_truths = valid_fn(valid_dataloader, model, device) + predictions = [] + + + for output in outputs: + valid_scores=output['scores']>score_threshold + + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + + + predictions.append(list(zip(valid_labels,valid_boxes))) + + + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} + + return_outputs=metrics['total'] + return return_outputs,class_result + +if __name__=='__main__': + + + if torch.cuda.is_available(): + device=torch.device('cuda') + + + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/test_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + + + + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) + + + + valid_dataloader=torch.utils.data.DataLoader( + valid_dataset, + batch_size=Config['VALID_BS'], + shuffle=False, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + + ) + + + model=get_object_detection_model(Config['NUM_CLASSES']) + + + model_save_path = "/content/drive/MyDrive/models/fasterrcnn_resnet50_fpnv2_8_4.pth" + saved_state=torch.load(model_save_path,map_location=device) + + model.load_state_dict(saved_state['model_state_dict']) + + model.to(device) + + + + params = [p for p in model.parameters() if p.requires_grad] + + optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) + + lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9) + + # valid data + return_outputs,class_result = validate_and_save_best_model(0, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'test Total result : {return_outputs} , class result : {class_result} ') \ No newline at end of file diff --git a/ssd300/train.py b/ssd300/train.py new file mode 100644 index 0000000..5cc7df7 --- /dev/null +++ b/ssd300/train.py @@ -0,0 +1,205 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') +import os +import numpy as np +import pandas as pd + +# for ignoring warnings +import warnings +warnings.filterwarnings('ignore') + +import cv2 + +import torch +import torchvision +from torchvision import transforms as torchtrans +#from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +import torchvision.models.detection.ssd as ssd +from torchvision.ops import nms + +from engine import train_one_epoch,evaluate +from tqdm import tqdm +import utils +import transforms as T +from dataset import RT_Dataset +from config import Config +from augment import get_transform,get_ssd_transform +import wandb + +from torchvision.models.detection.ssd import SSDClassificationHead +from torchvision.models.detection import _utils +from torchvision.models.detection import SSD300_VGG16_Weights + + +# model +def get_object_detection_model(num_classes=4,size=300): + # Load the Torchvision pretrained model. + model = torchvision.models.detection.ssd300_vgg16( + weights=SSD300_VGG16_Weights.COCO_V1 + ) + # Retrieve the list of input channels. + in_channels = _utils.retrieve_out_channels(model.backbone, (size, size)) + # List containing number of anchors based on aspect ratios. + num_anchors = model.anchor_generator.num_anchors_per_location() + # The classification head. + model.head.classification_head = SSDClassificationHead( + in_channels=in_channels, + num_anchors=num_anchors, + num_classes=num_classes, + ) + # Image size for transforms. + model.transform.min_size = (size,) + model.transform.max_size = size + return model + + +# valid function +def valid_fn(val_data_loader, model, device): + model.eval() + outputs = [] + ground_truths=[] + + for images,targets in tqdm(val_data_loader): + + images=list(img.to(device) for img in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + output=model(images) + + #print(f'target : {targets[0]}') + for out,target in zip(output,targets): + scores=out['scores'].detach().cpu().numpy() + boxes=out['boxes'].detach().cpu().numpy() + labels=out['labels'].detach().cpu().numpy() + + # keep_idx=nms(boxes,scores,iou_threshold=0.1) + + #boxes=boxes[keep_idx] + #scores=scores[keep_idx] + #labels=labels[keep_idx] + # label을 모두 int 형으로 변환 + + outputs.append({'boxes': boxes, # 2중 리스트일 수도 + 'scores': scores, + 'labels': labels}) + + # label 포함시켜 ground truths에 추가 + gt_boxes=target['boxes'].cpu().numpy() + gt_labels=target['labels'].cpu().numpy() + + #ground_truths.append(target['boxes'].cpu().numpy()) + ground_truths.append(list(zip(gt_labels,gt_boxes))) + + + return outputs,ground_truths + + +def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2): + global best_recall + outputs, ground_truths = valid_fn(valid_dataloader, model, device) + predictions = [] + for output in outputs: + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + #predictions.append(valid_boxes) + predictions.append(list(zip(valid_labels,valid_boxes))) + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + #wandb.log({"epoch": epoch, "recall": metrics['recall']}) # Recall을 W&B에 로그합니다. + wandb.log({"epoch": epoch, "total_recall": total_recall, "total_precision": total_precision,"total_f1_score":total_f1_score}) + + categories={2: 'Porosity', 3: 'Slag'} + class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} + # 각 클래스별 성능 로그 + for class_label,class_metrics in metrics['per_class'].items(): + #class_label=class_label.item() + if class_label==2 or class_label==3: + + wandb.log({ + f"class_{categories[class_label]}_recall" : class_metrics['recall'], + f"class_{categories[class_label]}_precision" : class_metrics['precision'], + f"class_{categories[class_label]}_f1_score" : class_metrics['f1_score'], + f"class_{categories[class_label]}_average_iou": class_metrics['average_iou'], + + }) + if total_recall > best_recall: + best_recall = total_recall + + model_save_path = f"/content/drive/MyDrive/models/ssd300_real_{Config['TRAIN_BS']}_{Config['VALID_BS']}.pth" + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'lr_scheduler_state_dict': lr_scheduler.state_dict() + }, model_save_path) + #wandb.save(model_save_path) # 모델 파일을 W&B에 저장합니다. + + return_outputs=metrics['total'] + return return_outputs,class_result + +if __name__=='__main__': + wandb.init(project='capstone',name='SSD300_VGG16',reinit=True) + + if torch.cuda.is_available(): + device=torch.device('cuda') + + + train_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/train_total.csv') + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/valid_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + result_dir_path=f'/content/drive/MyDrive/result/{Config["MODEL"]}' + os.makedirs(result_dir_path,exist_ok=True) + + train_dataset=RT_Dataset(train_df,image_dir,transforms=get_ssd_transform(train=True)) + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_ssd_transform(train=False)) + + train_dataloader=torch.utils.data.DataLoader( + train_dataset, + batch_size=Config['TRAIN_BS'], + shuffle=True, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + ) + + valid_dataloader=torch.utils.data.DataLoader( + valid_dataset, + batch_size=Config['VALID_BS'], + shuffle=False, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + + ) + + model=get_object_detection_model(Config['NUM_CLASSES'],Config['IMG_SIZE']) + model.to(device) + + wandb.watch(model) + + params = [p for p in model.parameters() if p.requires_grad] + + #optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) + + optimizer = torch.optim.Adam(params, lr=Config['LR']) + + lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9) # 3epoch마다 학습률 10%씩 감소 + + best_recall=-100 + with open(f'{result_dir_path}/{Config["TRAIN_BS"]}_{Config["VALID_BS"]}_{Config["EPOCHS"]}.txt','w') as f: + for epoch in range(Config['EPOCHS']): + train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=10) + + lr_scheduler.step() + + # valid data + return_outputs,class_result = validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'epoch : {epoch}, output : {return_outputs}') + f.write(f"Epoch {epoch} Total result:{return_outputs}, class_result : {class_result}\n") \ No newline at end of file From 69bb4779cd59cdcbca6acf06921c18b471529153 Mon Sep 17 00:00:00 2001 From: junghye01 Date: Wed, 20 Dec 2023 20:42:04 +0000 Subject: [PATCH 02/10] utils code --- utils/augment.py | 24 +++ utils/coco_eval.py | 349 +++++++++++++++++++++++++++++++++++ utils/coco_utils.py | 251 +++++++++++++++++++++++++ utils/dataset.py | 82 ++++++++ utils/engine.py | 116 ++++++++++++ utils/transforms.py | 50 +++++ utils/utils.py | 441 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 1313 insertions(+) create mode 100644 utils/augment.py create mode 100644 utils/coco_eval.py create mode 100644 utils/coco_utils.py create mode 100644 utils/dataset.py create mode 100644 utils/engine.py create mode 100644 utils/transforms.py create mode 100644 utils/utils.py diff --git a/utils/augment.py b/utils/augment.py new file mode 100644 index 0000000..7dc1442 --- /dev/null +++ b/utils/augment.py @@ -0,0 +1,24 @@ +# for image augmentations +import albumentations as A +from albumentations.pytorch.transforms import ToTensorV2 + + +def get_transform(train): + + if train: + return A.Compose([ + A.Resize(512,512), + A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5), + A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5), + + A.RandomGamma(gamma_limit=(80, 120), p=0.5), + A.VerticalFlip(p=0.5), + # ToTensorV2 converts image to pytorch tensor without div by 255 + ToTensorV2(p=1.0) + ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) + else: + return A.Compose([ + A.Resize(512,512), + ToTensorV2(p=1.0) + ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) + diff --git a/utils/coco_eval.py b/utils/coco_eval.py new file mode 100644 index 0000000..0771797 --- /dev/null +++ b/utils/coco_eval.py @@ -0,0 +1,349 @@ +import json +import tempfile + +import numpy as np +import copy +import time +import torch +#import torch._six + +from pycocotools.cocoeval import COCOeval +from pycocotools.coco import COCO +import pycocotools.mask as mask_util + +from collections import defaultdict + +import utils + + +class CocoEvaluator(object): + def __init__(self, coco_gt, iou_types): + assert isinstance(iou_types, (list, tuple)) + coco_gt = copy.deepcopy(coco_gt) + self.coco_gt = coco_gt + + self.iou_types = iou_types + self.coco_eval = {} + for iou_type in iou_types: + self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) + + self.img_ids = [] + self.eval_imgs = {k: [] for k in iou_types} + + def update(self, predictions): + img_ids = list(np.unique(list(predictions.keys()))) + self.img_ids.extend(img_ids) + + for iou_type in self.iou_types: + results = self.prepare(predictions, iou_type) + coco_dt = loadRes(self.coco_gt, results) if results else COCO() + coco_eval = self.coco_eval[iou_type] + + coco_eval.cocoDt = coco_dt + coco_eval.params.imgIds = list(img_ids) + img_ids, eval_imgs = evaluate(coco_eval) + + self.eval_imgs[iou_type].append(eval_imgs) + + def synchronize_between_processes(self): + for iou_type in self.iou_types: + self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) + create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) + + def accumulate(self): + for coco_eval in self.coco_eval.values(): + coco_eval.accumulate() + + def summarize(self): + for iou_type, coco_eval in self.coco_eval.items(): + print("IoU metric: {}".format(iou_type)) + coco_eval.summarize() + + def prepare(self, predictions, iou_type): + if iou_type == "bbox": + return self.prepare_for_coco_detection(predictions) + elif iou_type == "segm": + return self.prepare_for_coco_segmentation(predictions) + elif iou_type == "keypoints": + return self.prepare_for_coco_keypoint(predictions) + else: + raise ValueError("Unknown iou type {}".format(iou_type)) + + def prepare_for_coco_detection(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + boxes = prediction["boxes"] + boxes = convert_to_xywh(boxes).tolist() + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + "bbox": box, + "score": scores[k], + } + for k, box in enumerate(boxes) + ] + ) + return coco_results + + def prepare_for_coco_segmentation(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + scores = prediction["scores"] + labels = prediction["labels"] + masks = prediction["masks"] + + masks = masks > 0.5 + + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + + rles = [ + mask_util.encode(np.array(mask[0, :, :, np.newaxis], order="F"))[0] + for mask in masks + ] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + "segmentation": rle, + "score": scores[k], + } + for k, rle in enumerate(rles) + ] + ) + return coco_results + + def prepare_for_coco_keypoint(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + boxes = prediction["boxes"] + boxes = convert_to_xywh(boxes).tolist() + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + keypoints = prediction["keypoints"] + keypoints = keypoints.flatten(start_dim=1).tolist() + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + 'keypoints': keypoint, + "score": scores[k], + } + for k, keypoint in enumerate(keypoints) + ] + ) + return coco_results + + +def convert_to_xywh(boxes): + xmin, ymin, xmax, ymax = boxes.unbind(1) + return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) + + +def merge(img_ids, eval_imgs): + all_img_ids = utils.all_gather(img_ids) + all_eval_imgs = utils.all_gather(eval_imgs) + + merged_img_ids = [] + for p in all_img_ids: + merged_img_ids.extend(p) + + merged_eval_imgs = [] + for p in all_eval_imgs: + merged_eval_imgs.append(p) + + merged_img_ids = np.array(merged_img_ids) + merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) + + # keep only unique (and in sorted order) images + merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) + merged_eval_imgs = merged_eval_imgs[..., idx] + + return merged_img_ids, merged_eval_imgs + + +def create_common_coco_eval(coco_eval, img_ids, eval_imgs): + img_ids, eval_imgs = merge(img_ids, eval_imgs) + img_ids = list(img_ids) + eval_imgs = list(eval_imgs.flatten()) + + coco_eval.evalImgs = eval_imgs + coco_eval.params.imgIds = img_ids + coco_eval._paramsEval = copy.deepcopy(coco_eval.params) + + +################################################################# +# From pycocotools, just removed the prints and fixed +# a Python3 bug about unicode not defined +################################################################# + +# Ideally, pycocotools wouldn't have hard-coded prints +# so that we could avoid copy-pasting those two functions + +def createIndex(self): + # create index + # print('creating index...') + anns, cats, imgs = {}, {}, {} + imgToAnns, catToImgs = defaultdict(list), defaultdict(list) + if 'annotations' in self.dataset: + for ann in self.dataset['annotations']: + imgToAnns[ann['image_id']].append(ann) + anns[ann['id']] = ann + + if 'images' in self.dataset: + for img in self.dataset['images']: + imgs[img['id']] = img + + if 'categories' in self.dataset: + for cat in self.dataset['categories']: + cats[cat['id']] = cat + + if 'annotations' in self.dataset and 'categories' in self.dataset: + for ann in self.dataset['annotations']: + catToImgs[ann['category_id']].append(ann['image_id']) + + # print('index created!') + + # create class members + self.anns = anns + self.imgToAnns = imgToAnns + self.catToImgs = catToImgs + self.imgs = imgs + self.cats = cats + + +maskUtils = mask_util + + +def loadRes(self, resFile): + """ + Load result file and return a result api object. + :param resFile (str) : file name of result file + :return: res (obj) : result api object + """ + res = COCO() + res.dataset['images'] = [img for img in self.dataset['images']] + + # print('Loading and preparing results...') + # tic = time.time() + if isinstance(resFile, str): + anns = json.load(open(resFile)) + elif type(resFile) == np.ndarray: + anns = self.loadNumpyAnnotations(resFile) + else: + anns = resFile + assert type(anns) == list, 'results in not an array of objects' + annsImgIds = [ann['image_id'] for ann in anns] + assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ + 'Results do not correspond to current coco set' + if 'caption' in anns[0]: + imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) + res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] + for id, ann in enumerate(anns): + ann['id'] = id + 1 + elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + bb = ann['bbox'] + x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] + if 'segmentation' not in ann: + ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] + ann['area'] = bb[2] * bb[3] + ann['id'] = id + 1 + ann['iscrowd'] = 0 + elif 'segmentation' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + # now only support compressed RLE format as segmentation results + ann['area'] = maskUtils.area(ann['segmentation']) + if 'bbox' not in ann: + ann['bbox'] = maskUtils.toBbox(ann['segmentation']) + ann['id'] = id + 1 + ann['iscrowd'] = 0 + elif 'keypoints' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + s = ann['keypoints'] + x = s[0::3] + y = s[1::3] + x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y) + ann['area'] = (x1 - x0) * (y1 - y0) + ann['id'] = id + 1 + ann['bbox'] = [x0, y0, x1 - x0, y1 - y0] + # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) + + res.dataset['annotations'] = anns + createIndex(res) + return res + + +def evaluate(self): + ''' + Run per image evaluation on given images and store results (a list of dict) in self.evalImgs + :return: None + ''' + # tic = time.time() + # print('Running per image evaluation...') + p = self.params + # add backward compatibility if useSegm is specified in params + if p.useSegm is not None: + p.iouType = 'segm' if p.useSegm == 1 else 'bbox' + print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) + # print('Evaluate annotation type *{}*'.format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == 'segm' or p.iouType == 'bbox': + computeIoU = self.computeIoU + elif p.iouType == 'keypoints': + computeIoU = self.computeOks + self.ious = { + (imgId, catId): computeIoU(imgId, catId) + for imgId in p.imgIds + for catId in catIds} + + evaluateImg = self.evaluateImg + maxDet = p.maxDets[-1] + evalImgs = [ + evaluateImg(imgId, catId, areaRng, maxDet) + for catId in catIds + for areaRng in p.areaRng + for imgId in p.imgIds + ] + # this is NOT in the pycocotools code, but could be done outside + evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) + self._paramsEval = copy.deepcopy(self.params) + # toc = time.time() + # print('DONE (t={:0.2f}s).'.format(toc-tic)) + return p.imgIds, evalImgs + +################################################################# +# end of straight copy from pycocotools, just removing the prints +################################################################# diff --git a/utils/coco_utils.py b/utils/coco_utils.py new file mode 100644 index 0000000..31946b5 --- /dev/null +++ b/utils/coco_utils.py @@ -0,0 +1,251 @@ +import copy +import os +from PIL import Image + +import torch +import torch.utils.data +import torchvision + +from pycocotools import mask as coco_mask +from pycocotools.coco import COCO + +import transforms as T + + +class FilterAndRemapCocoCategories(object): + def __init__(self, categories, remap=True): + self.categories = categories + self.remap = remap + + def __call__(self, image, target): + anno = target["annotations"] + anno = [obj for obj in anno if obj["category_id"] in self.categories] + if not self.remap: + target["annotations"] = anno + return image, target + anno = copy.deepcopy(anno) + for obj in anno: + obj["category_id"] = self.categories.index(obj["category_id"]) + target["annotations"] = anno + return image, target + + +def convert_coco_poly_to_mask(segmentations, height, width): + masks = [] + for polygons in segmentations: + rles = coco_mask.frPyObjects(polygons, height, width) + mask = coco_mask.decode(rles) + if len(mask.shape) < 3: + mask = mask[..., None] + mask = torch.as_tensor(mask, dtype=torch.uint8) + mask = mask.any(dim=2) + masks.append(mask) + if masks: + masks = torch.stack(masks, dim=0) + else: + masks = torch.zeros((0, height, width), dtype=torch.uint8) + return masks + + +class ConvertCocoPolysToMask(object): + def __call__(self, image, target): + w, h = image.size + + image_id = target["image_id"] + image_id = torch.tensor([image_id]) + + anno = target["annotations"] + + anno = [obj for obj in anno if obj['iscrowd'] == 0] + + boxes = [obj["bbox"] for obj in anno] + # guard against no boxes via resizing + boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) + boxes[:, 2:] += boxes[:, :2] + boxes[:, 0::2].clamp_(min=0, max=w) + boxes[:, 1::2].clamp_(min=0, max=h) + + classes = [obj["category_id"] for obj in anno] + classes = torch.tensor(classes, dtype=torch.int64) + + segmentations = [obj["segmentation"] for obj in anno] + masks = convert_coco_poly_to_mask(segmentations, h, w) + + keypoints = None + if anno and "keypoints" in anno[0]: + keypoints = [obj["keypoints"] for obj in anno] + keypoints = torch.as_tensor(keypoints, dtype=torch.float32) + num_keypoints = keypoints.shape[0] + if num_keypoints: + keypoints = keypoints.view(num_keypoints, -1, 3) + + keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) + boxes = boxes[keep] + classes = classes[keep] + masks = masks[keep] + if keypoints is not None: + keypoints = keypoints[keep] + + target = {} + target["boxes"] = boxes + target["labels"] = classes + target["masks"] = masks + target["image_id"] = image_id + if keypoints is not None: + target["keypoints"] = keypoints + + # for conversion to coco api + area = torch.tensor([obj["area"] for obj in anno]) + iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) + target["area"] = area + target["iscrowd"] = iscrowd + + return image, target + + +def _coco_remove_images_without_annotations(dataset, cat_list=None): + def _has_only_empty_bbox(anno): + return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) + + def _count_visible_keypoints(anno): + return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) + + min_keypoints_per_image = 10 + + def _has_valid_annotation(anno): + # if it's empty, there is no annotation + if len(anno) == 0: + return False + # if all boxes have close to zero area, there is no annotation + if _has_only_empty_bbox(anno): + return False + # keypoints task have a slight different critera for considering + # if an annotation is valid + if "keypoints" not in anno[0]: + return True + # for keypoint detection tasks, only consider valid images those + # containing at least min_keypoints_per_image + if _count_visible_keypoints(anno) >= min_keypoints_per_image: + return True + return False + + assert isinstance(dataset, torchvision.datasets.CocoDetection) + ids = [] + for ds_idx, img_id in enumerate(dataset.ids): + ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) + anno = dataset.coco.loadAnns(ann_ids) + if cat_list: + anno = [obj for obj in anno if obj["category_id"] in cat_list] + if _has_valid_annotation(anno): + ids.append(ds_idx) + + dataset = torch.utils.data.Subset(dataset, ids) + return dataset + + +def convert_to_coco_api(ds): + coco_ds = COCO() + ann_id = 0 + dataset = {'images': [], 'categories': [], 'annotations': []} + categories = set() + for img_idx in range(len(ds)): + # find better way to get target + # targets = ds.get_annotations(img_idx) + img, targets = ds[img_idx] + image_id = targets["image_id"].item() + img_dict = {} + img_dict['id'] = image_id + img_dict['height'] = img.shape[-2] + img_dict['width'] = img.shape[-1] + dataset['images'].append(img_dict) + bboxes = targets["boxes"] + bboxes[:, 2:] -= bboxes[:, :2] + bboxes = bboxes.tolist() + labels = targets['labels'].tolist() + areas = targets['area'].tolist() + iscrowd = targets['iscrowd'].tolist() + if 'masks' in targets: + masks = targets['masks'] + # make masks Fortran contiguous for coco_mask + masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) + if 'keypoints' in targets: + keypoints = targets['keypoints'] + keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() + num_objs = len(bboxes) + for i in range(num_objs): + ann = {} + ann['image_id'] = image_id + ann['bbox'] = bboxes[i] + ann['category_id'] = labels[i] + categories.add(labels[i]) + ann['area'] = areas[i] + ann['iscrowd'] = iscrowd[i] + ann['id'] = ann_id + if 'masks' in targets: + ann["segmentation"] = coco_mask.encode(masks[i].numpy()) + if 'keypoints' in targets: + ann['keypoints'] = keypoints[i] + ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) + dataset['annotations'].append(ann) + ann_id += 1 + dataset['categories'] = [{'id': i} for i in sorted(categories)] + coco_ds.dataset = dataset + coco_ds.createIndex() + return coco_ds + + +def get_coco_api_from_dataset(dataset): + for i in range(10): + if isinstance(dataset, torchvision.datasets.CocoDetection): + break + if isinstance(dataset, torch.utils.data.Subset): + dataset = dataset.dataset + if isinstance(dataset, torchvision.datasets.CocoDetection): + return dataset.coco + return convert_to_coco_api(dataset) + + +class CocoDetection(torchvision.datasets.CocoDetection): + def __init__(self, img_folder, ann_file, transforms): + super(CocoDetection, self).__init__(img_folder, ann_file) + self._transforms = transforms + + def __getitem__(self, idx): + img, target = super(CocoDetection, self).__getitem__(idx) + image_id = self.ids[idx] + target = dict(image_id=image_id, annotations=target) + if self._transforms is not None: + img, target = self._transforms(img, target) + return img, target + + +def get_coco(root, image_set, transforms, mode='instances'): + anno_file_template = "{}_{}2017.json" + PATHS = { + "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), + "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), + # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) + } + + t = [ConvertCocoPolysToMask()] + + if transforms is not None: + t.append(transforms) + transforms = T.Compose(t) + + img_folder, ann_file = PATHS[image_set] + img_folder = os.path.join(root, img_folder) + ann_file = os.path.join(root, ann_file) + + dataset = CocoDetection(img_folder, ann_file, transforms=transforms) + + if image_set == "train": + dataset = _coco_remove_images_without_annotations(dataset) + + # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) + + return dataset + + +def get_coco_kp(root, image_set, transforms): + return get_coco(root, image_set, transforms, mode="person_keypoints") diff --git a/utils/dataset.py b/utils/dataset.py new file mode 100644 index 0000000..3b01eb3 --- /dev/null +++ b/utils/dataset.py @@ -0,0 +1,82 @@ +import torch +from torch.utils.data import DataLoader, Dataset +import cv2 +import numpy as np +from config import Config + +class RT_Dataset(Dataset): + def __init__(self,dataframe,image_dir,transforms=None): + super().__init__() + self.image_ids=dataframe['image_number'].unique() + self.df=dataframe + self.image_dir=image_dir + self.transforms=transforms + #self.classes=[_,'Others','Porosity','Slag'] + + + def __getitem__(self,index:int): + image_id=self.image_ids[index] + records=self.df[self.df['image_number']==image_id] + + image=cv2.imread(f'{self.image_dir}/{records["dataset"].values[0]}/{records["image_name"].values[0]}',cv2.IMREAD_COLOR) + image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB).astype(np.float32) + #image_res=cv2.resize(image_rgb,(Config['IMG_SIZE'],Config['IMG_SIZE']),cv2.INTER_AREA) + image /=255.0 + + #print(f'image : {type(image)}') + labels=[] + + wt,ht=image.shape[1],image.shape[0] + + bndboxes=list((eval(records['bndbox'].values[0]))) + labels=list(eval(records['labels'].values[0])) + + if len(bndboxes)>0: + boxes = [[box[0] , box[1], box[2], box[3]] for box in bndboxes] + #labels=[int(label) for label in labels] + boxes=torch.as_tensor(boxes,dtype=torch.float32) + area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) + labels=[int(label)+1 for label in labels] + #labels = torch.as_tensor(labels, dtype=torch.int64) + + else: + boxes = torch.zeros((0,4),dtype=torch.float32) + labels = torch.zeros(0, dtype=torch.int64) # 더미 라벨 + area = torch.zeros(0, dtype=torch.float32) # 더미 면적 + + + + labels = torch.as_tensor(labels, dtype=torch.int64) + + #다 crowd x + iscrowd = torch.zeros((len(boxes),), dtype=torch.int64) + + + + target={} + target['boxes']=boxes + target['labels']=labels + target['area']=area + target['iscrowd']=iscrowd + target['image_id']=torch.tensor([image_id]) + + if self.transforms: + sample={ + 'image':image, + 'bboxes':target['boxes'], + 'labels':labels + } + + #target['boxes']=torch.Tensor(sample['bboxes']) + sample=self.transforms(**sample) + image=sample['image'] + + if len(sample['bboxes'])>0: + target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1,0) + else: + target['boxes']=torch.zeros((0,4),dtype=torch.float32) + + return image,target + + def __len__(self) -> int: + return self.image_ids.shape[0] \ No newline at end of file diff --git a/utils/engine.py b/utils/engine.py new file mode 100644 index 0000000..9de2ae7 --- /dev/null +++ b/utils/engine.py @@ -0,0 +1,116 @@ +import math +import sys +import time +import torch + +import torchvision.models.detection.mask_rcnn + +from coco_utils import get_coco_api_from_dataset +from coco_eval import CocoEvaluator +import utils +import wandb + +def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq,warmup_epochs=4): + model.train() + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) + header = 'Epoch: [{}]'.format(epoch) + + lr_scheduler = None + if epoch best_iou: + best_iou = iou + best_match = i + + # 모든 gt를 돌고 난 후 best_iou > iou_threshold이고 best_match일 경우 + # 1) 해당 gt가 best_match가 없을 경우 + if best_iou > iou_threshold and best_match is not None: + if not matched_gt[best_match]: + update_class_stats(class_stats, pred_class, 'TP', best_iou) + matched_gt[best_match] = True + else: + update_class_stats(class_stats, pred_class, 'FP') # 이미 임자가 있는데 잘못고름 + else: + update_class_stats(class_stats, pred_class, 'FP') + + for i, (gt_class, _) in enumerate(gt_boxes): + if not matched_gt[i]: + update_class_stats(class_stats, gt_class, 'FN') + + print(f'class_stats : {class_stats}') + return calculate_classwise_metrics(class_stats) + +def update_class_stats(stats, cls, update_type, iou_score=0): + if cls not in stats: + stats[cls] = {'TP': 0, 'FP': 0, 'FN': 0, 'total_iou': 0} + + if update_type == 'TP': + stats[cls]['TP'] += 1 + stats[cls]['total_iou'] += iou_score + elif update_type == 'FP': + stats[cls]['FP'] += 1 + elif update_type == 'FN': + stats[cls]['FN'] += 1 + +def calculate_classwise_metrics(stats): + class_metrics = {} + total_TP, total_FP, total_FN, total_iou = 0, 0, 0, 0 + for cls, counts in stats.items(): + precision = counts['TP'] / (counts['TP'] + counts['FP']) if (counts['TP'] + counts['FP']) > 0 else 0 + recall = counts['TP'] / (counts['TP'] + counts['FN']) if (counts['TP'] + counts['FN']) > 0 else 0 + f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0 + average_iou = counts['total_iou'] / counts['TP'] if counts['TP'] > 0 else 0 + + class_metrics[cls] = {'precision': precision, 'recall': recall, 'f1_score': f1_score, 'average_iou': average_iou} + + total_TP += counts['TP'] + total_FP += counts['FP'] + total_FN += counts['FN'] + total_iou += counts['total_iou'] + + # 전체 성능 계산 + total_precision = total_TP / (total_TP + total_FP) if (total_TP + total_FP) > 0 else 0 + total_recall = total_TP / (total_TP + total_FN) if (total_TP + total_FN) > 0 else 0 + if total_precision+total_recall==0: + total_f1_score=0 + else: + total_f1_score = 2 * total_precision * total_recall / (total_precision + total_recall) if (total_precision + total_recall) > 0 else 0 + total_average_iou = total_iou / total_TP if total_TP > 0 else 0 + + return {'total': {'precision': total_precision, 'recall': total_recall, 'f1_score': total_f1_score, 'average_iou': total_average_iou}, 'per_class': class_metrics} + + + +# IoU 계산 함수 +def calculate_iou(box1, box2): + """ + Calculate the Intersection over Union (IoU) of two bounding boxes. + """ + + x1, y1, x2, y2 = box1 + x1g, y1g, x2g, y2g = box2 + + # Calculate area of intersection + xi1 = max(x1, x1g) + yi1 = max(y1, y1g) + xi2 = min(x2, x2g) + yi2 = min(y2, y2g) + intersection_area = max(xi2 - xi1, 0) * max(yi2 - yi1, 0) + + # Calculate area of union + box1_area = (x2 - x1) * (y2 - y1) + box2_area = (x2g - x1g) * (y2g - y1g) + union_area = box1_area + box2_area - intersection_area + + # Calculate IoU + iou = intersection_area / union_area if union_area != 0 else 0 + + return iou + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! + """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + + # serialized to a Tensor + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to("cuda") + + # obtain Tensor size of each rank + local_size = torch.tensor([tensor.numel()], device="cuda") + size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + # receiving Tensor from all ranks + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) + if local_size != max_size: + padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") + tensor = torch.cat((tensor, padding), dim=0) + dist.all_gather(tensor_list, tensor) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. + """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def collate_fn(batch): + return tuple(zip(*batch)) + + +def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): + + def f(x): + if x >= warmup_iters: + return 1 + alpha = float(x) / warmup_iters + return warmup_factor * (1 - alpha) + alpha + + return torch.optim.lr_scheduler.LambdaLR(optimizer, f) + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + elif 'SLURM_PROCID' in os.environ: + args.rank = int(os.environ['SLURM_PROCID']) + args.gpu = args.rank % torch.cuda.device_count() + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) From 67bc7dde46b1b6209fcc776441c2e4b818cee40d Mon Sep 17 00:00:00 2001 From: junghye01 Date: Wed, 20 Dec 2023 20:42:31 +0000 Subject: [PATCH 03/10] utils code --- retinanet_/dataset.py | 39 ----- retinanet_/inference.py | 167 +++++++++++++++++++ retinanet_/test.ipynb | 346 --------------------------------------- retinanet_/train.py | 354 ++++++++++++++++++++-------------------- 4 files changed, 343 insertions(+), 563 deletions(-) delete mode 100644 retinanet_/dataset.py create mode 100644 retinanet_/inference.py delete mode 100644 retinanet_/test.ipynb diff --git a/retinanet_/dataset.py b/retinanet_/dataset.py deleted file mode 100644 index 13513d8..0000000 --- a/retinanet_/dataset.py +++ /dev/null @@ -1,39 +0,0 @@ -import torch -from torch.utils.data import DataLoader, Dataset -import cv2 -import numpy as np - - -class RT_Dataset(Dataset): - def __init__(self,dataframe,image_dir,transforms=None): - super().__init__() - self.image_ids=dataframe['image_number'].unique() - self.df=dataframe - self.image_dir=image_dir - self.transforms=transforms - - def __getitem__(self,index:int): - image_id=self.image_ids[index] - records=self.df[self.df['image_number']==image_id] - - image=cv2.imread(f'{self.image_dir}/{records["dataset"].values[0]}/{records["image_name"].values[0]}',cv2.IMREAD_COLOR) - image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB).astype(np.float32) - image/=255.0 - - bndbox=np.array(eval(records['bndbox'].values[0])) - labels=np.array(eval(records['labels'].values[0])) - boxes=np.zeros((bndbox.shape[0],5)) - - if bndbox.size>0: - boxes[:,0:4]=bndbox - boxes[:,4]=labels - - sample={'img':image, 'annot':boxes} - - if self.transforms: - sample=self.transforms(sample) - - return sample - - def __len__(self) -> int: - return self.image_ids.shape[0] \ No newline at end of file diff --git a/retinanet_/inference.py b/retinanet_/inference.py new file mode 100644 index 0000000..045c2ea --- /dev/null +++ b/retinanet_/inference.py @@ -0,0 +1,167 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') +import os +import numpy as np +import pandas as pd + +# for ignoring warnings +import warnings +warnings.filterwarnings('ignore') + +import cv2 + +import torch +import torchvision +from torchvision import transforms as torchtrans +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.ops import nms + + +from tqdm import tqdm +import utils +import transforms as T +from dataset import RT_Dataset +from config import Config +from augment import get_transform + +from torchvision.models.detection.ssd import SSDClassificationHead +from torchvision.models.detection import _utils +from torchvision.models.detection import SSD300_VGG16_Weights + + + + +# model +def get_object_detection_model(num_classes): + + # load a model pre-trained pre-trained on COCO + model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=True) + + # get number of input features for the classifier + in_features = model.roi_heads.box_predictor.cls_score.in_features + # replace the pre-trained head with a new one + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + + return model + +# valid function +def valid_fn(val_data_loader, model, device): + model.eval() + outputs = [] + ground_truths=[] + + for images,targets in tqdm(val_data_loader): + + images=list(img.to(device) for img in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + output=model(images) + + #print(f'target : {targets[0]}') + for out,target in zip(output,targets): + scores=out['scores'].detach().cpu().numpy() + boxes=out['boxes'].detach().cpu().numpy() + labels=out['labels'].detach().cpu().numpy() + + #keep_idx=nms(boxes,scores,iou_threshold=0.1) + + #boxes=boxes[keep_idx] + #scores=scores[keep_idx] + #labels=labels[keep_idx] + + + outputs.append({'boxes': boxes, # 2중 리스트일 수도 + 'scores': scores, + 'labels': labels}) + + # ground truth 에 label 추가 + gt_boxes=target['boxes'].cpu().numpy() + gt_labels=target['labels'].cpu().numpy() + + ground_truths.append(list(zip(gt_labels,gt_boxes))) + + #ground_truths.append(target['boxes'].cpu().numpy()) # 이중 리스트일 수도.. + + + return outputs,ground_truths + + +def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2): + global best_recall + outputs, ground_truths = valid_fn(valid_dataloader, model, device) + predictions = [] + + + for output in outputs: + valid_scores=output['scores']>score_threshold + + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + + + predictions.append(list(zip(valid_labels,valid_boxes))) + + + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} + + return_outputs=metrics['total'] + return return_outputs,class_result + +if __name__=='__main__': + + + if torch.cuda.is_available(): + device=torch.device('cuda') + + + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/test_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + + + + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) + + + + valid_dataloader=torch.utils.data.DataLoader( + valid_dataset, + batch_size=Config['VALID_BS'], + shuffle=False, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + + ) + + + model=torchvision.models.detection.retinanet_resnet50_fpn_v2(num_classes=4,pretrained=False,pretrained_backbone=True) + + + model_save_path = "/content/drive/MyDrive/models/fasterrcnn_resnet50_fpnv2_8_4.pth" + saved_state=torch.load(model_save_path,map_location=device) + + model.load_state_dict(saved_state['model_state_dict']) + + model.to(device) + + + + params = [p for p in model.parameters() if p.requires_grad] + + optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) + + lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9) + + # valid data + return_outputs,class_result = validate_and_save_best_model(0, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'test Total result : {return_outputs} , class result : {class_result} ') \ No newline at end of file diff --git a/retinanet_/test.ipynb b/retinanet_/test.ipynb deleted file mode 100644 index 6c7d865..0000000 --- a/retinanet_/test.ipynb +++ /dev/null @@ -1,346 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n", - "\n", - "import re\n", - "import cv2\n", - "import time\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "\n", - "import torch\n", - "import torch.optim as optim\n", - "import torchvision.transforms as T\n", - "from torchvision.utils import make_grid \n", - "from torch.utils.data import DataLoader, Dataset\n", - "\n", - "from retinanet.csv_eval_original import evaluate \n", - "from retinanet import model\n", - "from retinanet.dataloader import collater, Resizer_512, Augmenter, Normalizer, UnNormalizer,CSVDataset\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "test_dataset=CSVDataset('../annotations_v2/retinanet_test.csv','../annotations_v2/classes.csv',transform=T.Compose([Normalizer(),Resizer_512()]))" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n", - "torch.cuda.empty_cache()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DataParallel(\n", - " (module): ResNet(\n", - " (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", - " (layer1): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer2): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (3): Bottleneck(\n", - " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer3): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (3): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (4): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (5): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer4): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (fpn): PyramidFeatures(\n", - " (P5_1): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))\n", - " (P5_upsampled): Upsample(scale_factor=2.0, mode='nearest')\n", - " (P5_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (P4_1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))\n", - " (P4_upsampled): Upsample(scale_factor=2.0, mode='nearest')\n", - " (P4_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (P3_1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))\n", - " (P3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (P6): Conv2d(2048, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", - " (P7_1): ReLU()\n", - " (P7_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", - " )\n", - " (regressionModel): RegressionModel(\n", - " (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act1): ReLU()\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act2): ReLU()\n", - " (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act3): ReLU()\n", - " (conv4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act4): ReLU()\n", - " (output): Conv2d(256, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " )\n", - " (classificationModel): ClassificationModel(\n", - " (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act1): ReLU()\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act2): ReLU()\n", - " (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act3): ReLU()\n", - " (conv4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act4): ReLU()\n", - " (output): Conv2d(256, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (output_act): Sigmoid()\n", - " )\n", - " (anchors): Anchors()\n", - " (regressBoxes): BBoxTransform()\n", - " (clipBoxes): ClipBoxes()\n", - " (focalLoss): FocalLoss()\n", - " )\n", - ")" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "retinanet = torch.load(\"/home/irteam/junghye-dcloud-dir/pathfinder/pathfinder_ai/models/retinanet_50_epoch3.pt\", map_location=device)\n", - "\n", - "# 모델을 평가 모드로 전환합니다.\n", - "retinanet.eval()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "698/698\n", - "mAP:\n", - "Others: 0.0\n", - "detect된 거 없음\n", - "Porosity: 0.0\n", - "detect된 거 없음\n", - "Slag: 0.0\n", - "detect된 거 없음\n", - "Normal: 0\n", - "detect된 거 없음\n" - ] - }, - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click here for more info. View Jupyter log for further details." - ] - } - ], - "source": [ - "average_precisions = evaluate(test_dataset, retinanet, iou_threshold=0.000000001, score_threshold=0.0000000001, max_detections=1000000,save_path='./')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "pixt", - "language": "python", - "name": "pixt" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/retinanet_/train.py b/retinanet_/train.py index 66f3760..ee45437 100644 --- a/retinanet_/train.py +++ b/retinanet_/train.py @@ -1,193 +1,191 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') import os - -os.environ["CUDA_LAUNCH_BLOCKING"] = "1" - -import re -import cv2 -import time import numpy as np import pandas as pd +# for ignoring warnings +import warnings +warnings.filterwarnings('ignore') -import torch -import torch.optim as optim -import torchvision.transforms as T -from torchvision.utils import make_grid -from torch.utils.data import DataLoader, Dataset +import cv2 -from retinanet import model -from retinanet.dataloader import collater, Resizer, Augmenter, Normalizer, UnNormalizer,CSVDataset,AspectRatioBasedSampler -from config import Config +import torch +import torchvision +from torchvision import transforms as torchtrans +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.ops import nms + +from engine import train_one_epoch,evaluate +from tqdm import tqdm +import utils +import transforms as T from dataset import RT_Dataset +from config import Config +from augment import get_transform import wandb - - - -best_loss=float('inf') - -def train_one_epoch(retinanet,epoch_num,optimizer,scheduler, train_data_loader,device): - - print("Epoch - {} Started".format(epoch_num)) - st = time.time() - - retinanet.train() - - epoch_loss = [] - - for iter_num, data in enumerate(train_data_loader): - - # Reseting gradients after each iter - optimizer.zero_grad() - - # Forward - #print(f'{data["annot"]}') - classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda().float()]) - - # Calculating Loss - classification_loss = classification_loss.mean() - regression_loss = regression_loss.mean() - - loss = classification_loss + regression_loss - - if bool(loss == 0): - continue - - # Calculating Gradients - loss.backward() - - # Gradient Clipping - torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) - - # Updating Weights - optimizer.step() - - #Epoch Loss - epoch_loss.append(float(loss)) - - print( - 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format( - epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(epoch_loss))) - - del classification_loss - del regression_loss - - wandb.log({ - 'avg_train_loss':round(np.mean(epoch_loss),4) - }) - # Update the learning rate - if lr_scheduler is not None: - lr_scheduler.step(np.mean(epoch_loss)) - - et = time.time() - print("\n Total Time - {}\n".format(int(et - st))) - - - -def valid_one_epoch(retinanet,epoch_num, valid_data_loader,device): - global best_loss #가장 좋은 손실값 업데이트하기 위해 global 변수로 선언 - - print("Epoch - {} Started".format(epoch_num)) - st = time.time() - - epoch_loss = [] - - for iter_num, data in enumerate(valid_data_loader): - - with torch.no_grad(): - - # Forward - classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda().float()]) - - # Calculating Loss - classification_loss = classification_loss.mean() - regression_loss = regression_loss.mean() - loss = classification_loss + regression_loss - - #Epoch Loss - epoch_loss.append(float(loss)) - - print( - 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format( - epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(epoch_loss))) - - del classification_loss - del regression_loss - - avg_epoch_loss=np.mean(epoch_loss) - wandb.log({ - 'avg_val_loss': round(avg_epoch_loss,4) - }) - - if avg_epoch_loss score_threshold + + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + + + predictions.append(list(zip(valid_labels,valid_boxes))) + + print(f'pred : {predictions[0:5]}\n') + print(f'gt : {ground_truths[0:5]}') + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + #wandb.log({"epoch": epoch, "recall": metrics['recall']}) # Recall을 W&B에 로그합니다. + wandb.log({"epoch": epoch, "total_recall": total_recall, "total_precision": total_precision,"total_f1_score":total_f1_score}) + + categories={2: 'Porosity', 3: 'Slag'} + class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} + # 각 클래스별 성능 로그 + for class_label,class_metrics in metrics['per_class'].items(): + #class_label=class_label.item() + if class_label==2 or class_label==3: + + wandb.log({ + f"class_{categories[class_label]}_recall" : class_metrics['recall'], + f"class_{categories[class_label]}_precision" : class_metrics['precision'], + f"class_{categories[class_label]}_f1_score" : class_metrics['f1_score'], + f"class_{categories[class_label]}_average_iou": class_metrics['average_iou'], + + }) + if total_recall > best_recall: + best_recall = total_recall + + model_save_path = f"/content/drive/MyDrive/models/retinanet_resnet50_fpnv2_{Config['TRAIN_BS']}_{Config['VALID_BS']}.pth" + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'lr_scheduler_state_dict': lr_scheduler.state_dict() + }, model_save_path) + # wandb.save(model_save_path) # 모델 파일을 W&B에 저장합니다. + + return_outputs=metrics['total'] + return return_outputs,class_result + if __name__=='__main__': - # wandb project - wandb.init(project='capstone',name='retinanet_1123',reinit=True) - device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') - torch.cuda.empty_cache() - - # dataset - #train_df=pd.read_csv('/home/irteam/junghye-dcloud-dir/pathfinder/pathfinder_ai/annotations_v2/train_total.csv') - #valid_df=pd.read_csv('/home/irteam/junghye-dcloud-dir/pathfinder/pathfinder_ai/annotations_v2/valid_total.csv') - #image_dir='/home/irteam/junghye-dcloud-dir/pathfinder/data_contrast/before/Image' - - #train_dataset=RT_Dataset(train_df,image_dir,transforms=T.Compose([Augmenter(),Normalizer(),Resizer()])) - #valid_dataset=RT_Dataset(valid_df,image_dir,transforms=T.Compose([Normalizer(),Resizer()])) - - # csv dataset - - train_dataset=CSVDataset('./annotations_v2/retinanet_train.csv','./annotations_v2/classes.csv',transform=T.Compose([Augmenter(),Normalizer()])) - valid_dataset=CSVDataset('./annotations_v2/retinanet_valid.csv','./annotations_v2/classes.csv',transform=T.Compose([Normalizer()])) - - sampler=AspectRatioBasedSampler(train_dataset,batch_size=Config['TRAIN_BS'],drop_last=False) - sampler_val=AspectRatioBasedSampler(valid_dataset,batch_size=Config['VALID_BS'],drop_last=False) - - - train_data_loader = DataLoader( + wandb.init(project='capstone',name='RetinaNet',reinit=True) + + if torch.cuda.is_available(): + device=torch.device('cuda') + + + train_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/train_total.csv') + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/valid_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + + result_dir_path=f'/content/drive/MyDrive/result/RetinaNet' + os.makedirs(result_dir_path,exist_ok=True) + + train_dataset=RT_Dataset(train_df,image_dir,transforms=get_transform(train=True)) + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) + + train_dataloader=torch.utils.data.DataLoader( train_dataset, - num_workers = Config['NUM_WORKERS'], - collate_fn = collater, - batch_sampler=sampler, + batch_size=Config['TRAIN_BS'], + shuffle=True, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, ) - valid_data_loader = DataLoader( + valid_dataloader=torch.utils.data.DataLoader( valid_dataset, - num_workers = Config['NUM_WORKERS'], - collate_fn = collater, - batch_sampler=sampler_val, + batch_size=Config['VALID_BS'], + shuffle=False, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + ) - # load model - retinanet = model.resnet50(num_classes = Config['NUM_CLASSES'], pretrained = True) - retinanet.to(device) - wandb.watch(retinanet) - - - #optimizer = torch.optim.Adam(retinanet.parameters(), lr = Config['LR'],weight_decay=Config['WEIGHT_DECAY']) - optimizer=torch.optim.Adam(retinanet.parameters(),lr=Config['LR']) - - #lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 5, gamma=0.5) - lr_scheduler=torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,patience=3,verbose=True) - - ### Training Loop - for epoch in range(Config['EPOCHS']): - - # Call train function - train_one_epoch(retinanet,epoch, optimizer,lr_scheduler,train_data_loader,device=device) - # Call valid function - valid_one_epoch(retinanet,epoch, valid_data_loader,device=device) + + #model=get_object_detection_model(Config['NUM_CLASSES']) + model=torchvision.models.detection.retinanet_resnet50_fpn_v2(num_classes=4,pretrained=False,pretrained_backbone=True) + + model.to(device) + + wandb.watch(model) + + params = [p for p in model.parameters() if p.requires_grad] + + optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) + + #lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.9) + lr_scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15) + + best_recall=-100 + with open(f'{result_dir_path}/{Config["TRAIN_BS"]}_{Config["VALID_BS"]}_{Config["EPOCHS"]}.txt','w') as f: + for epoch in range(Config['EPOCHS']): + train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=10) + + lr_scheduler.step() + + # valid data + return_outputs,class_result = validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'epoch : {epoch}, output : {return_outputs}') + f.write(f"Epoch {epoch} Total result:{return_outputs}, class_result : {class_result}\n") \ No newline at end of file From 42286300c48481e6e9fb6684ee0afaae3bad0b3f Mon Sep 17 00:00:00 2001 From: junghye01 Date: Wed, 20 Dec 2023 20:43:23 +0000 Subject: [PATCH 04/10] modify faster rcnn code --- faster-rcnn/config.py | 12 +-- faster-rcnn/inference.py | 166 +++++++++++++++++++++++++++++++++++++++ faster-rcnn/train.py | 158 +++++++++++++++++++++++++++++++------ 3 files changed, 306 insertions(+), 30 deletions(-) create mode 100644 faster-rcnn/inference.py diff --git a/faster-rcnn/config.py b/faster-rcnn/config.py index 91297a3..6a49ccb 100644 --- a/faster-rcnn/config.py +++ b/faster-rcnn/config.py @@ -1,13 +1,13 @@ Config=dict( - EPOCHS=150, - LR=2e-4, - IMG_SIZE=224, + EPOCHS=50, + LR=5e-4, + IMG_SIZE=640, DR_RATE=0.35, NUM_CLASSES=4, - TRAIN_BS=4, - VALID_BS=2, + TRAIN_BS=8, + VALID_BS=4, NUM_WORKERS=4, WEIGHT_DECAY=0.0005, - CONTRAST='AFTER' + CONTRAST='AFTER', ) \ No newline at end of file diff --git a/faster-rcnn/inference.py b/faster-rcnn/inference.py new file mode 100644 index 0000000..a3b1982 --- /dev/null +++ b/faster-rcnn/inference.py @@ -0,0 +1,166 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') +import os +import numpy as np +import pandas as pd + +# for ignoring warnings +import warnings +warnings.filterwarnings('ignore') + +import cv2 + +import torch +import torchvision +from torchvision import transforms as torchtrans +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.ops import nms + + +from tqdm import tqdm +import utils +import transforms as T +from dataset import RT_Dataset +from config import Config +from augment import get_transform + +from torchvision.models.detection.ssd import SSDClassificationHead +from torchvision.models.detection import _utils +from torchvision.models.detection import SSD300_VGG16_Weights + + + +# model +def get_object_detection_model(num_classes): + + # load a model pre-trained pre-trained on COCO + model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=True) + + # get number of input features for the classifier + in_features = model.roi_heads.box_predictor.cls_score.in_features + # replace the pre-trained head with a new one + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + + return model + +# valid function +def valid_fn(val_data_loader, model, device): + model.eval() + outputs = [] + ground_truths=[] + + for images,targets in tqdm(val_data_loader): + + images=list(img.to(device) for img in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + output=model(images) + + #print(f'target : {targets[0]}') + for out,target in zip(output,targets): + scores=out['scores'].detach().cpu().numpy() + boxes=out['boxes'].detach().cpu().numpy() + labels=out['labels'].detach().cpu().numpy() + + #keep_idx=nms(boxes,scores,iou_threshold=0.1) + + #boxes=boxes[keep_idx] + #scores=scores[keep_idx] + #labels=labels[keep_idx] + + + outputs.append({'boxes': boxes, # 2중 리스트일 수도 + 'scores': scores, + 'labels': labels}) + + # ground truth 에 label 추가 + gt_boxes=target['boxes'].cpu().numpy() + gt_labels=target['labels'].cpu().numpy() + + ground_truths.append(list(zip(gt_labels,gt_boxes))) + + #ground_truths.append(target['boxes'].cpu().numpy()) # 이중 리스트일 수도.. + + + return outputs,ground_truths + + +def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2): + global best_recall + outputs, ground_truths = valid_fn(valid_dataloader, model, device) + predictions = [] + + + for output in outputs: + valid_scores=output['scores']>score_threshold + + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + + + predictions.append(list(zip(valid_labels,valid_boxes))) + + + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} + + return_outputs=metrics['total'] + return return_outputs,class_result + +if __name__=='__main__': + + + if torch.cuda.is_available(): + device=torch.device('cuda') + + + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/test_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + + + + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) + + + + valid_dataloader=torch.utils.data.DataLoader( + valid_dataset, + batch_size=Config['VALID_BS'], + shuffle=False, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + + ) + + + model=get_object_detection_model(Config['NUM_CLASSES']) + + + model_save_path = "/content/drive/MyDrive/models/fasterrcnn_resnet50_fpnv2_8_4.pth" + saved_state=torch.load(model_save_path,map_location=device) + + model.load_state_dict(saved_state['model_state_dict']) + + model.to(device) + + + + params = [p for p in model.parameters() if p.requires_grad] + + optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) + + lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9) + + # valid data + return_outputs,class_result = validate_and_save_best_model(0, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'test Total result : {return_outputs} , class result : {class_result} ') \ No newline at end of file diff --git a/faster-rcnn/train.py b/faster-rcnn/train.py index 24c702a..7ab5530 100644 --- a/faster-rcnn/train.py +++ b/faster-rcnn/train.py @@ -1,3 +1,5 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') import os import numpy as np import pandas as pd @@ -10,10 +12,12 @@ import torch import torchvision -from torchvision import transforms as torchtrans +from torchvision import transforms as torchtrans from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.ops import nms from engine import train_one_epoch,evaluate +from tqdm import tqdm import utils import transforms as T from dataset import RT_Dataset @@ -22,33 +26,134 @@ import wandb -# model - def get_object_detection_model(num_classes): # load a model pre-trained pre-trained on COCO - model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) - + model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=True) + # get number of input features for the classifier in_features = model.roi_heads.box_predictor.cls_score.in_features # replace the pre-trained head with a new one - model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) return model - +# valid function +def valid_fn(val_data_loader, model, device): + model.eval() + outputs = [] + ground_truths=[] + + for images,targets in tqdm(val_data_loader): + + images=list(img.to(device) for img in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + output=model(images) + + #print(f'target : {targets[0]}') + for out,target in zip(output,targets): + scores=out['scores'].detach().cpu().numpy() + boxes=out['boxes'].detach().cpu().numpy() + labels=out['labels'].detach().cpu().numpy() + + keep_idx=nms(boxes,scores,iou_threshold=0.1) + + boxes=boxes[keep_idx] + scores=scores[keep_idx] + labels=labels[keep_idx] + + + outputs.append({'boxes': boxes, + 'scores': scores, + 'labels': labels}) + + # ground truth 에 label 추가 + gt_boxes=target['boxes'].cpu().numpy() + gt_labels=target['labels'].cpu().numpy() + + ground_truths.append(list(zip(gt_labels,gt_boxes))) + + + + + return outputs,ground_truths + + +def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.1): + global best_recall + outputs, ground_truths = valid_fn(valid_dataloader, model, device) + predictions = [] + + + for output in outputs: + valid_scores=output['scores']>score_threshold + + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + + + predictions.append(list(zip(valid_labels,valid_boxes))) + + print(f'pred : {predictions[0:5]}\n') + print(f'gt : {ground_truths[0:5]}') + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + #wandb.log({"epoch": epoch, "recall": metrics['recall']}) # Recall을 W&B에 로그합니다. + wandb.log({"epoch": epoch, "total_recall": total_recall, "total_precision": total_precision,"total_f1_score":total_f1_score}) + + categories={2: 'Porosity', 3: 'Slag'} + class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} + # 각 클래스별 성능 로그 + for class_label,class_metrics in metrics['per_class'].items(): + #class_label=class_label.item() + if class_label==2 or class_label==3: + + wandb.log({ + f"class_{categories[class_label]}_recall" : class_metrics['recall'], + f"class_{categories[class_label]}_precision" : class_metrics['precision'], + f"class_{categories[class_label]}_f1_score" : class_metrics['f1_score'], + f"class_{categories[class_label]}_average_iou": class_metrics['average_iou'], + + }) + if total_recall > best_recall: + best_recall = total_recall + + model_save_path = f"/content/drive/MyDrive/models/fasterrcnn_resnet50_fpnv2_{Config['TRAIN_BS']}_{Config['VALID_BS']}.pth" + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'lr_scheduler_state_dict': lr_scheduler.state_dict() + }, model_save_path) + #wandb.save(model_save_path) # 모델 파일을 W&B에 저장합니다. + + return_outputs=metrics['total'] + return return_outputs,class_result + if __name__=='__main__': wandb.init(project='capstone',name='faster-rcnn',reinit=True) - + if torch.cuda.is_available(): device=torch.device('cuda') - - - train_df=pd.read_csv('../../data/annotations/train_total.csv') - valid_df=pd.read_csv('../../data/annotations/valid_total.csv') - image_dir='../../data_contrast/after/Image' + + + train_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/train_total.csv') + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/valid_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + + result_dir_path=f'/content/drive/MyDrive/result/Faster-RCNN' + os.makedirs(result_dir_path,exist_ok=True) train_dataset=RT_Dataset(train_df,image_dir,transforms=get_transform(train=True)) valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) @@ -67,27 +172,32 @@ def get_object_detection_model(num_classes): shuffle=False, num_workers=Config['NUM_WORKERS'], collate_fn=utils.collate_fn, - + ) model=get_object_detection_model(Config['NUM_CLASSES']) model.to(device) - + wandb.watch(model) - + params = [p for p in model.parameters() if p.requires_grad] - + optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) - - lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1) - - + + lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9) + + best_recall=-100 + for epoch in range(Config['EPOCHS']): train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=10) - + lr_scheduler.step() - - evaluate(model, valid_dataloader, device=device) + + # valid data + return_outputs,class_result = validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'epoch : {epoch}, output : {return_outputs}') + From 1f29e33f0fb90e8a654b3e3cce5cec3da906f060 Mon Sep 17 00:00:00 2001 From: junghye01 Date: Wed, 20 Dec 2023 20:44:46 +0000 Subject: [PATCH 05/10] delete unnecessary code --- faster-rcnn/augment.py | 20 --- faster-rcnn/coco_eval.py | 349 -------------------------------------- faster-rcnn/coco_utils.py | 251 --------------------------- faster-rcnn/dataset.py | 78 --------- faster-rcnn/engine.py | 111 ------------ faster-rcnn/test.ipynb | 48 ------ faster-rcnn/transforms.py | 50 ------ faster-rcnn/utils.py | 310 --------------------------------- 8 files changed, 1217 deletions(-) delete mode 100644 faster-rcnn/augment.py delete mode 100644 faster-rcnn/coco_eval.py delete mode 100644 faster-rcnn/coco_utils.py delete mode 100644 faster-rcnn/dataset.py delete mode 100644 faster-rcnn/engine.py delete mode 100644 faster-rcnn/test.ipynb delete mode 100644 faster-rcnn/transforms.py delete mode 100644 faster-rcnn/utils.py diff --git a/faster-rcnn/augment.py b/faster-rcnn/augment.py deleted file mode 100644 index c40b351..0000000 --- a/faster-rcnn/augment.py +++ /dev/null @@ -1,20 +0,0 @@ -# for image augmentations -import albumentations as A -from albumentations.pytorch.transforms import ToTensorV2 - - -def get_transform(train): - - if train: - return A.Compose([ - - A.HorizontalFlip(p=0.5), - A.RandomRotate90(p=0.5), - A.VerticalFlip(p=0.5), - # ToTensorV2 converts image to pytorch tensor without div by 255 - ToTensorV2(p=1.0) - ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) - else: - return A.Compose([ - ToTensorV2(p=1.0) - ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) \ No newline at end of file diff --git a/faster-rcnn/coco_eval.py b/faster-rcnn/coco_eval.py deleted file mode 100644 index 0771797..0000000 --- a/faster-rcnn/coco_eval.py +++ /dev/null @@ -1,349 +0,0 @@ -import json -import tempfile - -import numpy as np -import copy -import time -import torch -#import torch._six - -from pycocotools.cocoeval import COCOeval -from pycocotools.coco import COCO -import pycocotools.mask as mask_util - -from collections import defaultdict - -import utils - - -class CocoEvaluator(object): - def __init__(self, coco_gt, iou_types): - assert isinstance(iou_types, (list, tuple)) - coco_gt = copy.deepcopy(coco_gt) - self.coco_gt = coco_gt - - self.iou_types = iou_types - self.coco_eval = {} - for iou_type in iou_types: - self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) - - self.img_ids = [] - self.eval_imgs = {k: [] for k in iou_types} - - def update(self, predictions): - img_ids = list(np.unique(list(predictions.keys()))) - self.img_ids.extend(img_ids) - - for iou_type in self.iou_types: - results = self.prepare(predictions, iou_type) - coco_dt = loadRes(self.coco_gt, results) if results else COCO() - coco_eval = self.coco_eval[iou_type] - - coco_eval.cocoDt = coco_dt - coco_eval.params.imgIds = list(img_ids) - img_ids, eval_imgs = evaluate(coco_eval) - - self.eval_imgs[iou_type].append(eval_imgs) - - def synchronize_between_processes(self): - for iou_type in self.iou_types: - self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) - create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) - - def accumulate(self): - for coco_eval in self.coco_eval.values(): - coco_eval.accumulate() - - def summarize(self): - for iou_type, coco_eval in self.coco_eval.items(): - print("IoU metric: {}".format(iou_type)) - coco_eval.summarize() - - def prepare(self, predictions, iou_type): - if iou_type == "bbox": - return self.prepare_for_coco_detection(predictions) - elif iou_type == "segm": - return self.prepare_for_coco_segmentation(predictions) - elif iou_type == "keypoints": - return self.prepare_for_coco_keypoint(predictions) - else: - raise ValueError("Unknown iou type {}".format(iou_type)) - - def prepare_for_coco_detection(self, predictions): - coco_results = [] - for original_id, prediction in predictions.items(): - if len(prediction) == 0: - continue - - boxes = prediction["boxes"] - boxes = convert_to_xywh(boxes).tolist() - scores = prediction["scores"].tolist() - labels = prediction["labels"].tolist() - - coco_results.extend( - [ - { - "image_id": original_id, - "category_id": labels[k], - "bbox": box, - "score": scores[k], - } - for k, box in enumerate(boxes) - ] - ) - return coco_results - - def prepare_for_coco_segmentation(self, predictions): - coco_results = [] - for original_id, prediction in predictions.items(): - if len(prediction) == 0: - continue - - scores = prediction["scores"] - labels = prediction["labels"] - masks = prediction["masks"] - - masks = masks > 0.5 - - scores = prediction["scores"].tolist() - labels = prediction["labels"].tolist() - - rles = [ - mask_util.encode(np.array(mask[0, :, :, np.newaxis], order="F"))[0] - for mask in masks - ] - for rle in rles: - rle["counts"] = rle["counts"].decode("utf-8") - - coco_results.extend( - [ - { - "image_id": original_id, - "category_id": labels[k], - "segmentation": rle, - "score": scores[k], - } - for k, rle in enumerate(rles) - ] - ) - return coco_results - - def prepare_for_coco_keypoint(self, predictions): - coco_results = [] - for original_id, prediction in predictions.items(): - if len(prediction) == 0: - continue - - boxes = prediction["boxes"] - boxes = convert_to_xywh(boxes).tolist() - scores = prediction["scores"].tolist() - labels = prediction["labels"].tolist() - keypoints = prediction["keypoints"] - keypoints = keypoints.flatten(start_dim=1).tolist() - - coco_results.extend( - [ - { - "image_id": original_id, - "category_id": labels[k], - 'keypoints': keypoint, - "score": scores[k], - } - for k, keypoint in enumerate(keypoints) - ] - ) - return coco_results - - -def convert_to_xywh(boxes): - xmin, ymin, xmax, ymax = boxes.unbind(1) - return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) - - -def merge(img_ids, eval_imgs): - all_img_ids = utils.all_gather(img_ids) - all_eval_imgs = utils.all_gather(eval_imgs) - - merged_img_ids = [] - for p in all_img_ids: - merged_img_ids.extend(p) - - merged_eval_imgs = [] - for p in all_eval_imgs: - merged_eval_imgs.append(p) - - merged_img_ids = np.array(merged_img_ids) - merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) - - # keep only unique (and in sorted order) images - merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) - merged_eval_imgs = merged_eval_imgs[..., idx] - - return merged_img_ids, merged_eval_imgs - - -def create_common_coco_eval(coco_eval, img_ids, eval_imgs): - img_ids, eval_imgs = merge(img_ids, eval_imgs) - img_ids = list(img_ids) - eval_imgs = list(eval_imgs.flatten()) - - coco_eval.evalImgs = eval_imgs - coco_eval.params.imgIds = img_ids - coco_eval._paramsEval = copy.deepcopy(coco_eval.params) - - -################################################################# -# From pycocotools, just removed the prints and fixed -# a Python3 bug about unicode not defined -################################################################# - -# Ideally, pycocotools wouldn't have hard-coded prints -# so that we could avoid copy-pasting those two functions - -def createIndex(self): - # create index - # print('creating index...') - anns, cats, imgs = {}, {}, {} - imgToAnns, catToImgs = defaultdict(list), defaultdict(list) - if 'annotations' in self.dataset: - for ann in self.dataset['annotations']: - imgToAnns[ann['image_id']].append(ann) - anns[ann['id']] = ann - - if 'images' in self.dataset: - for img in self.dataset['images']: - imgs[img['id']] = img - - if 'categories' in self.dataset: - for cat in self.dataset['categories']: - cats[cat['id']] = cat - - if 'annotations' in self.dataset and 'categories' in self.dataset: - for ann in self.dataset['annotations']: - catToImgs[ann['category_id']].append(ann['image_id']) - - # print('index created!') - - # create class members - self.anns = anns - self.imgToAnns = imgToAnns - self.catToImgs = catToImgs - self.imgs = imgs - self.cats = cats - - -maskUtils = mask_util - - -def loadRes(self, resFile): - """ - Load result file and return a result api object. - :param resFile (str) : file name of result file - :return: res (obj) : result api object - """ - res = COCO() - res.dataset['images'] = [img for img in self.dataset['images']] - - # print('Loading and preparing results...') - # tic = time.time() - if isinstance(resFile, str): - anns = json.load(open(resFile)) - elif type(resFile) == np.ndarray: - anns = self.loadNumpyAnnotations(resFile) - else: - anns = resFile - assert type(anns) == list, 'results in not an array of objects' - annsImgIds = [ann['image_id'] for ann in anns] - assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ - 'Results do not correspond to current coco set' - if 'caption' in anns[0]: - imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) - res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] - for id, ann in enumerate(anns): - ann['id'] = id + 1 - elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: - res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) - for id, ann in enumerate(anns): - bb = ann['bbox'] - x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] - if 'segmentation' not in ann: - ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] - ann['area'] = bb[2] * bb[3] - ann['id'] = id + 1 - ann['iscrowd'] = 0 - elif 'segmentation' in anns[0]: - res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) - for id, ann in enumerate(anns): - # now only support compressed RLE format as segmentation results - ann['area'] = maskUtils.area(ann['segmentation']) - if 'bbox' not in ann: - ann['bbox'] = maskUtils.toBbox(ann['segmentation']) - ann['id'] = id + 1 - ann['iscrowd'] = 0 - elif 'keypoints' in anns[0]: - res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) - for id, ann in enumerate(anns): - s = ann['keypoints'] - x = s[0::3] - y = s[1::3] - x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y) - ann['area'] = (x1 - x0) * (y1 - y0) - ann['id'] = id + 1 - ann['bbox'] = [x0, y0, x1 - x0, y1 - y0] - # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) - - res.dataset['annotations'] = anns - createIndex(res) - return res - - -def evaluate(self): - ''' - Run per image evaluation on given images and store results (a list of dict) in self.evalImgs - :return: None - ''' - # tic = time.time() - # print('Running per image evaluation...') - p = self.params - # add backward compatibility if useSegm is specified in params - if p.useSegm is not None: - p.iouType = 'segm' if p.useSegm == 1 else 'bbox' - print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) - # print('Evaluate annotation type *{}*'.format(p.iouType)) - p.imgIds = list(np.unique(p.imgIds)) - if p.useCats: - p.catIds = list(np.unique(p.catIds)) - p.maxDets = sorted(p.maxDets) - self.params = p - - self._prepare() - # loop through images, area range, max detection number - catIds = p.catIds if p.useCats else [-1] - - if p.iouType == 'segm' or p.iouType == 'bbox': - computeIoU = self.computeIoU - elif p.iouType == 'keypoints': - computeIoU = self.computeOks - self.ious = { - (imgId, catId): computeIoU(imgId, catId) - for imgId in p.imgIds - for catId in catIds} - - evaluateImg = self.evaluateImg - maxDet = p.maxDets[-1] - evalImgs = [ - evaluateImg(imgId, catId, areaRng, maxDet) - for catId in catIds - for areaRng in p.areaRng - for imgId in p.imgIds - ] - # this is NOT in the pycocotools code, but could be done outside - evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) - self._paramsEval = copy.deepcopy(self.params) - # toc = time.time() - # print('DONE (t={:0.2f}s).'.format(toc-tic)) - return p.imgIds, evalImgs - -################################################################# -# end of straight copy from pycocotools, just removing the prints -################################################################# diff --git a/faster-rcnn/coco_utils.py b/faster-rcnn/coco_utils.py deleted file mode 100644 index 31946b5..0000000 --- a/faster-rcnn/coco_utils.py +++ /dev/null @@ -1,251 +0,0 @@ -import copy -import os -from PIL import Image - -import torch -import torch.utils.data -import torchvision - -from pycocotools import mask as coco_mask -from pycocotools.coco import COCO - -import transforms as T - - -class FilterAndRemapCocoCategories(object): - def __init__(self, categories, remap=True): - self.categories = categories - self.remap = remap - - def __call__(self, image, target): - anno = target["annotations"] - anno = [obj for obj in anno if obj["category_id"] in self.categories] - if not self.remap: - target["annotations"] = anno - return image, target - anno = copy.deepcopy(anno) - for obj in anno: - obj["category_id"] = self.categories.index(obj["category_id"]) - target["annotations"] = anno - return image, target - - -def convert_coco_poly_to_mask(segmentations, height, width): - masks = [] - for polygons in segmentations: - rles = coco_mask.frPyObjects(polygons, height, width) - mask = coco_mask.decode(rles) - if len(mask.shape) < 3: - mask = mask[..., None] - mask = torch.as_tensor(mask, dtype=torch.uint8) - mask = mask.any(dim=2) - masks.append(mask) - if masks: - masks = torch.stack(masks, dim=0) - else: - masks = torch.zeros((0, height, width), dtype=torch.uint8) - return masks - - -class ConvertCocoPolysToMask(object): - def __call__(self, image, target): - w, h = image.size - - image_id = target["image_id"] - image_id = torch.tensor([image_id]) - - anno = target["annotations"] - - anno = [obj for obj in anno if obj['iscrowd'] == 0] - - boxes = [obj["bbox"] for obj in anno] - # guard against no boxes via resizing - boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) - boxes[:, 2:] += boxes[:, :2] - boxes[:, 0::2].clamp_(min=0, max=w) - boxes[:, 1::2].clamp_(min=0, max=h) - - classes = [obj["category_id"] for obj in anno] - classes = torch.tensor(classes, dtype=torch.int64) - - segmentations = [obj["segmentation"] for obj in anno] - masks = convert_coco_poly_to_mask(segmentations, h, w) - - keypoints = None - if anno and "keypoints" in anno[0]: - keypoints = [obj["keypoints"] for obj in anno] - keypoints = torch.as_tensor(keypoints, dtype=torch.float32) - num_keypoints = keypoints.shape[0] - if num_keypoints: - keypoints = keypoints.view(num_keypoints, -1, 3) - - keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) - boxes = boxes[keep] - classes = classes[keep] - masks = masks[keep] - if keypoints is not None: - keypoints = keypoints[keep] - - target = {} - target["boxes"] = boxes - target["labels"] = classes - target["masks"] = masks - target["image_id"] = image_id - if keypoints is not None: - target["keypoints"] = keypoints - - # for conversion to coco api - area = torch.tensor([obj["area"] for obj in anno]) - iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) - target["area"] = area - target["iscrowd"] = iscrowd - - return image, target - - -def _coco_remove_images_without_annotations(dataset, cat_list=None): - def _has_only_empty_bbox(anno): - return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) - - def _count_visible_keypoints(anno): - return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) - - min_keypoints_per_image = 10 - - def _has_valid_annotation(anno): - # if it's empty, there is no annotation - if len(anno) == 0: - return False - # if all boxes have close to zero area, there is no annotation - if _has_only_empty_bbox(anno): - return False - # keypoints task have a slight different critera for considering - # if an annotation is valid - if "keypoints" not in anno[0]: - return True - # for keypoint detection tasks, only consider valid images those - # containing at least min_keypoints_per_image - if _count_visible_keypoints(anno) >= min_keypoints_per_image: - return True - return False - - assert isinstance(dataset, torchvision.datasets.CocoDetection) - ids = [] - for ds_idx, img_id in enumerate(dataset.ids): - ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) - anno = dataset.coco.loadAnns(ann_ids) - if cat_list: - anno = [obj for obj in anno if obj["category_id"] in cat_list] - if _has_valid_annotation(anno): - ids.append(ds_idx) - - dataset = torch.utils.data.Subset(dataset, ids) - return dataset - - -def convert_to_coco_api(ds): - coco_ds = COCO() - ann_id = 0 - dataset = {'images': [], 'categories': [], 'annotations': []} - categories = set() - for img_idx in range(len(ds)): - # find better way to get target - # targets = ds.get_annotations(img_idx) - img, targets = ds[img_idx] - image_id = targets["image_id"].item() - img_dict = {} - img_dict['id'] = image_id - img_dict['height'] = img.shape[-2] - img_dict['width'] = img.shape[-1] - dataset['images'].append(img_dict) - bboxes = targets["boxes"] - bboxes[:, 2:] -= bboxes[:, :2] - bboxes = bboxes.tolist() - labels = targets['labels'].tolist() - areas = targets['area'].tolist() - iscrowd = targets['iscrowd'].tolist() - if 'masks' in targets: - masks = targets['masks'] - # make masks Fortran contiguous for coco_mask - masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) - if 'keypoints' in targets: - keypoints = targets['keypoints'] - keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() - num_objs = len(bboxes) - for i in range(num_objs): - ann = {} - ann['image_id'] = image_id - ann['bbox'] = bboxes[i] - ann['category_id'] = labels[i] - categories.add(labels[i]) - ann['area'] = areas[i] - ann['iscrowd'] = iscrowd[i] - ann['id'] = ann_id - if 'masks' in targets: - ann["segmentation"] = coco_mask.encode(masks[i].numpy()) - if 'keypoints' in targets: - ann['keypoints'] = keypoints[i] - ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) - dataset['annotations'].append(ann) - ann_id += 1 - dataset['categories'] = [{'id': i} for i in sorted(categories)] - coco_ds.dataset = dataset - coco_ds.createIndex() - return coco_ds - - -def get_coco_api_from_dataset(dataset): - for i in range(10): - if isinstance(dataset, torchvision.datasets.CocoDetection): - break - if isinstance(dataset, torch.utils.data.Subset): - dataset = dataset.dataset - if isinstance(dataset, torchvision.datasets.CocoDetection): - return dataset.coco - return convert_to_coco_api(dataset) - - -class CocoDetection(torchvision.datasets.CocoDetection): - def __init__(self, img_folder, ann_file, transforms): - super(CocoDetection, self).__init__(img_folder, ann_file) - self._transforms = transforms - - def __getitem__(self, idx): - img, target = super(CocoDetection, self).__getitem__(idx) - image_id = self.ids[idx] - target = dict(image_id=image_id, annotations=target) - if self._transforms is not None: - img, target = self._transforms(img, target) - return img, target - - -def get_coco(root, image_set, transforms, mode='instances'): - anno_file_template = "{}_{}2017.json" - PATHS = { - "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), - "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), - # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) - } - - t = [ConvertCocoPolysToMask()] - - if transforms is not None: - t.append(transforms) - transforms = T.Compose(t) - - img_folder, ann_file = PATHS[image_set] - img_folder = os.path.join(root, img_folder) - ann_file = os.path.join(root, ann_file) - - dataset = CocoDetection(img_folder, ann_file, transforms=transforms) - - if image_set == "train": - dataset = _coco_remove_images_without_annotations(dataset) - - # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) - - return dataset - - -def get_coco_kp(root, image_set, transforms): - return get_coco(root, image_set, transforms, mode="person_keypoints") diff --git a/faster-rcnn/dataset.py b/faster-rcnn/dataset.py deleted file mode 100644 index 7b27186..0000000 --- a/faster-rcnn/dataset.py +++ /dev/null @@ -1,78 +0,0 @@ -import torch -from torch.utils.data import DataLoader, Dataset -import cv2 -import numpy as np -from config import Config - -class RT_Dataset(Dataset): - def __init__(self,dataframe,image_dir,transforms=None): - super().__init__() - self.image_ids=dataframe['image_number'].unique() - self.df=dataframe - self.image_dir=image_dir - self.transforms=transforms - #self.classes=[_,'Others','Porosity','Slag'] - - - def __getitem__(self,index:int): - image_id=self.image_ids[index] - records=self.df[self.df['image_number']==image_id] - - image=cv2.imread(f'{self.image_dir}/{records["dataset"].values[0]}/{records["image_name"].values[0]}',cv2.IMREAD_COLOR) - image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB).astype(np.float32) - #image_res=cv2.resize(image_rgb,(Config['IMG_SIZE'],Config['IMG_SIZE']),cv2.INTER_AREA) - image /=255.0 - - #print(f'image : {type(image)}') - labels=[] - - wt,ht=image.shape[1],image.shape[0] - - bndboxes=list((eval(records['bndbox'].values[0]))) - labels=list(eval(records['labels'].values[0])) - - if len(bndboxes)>0: - boxes = [[box[0] , box[1], box[2], box[3]] for box in bndboxes] - labels=[int(label)+1 for label in labels] - boxes=torch.as_tensor(boxes,dtype=torch.float32) - area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) - labels = torch.as_tensor(labels, dtype=torch.int64) - - else: - boxes = torch.zeros((0,4),dtype=torch.float32) - labels = torch.zeros(0, dtype=torch.int64) # 더미 라벨 - area = torch.zeros(0, dtype=torch.float32) # 더미 면적 - - - #다 crowd x - iscrowd = torch.zeros((len(boxes),), dtype=torch.int64) - - - - target={} - target['boxes']=boxes - target['labels']=labels - target['area']=area - target['iscrowd']=iscrowd - target['image_id']=torch.tensor([image_id]) - - if self.transforms: - sample={ - 'image':image, - 'bboxes':target['boxes'], - 'labels':labels - } - - #target['boxes']=torch.Tensor(sample['bboxes']) - sample=self.transforms(**sample) - image=sample['image'] - - if len(sample['bboxes'])>0: - target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1,0) - else: - target['boxes']=torch.zeros((0,4),dtype=torch.float32) - - return image,target - - def __len__(self) -> int: - return self.image_ids.shape[0] \ No newline at end of file diff --git a/faster-rcnn/engine.py b/faster-rcnn/engine.py deleted file mode 100644 index bc50e60..0000000 --- a/faster-rcnn/engine.py +++ /dev/null @@ -1,111 +0,0 @@ -import math -import sys -import time -import torch - -import torchvision.models.detection.mask_rcnn - -from coco_utils import get_coco_api_from_dataset -from coco_eval import CocoEvaluator -import utils -import wandb - -def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): - model.train() - metric_logger = utils.MetricLogger(delimiter=" ") - metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) - header = 'Epoch: [{}]'.format(epoch) - - lr_scheduler = None - if epoch == 0: - warmup_factor = 1. / 1000 - warmup_iters = min(1000, len(data_loader) - 1) - - lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) - - for images, targets in metric_logger.log_every(data_loader, print_freq, header): - images = list(image.to(device) for image in images) - targets = [{k: v.to(device) for k, v in t.items()} for t in targets] - - loss_dict = model(images, targets) - - losses = sum(loss for loss in loss_dict.values()) - - # reduce losses over all GPUs for logging purposes - loss_dict_reduced = utils.reduce_dict(loss_dict) - losses_reduced = sum(loss for loss in loss_dict_reduced.values()) - - loss_value = losses_reduced.item() - - wandb.log({ - 'avg_train_loss': round(loss_value,4) - }) - if not math.isfinite(loss_value): - print("Loss is {}, stopping training".format(loss_value)) - print(loss_dict_reduced) - sys.exit(1) - - optimizer.zero_grad() - losses.backward() - optimizer.step() - - if lr_scheduler is not None: - lr_scheduler.step() - - metric_logger.update(loss=losses_reduced, **loss_dict_reduced) - metric_logger.update(lr=optimizer.param_groups[0]["lr"]) - - -def _get_iou_types(model): - model_without_ddp = model - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model_without_ddp = model.module - iou_types = ["bbox"] - if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): - iou_types.append("segm") - if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): - iou_types.append("keypoints") - return iou_types - - -@torch.no_grad() -def evaluate(model, data_loader, device): - n_threads = torch.get_num_threads() - # FIXME remove this and make paste_masks_in_image run on the GPU - torch.set_num_threads(1) - cpu_device = torch.device("cpu") - model.eval() - metric_logger = utils.MetricLogger(delimiter=" ") - header = 'Test:' - - coco = get_coco_api_from_dataset(data_loader.dataset) - iou_types = _get_iou_types(model) - coco_evaluator = CocoEvaluator(coco, iou_types) - - for image, targets in metric_logger.log_every(data_loader, 100, header): - image = list(img.to(device) for img in image) - targets = [{k: v.to(device) for k, v in t.items()} for t in targets] - - torch.cuda.synchronize() - model_time = time.time() - outputs = model(image) - - outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] - model_time = time.time() - model_time - - res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} - evaluator_time = time.time() - coco_evaluator.update(res) - evaluator_time = time.time() - evaluator_time - metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) - - # gather the stats from all processes - metric_logger.synchronize_between_processes() - print("Averaged stats:", metric_logger) - coco_evaluator.synchronize_between_processes() - - # accumulate predictions from all images - coco_evaluator.accumulate() - coco_evaluator.summarize() - torch.set_num_threads(n_threads) - return coco_evaluator diff --git a/faster-rcnn/test.ipynb b/faster-rcnn/test.ipynb deleted file mode 100644 index 4b49849..0000000 --- a/faster-rcnn/test.ipynb +++ /dev/null @@ -1,48 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([], shape=(0, 5), dtype=float64)" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "\n", - "\n", - "np.zeros((0,5))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "pixt", - "language": "python", - "name": "pixt" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/faster-rcnn/transforms.py b/faster-rcnn/transforms.py deleted file mode 100644 index 73efc92..0000000 --- a/faster-rcnn/transforms.py +++ /dev/null @@ -1,50 +0,0 @@ -import random -import torch - -from torchvision.transforms import functional as F - - -def _flip_coco_person_keypoints(kps, width): - flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] - flipped_data = kps[:, flip_inds] - flipped_data[..., 0] = width - flipped_data[..., 0] - # Maintain COCO convention that if visibility == 0, then x, y = 0 - inds = flipped_data[..., 2] == 0 - flipped_data[inds] = 0 - return flipped_data - - -class Compose(object): - def __init__(self, transforms): - self.transforms = transforms - - def __call__(self, image, target): - for t in self.transforms: - image, target = t(image, target) - return image, target - - -class RandomHorizontalFlip(object): - def __init__(self, prob): - self.prob = prob - - def __call__(self, image, target): - if random.random() < self.prob: - height, width = image.shape[-2:] - image = image.flip(-1) - bbox = target["boxes"] - bbox[:, [0, 2]] = width - bbox[:, [2, 0]] - target["boxes"] = bbox - if "masks" in target: - target["masks"] = target["masks"].flip(-1) - if "keypoints" in target: - keypoints = target["keypoints"] - keypoints = _flip_coco_person_keypoints(keypoints, width) - target["keypoints"] = keypoints - return image, target - - -class ToTensor(object): - def __call__(self, image, target): - image = F.to_tensor(image) - return image, target diff --git a/faster-rcnn/utils.py b/faster-rcnn/utils.py deleted file mode 100644 index 9a5b46b..0000000 --- a/faster-rcnn/utils.py +++ /dev/null @@ -1,310 +0,0 @@ -from __future__ import print_function - -from collections import defaultdict, deque -import datetime -import pickle -import time - -import torch -import torch.distributed as dist - -import errno -import os - - -class SmoothedValue(object): - """Track a series of values and provide access to smoothed values over a - window or the global series average. - """ - - def __init__(self, window_size=20, fmt=None): - if fmt is None: - fmt = "{median:.4f} ({global_avg:.4f})" - self.deque = deque(maxlen=window_size) - self.total = 0.0 - self.count = 0 - self.fmt = fmt - - def update(self, value, n=1): - self.deque.append(value) - self.count += n - self.total += value * n - - def synchronize_between_processes(self): - """ - Warning: does not synchronize the deque! - """ - if not is_dist_avail_and_initialized(): - return - t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') - dist.barrier() - dist.all_reduce(t) - t = t.tolist() - self.count = int(t[0]) - self.total = t[1] - - @property - def median(self): - d = torch.tensor(list(self.deque)) - return d.median().item() - - @property - def avg(self): - d = torch.tensor(list(self.deque), dtype=torch.float32) - return d.mean().item() - - @property - def global_avg(self): - return self.total / self.count - - @property - def max(self): - return max(self.deque) - - @property - def value(self): - return self.deque[-1] - - def __str__(self): - return self.fmt.format( - median=self.median, - avg=self.avg, - global_avg=self.global_avg, - max=self.max, - value=self.value) - - -def all_gather(data): - """ - Run all_gather on arbitrary picklable data (not necessarily tensors) - Args: - data: any picklable object - Returns: - list[data]: list of data gathered from each rank - """ - world_size = get_world_size() - if world_size == 1: - return [data] - - # serialized to a Tensor - buffer = pickle.dumps(data) - storage = torch.ByteStorage.from_buffer(buffer) - tensor = torch.ByteTensor(storage).to("cuda") - - # obtain Tensor size of each rank - local_size = torch.tensor([tensor.numel()], device="cuda") - size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] - dist.all_gather(size_list, local_size) - size_list = [int(size.item()) for size in size_list] - max_size = max(size_list) - - # receiving Tensor from all ranks - # we pad the tensor because torch all_gather does not support - # gathering tensors of different shapes - tensor_list = [] - for _ in size_list: - tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) - if local_size != max_size: - padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") - tensor = torch.cat((tensor, padding), dim=0) - dist.all_gather(tensor_list, tensor) - - data_list = [] - for size, tensor in zip(size_list, tensor_list): - buffer = tensor.cpu().numpy().tobytes()[:size] - data_list.append(pickle.loads(buffer)) - - return data_list - - -def reduce_dict(input_dict, average=True): - """ - Args: - input_dict (dict): all the values will be reduced - average (bool): whether to do average or sum - Reduce the values in the dictionary from all processes so that all processes - have the averaged results. Returns a dict with the same fields as - input_dict, after reduction. - """ - world_size = get_world_size() - if world_size < 2: - return input_dict - with torch.no_grad(): - names = [] - values = [] - # sort the keys so that they are consistent across processes - for k in sorted(input_dict.keys()): - names.append(k) - values.append(input_dict[k]) - values = torch.stack(values, dim=0) - dist.all_reduce(values) - if average: - values /= world_size - reduced_dict = {k: v for k, v in zip(names, values)} - return reduced_dict - - -class MetricLogger(object): - def __init__(self, delimiter="\t"): - self.meters = defaultdict(SmoothedValue) - self.delimiter = delimiter - - def update(self, **kwargs): - for k, v in kwargs.items(): - if isinstance(v, torch.Tensor): - v = v.item() - assert isinstance(v, (float, int)) - self.meters[k].update(v) - - def __getattr__(self, attr): - if attr in self.meters: - return self.meters[attr] - if attr in self.__dict__: - return self.__dict__[attr] - raise AttributeError("'{}' object has no attribute '{}'".format( - type(self).__name__, attr)) - - def __str__(self): - loss_str = [] - for name, meter in self.meters.items(): - loss_str.append( - "{}: {}".format(name, str(meter)) - ) - return self.delimiter.join(loss_str) - - def synchronize_between_processes(self): - for meter in self.meters.values(): - meter.synchronize_between_processes() - - def add_meter(self, name, meter): - self.meters[name] = meter - - def log_every(self, iterable, print_freq, header=None): - i = 0 - if not header: - header = '' - start_time = time.time() - end = time.time() - iter_time = SmoothedValue(fmt='{avg:.4f}') - data_time = SmoothedValue(fmt='{avg:.4f}') - space_fmt = ':' + str(len(str(len(iterable)))) + 'd' - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}', - 'max mem: {memory:.0f}' - ]) - MB = 1024.0 * 1024.0 - for obj in iterable: - data_time.update(time.time() - end) - yield obj - iter_time.update(time.time() - end) - if i % print_freq == 0 or i == len(iterable) - 1: - eta_seconds = iter_time.global_avg * (len(iterable) - i) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time), - memory=torch.cuda.max_memory_allocated() / MB)) - i += 1 - end = time.time() - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('{} Total time: {} ({:.4f} s / it)'.format( - header, total_time_str, total_time / len(iterable))) - - -def collate_fn(batch): - return tuple(zip(*batch)) - - -def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): - - def f(x): - if x >= warmup_iters: - return 1 - alpha = float(x) / warmup_iters - return warmup_factor * (1 - alpha) + alpha - - return torch.optim.lr_scheduler.LambdaLR(optimizer, f) - - -def mkdir(path): - try: - os.makedirs(path) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - -def setup_for_distributed(is_master): - """ - This function disables printing when not in master process - """ - import builtins as __builtin__ - builtin_print = __builtin__.print - - def print(*args, **kwargs): - force = kwargs.pop('force', False) - if is_master or force: - builtin_print(*args, **kwargs) - - __builtin__.print = print - - -def is_dist_avail_and_initialized(): - if not dist.is_available(): - return False - if not dist.is_initialized(): - return False - return True - - -def get_world_size(): - if not is_dist_avail_and_initialized(): - return 1 - return dist.get_world_size() - - -def get_rank(): - if not is_dist_avail_and_initialized(): - return 0 - return dist.get_rank() - - -def is_main_process(): - return get_rank() == 0 - - -def save_on_master(*args, **kwargs): - if is_main_process(): - torch.save(*args, **kwargs) - - -def init_distributed_mode(args): - if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: - args.rank = int(os.environ["RANK"]) - args.world_size = int(os.environ['WORLD_SIZE']) - args.gpu = int(os.environ['LOCAL_RANK']) - elif 'SLURM_PROCID' in os.environ: - args.rank = int(os.environ['SLURM_PROCID']) - args.gpu = args.rank % torch.cuda.device_count() - else: - print('Not using distributed mode') - args.distributed = False - return - - args.distributed = True - - torch.cuda.set_device(args.gpu) - args.dist_backend = 'nccl' - print('| distributed init (rank {}): {}'.format( - args.rank, args.dist_url), flush=True) - torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - torch.distributed.barrier() - setup_for_distributed(args.rank == 0) From b9ebcfb96dead41bb3e421a09c89d4baab9e246c Mon Sep 17 00:00:00 2001 From: junghye01 Date: Wed, 20 Dec 2023 20:52:12 +0000 Subject: [PATCH 06/10] =?UTF-8?q?=EC=A3=BC=EC=84=9D=EC=A0=9C=EA=B1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- faster-rcnn/inference.py | 14 +++++++------- faster-rcnn/train.py | 8 +++----- retinanet_/inference.py | 15 +++++++-------- ssd300/config.py | 1 + ssd300/inference.py | 18 +++++++++--------- ssd300/train.py | 28 ++++++++++++++-------------- 6 files changed, 41 insertions(+), 43 deletions(-) diff --git a/faster-rcnn/inference.py b/faster-rcnn/inference.py index a3b1982..9f689cd 100644 --- a/faster-rcnn/inference.py +++ b/faster-rcnn/inference.py @@ -56,20 +56,20 @@ def valid_fn(val_data_loader, model, device): output=model(images) - #print(f'target : {targets[0]}') + for out,target in zip(output,targets): scores=out['scores'].detach().cpu().numpy() boxes=out['boxes'].detach().cpu().numpy() labels=out['labels'].detach().cpu().numpy() - #keep_idx=nms(boxes,scores,iou_threshold=0.1) + keep_idx=nms(boxes,scores,iou_threshold=0.1) - #boxes=boxes[keep_idx] - #scores=scores[keep_idx] - #labels=labels[keep_idx] + boxes=boxes[keep_idx] + scores=scores[keep_idx] + labels=labels[keep_idx] - outputs.append({'boxes': boxes, # 2중 리스트일 수도 + outputs.append({'boxes': boxes, 'scores': scores, 'labels': labels}) @@ -79,7 +79,7 @@ def valid_fn(val_data_loader, model, device): ground_truths.append(list(zip(gt_labels,gt_boxes))) - #ground_truths.append(target['boxes'].cpu().numpy()) # 이중 리스트일 수도.. + return outputs,ground_truths diff --git a/faster-rcnn/train.py b/faster-rcnn/train.py index 7ab5530..8323510 100644 --- a/faster-rcnn/train.py +++ b/faster-rcnn/train.py @@ -53,7 +53,7 @@ def valid_fn(val_data_loader, model, device): output=model(images) - #print(f'target : {targets[0]}') + for out,target in zip(output,targets): scores=out['scores'].detach().cpu().numpy() boxes=out['boxes'].detach().cpu().numpy() @@ -108,14 +108,13 @@ def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimiz total_precision=metrics['total']['precision'] total_f1_score= metrics['total']['f1_score'] - #wandb.log({"epoch": epoch, "recall": metrics['recall']}) # Recall을 W&B에 로그합니다. wandb.log({"epoch": epoch, "total_recall": total_recall, "total_precision": total_precision,"total_f1_score":total_f1_score}) categories={2: 'Porosity', 3: 'Slag'} class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} # 각 클래스별 성능 로그 for class_label,class_metrics in metrics['per_class'].items(): - #class_label=class_label.item() + if class_label==2 or class_label==3: wandb.log({ @@ -134,8 +133,7 @@ def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimiz 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'lr_scheduler_state_dict': lr_scheduler.state_dict() - }, model_save_path) - #wandb.save(model_save_path) # 모델 파일을 W&B에 저장합니다. + },model_save_path) return_outputs=metrics['total'] return return_outputs,class_result diff --git a/retinanet_/inference.py b/retinanet_/inference.py index 045c2ea..ddf8176 100644 --- a/retinanet_/inference.py +++ b/retinanet_/inference.py @@ -57,20 +57,20 @@ def valid_fn(val_data_loader, model, device): output=model(images) - #print(f'target : {targets[0]}') + for out,target in zip(output,targets): scores=out['scores'].detach().cpu().numpy() boxes=out['boxes'].detach().cpu().numpy() labels=out['labels'].detach().cpu().numpy() - #keep_idx=nms(boxes,scores,iou_threshold=0.1) + keep_idx=nms(boxes,scores,iou_threshold=0.1) - #boxes=boxes[keep_idx] - #scores=scores[keep_idx] - #labels=labels[keep_idx] + boxes=boxes[keep_idx] + scores=scores[keep_idx] + labels=labels[keep_idx] - outputs.append({'boxes': boxes, # 2중 리스트일 수도 + outputs.append({'boxes': boxes, 'scores': scores, 'labels': labels}) @@ -80,7 +80,6 @@ def valid_fn(val_data_loader, model, device): ground_truths.append(list(zip(gt_labels,gt_boxes))) - #ground_truths.append(target['boxes'].cpu().numpy()) # 이중 리스트일 수도.. return outputs,ground_truths @@ -146,7 +145,7 @@ def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimiz model=torchvision.models.detection.retinanet_resnet50_fpn_v2(num_classes=4,pretrained=False,pretrained_backbone=True) - model_save_path = "/content/drive/MyDrive/models/fasterrcnn_resnet50_fpnv2_8_4.pth" + model_save_path = "/content/drive/MyDrive/models/retinanet_resnet50_fpnv2_8_4.pth" saved_state=torch.load(model_save_path,map_location=device) model.load_state_dict(saved_state['model_state_dict']) diff --git a/ssd300/config.py b/ssd300/config.py index 6e5ce74..0978d46 100644 --- a/ssd300/config.py +++ b/ssd300/config.py @@ -1,6 +1,7 @@ Config=dict( EPOCHS=150, LR=0.0001, + IMG_SIZE=512, DR_RATE=0.35, NUM_CLASSES=3, TRAIN_BS=4, diff --git a/ssd300/inference.py b/ssd300/inference.py index 14bc200..1399fe4 100644 --- a/ssd300/inference.py +++ b/ssd300/inference.py @@ -63,20 +63,20 @@ def valid_fn(val_data_loader, model, device): output=model(images) - #print(f'target : {targets[0]}') + for out,target in zip(output,targets): scores=out['scores'].detach().cpu().numpy() boxes=out['boxes'].detach().cpu().numpy() labels=out['labels'].detach().cpu().numpy() - #keep_idx=nms(boxes,scores,iou_threshold=0.1) + keep_idx=nms(boxes,scores,iou_threshold=0.1) - #boxes=boxes[keep_idx] - #scores=scores[keep_idx] - #labels=labels[keep_idx] + boxes=boxes[keep_idx] + scores=scores[keep_idx] + labels=labels[keep_idx] - outputs.append({'boxes': boxes, # 2중 리스트일 수도 + outputs.append({'boxes': boxes, 'scores': scores, 'labels': labels}) @@ -86,7 +86,7 @@ def valid_fn(val_data_loader, model, device): ground_truths.append(list(zip(gt_labels,gt_boxes))) - #ground_truths.append(target['boxes'].cpu().numpy()) # 이중 리스트일 수도.. + return outputs,ground_truths @@ -149,10 +149,10 @@ def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimiz ) - model=get_object_detection_model(Config['NUM_CLASSES']) + model=get_object_detection_model(Config['NUM_CLASSES'],Config['IMG_SIZE']) - model_save_path = "/content/drive/MyDrive/models/fasterrcnn_resnet50_fpnv2_8_4.pth" + model_save_path = "/content/drive/MyDrive/models/SSD300_8_4.pth" saved_state=torch.load(model_save_path,map_location=device) model.load_state_dict(saved_state['model_state_dict']) diff --git a/ssd300/train.py b/ssd300/train.py index 5cc7df7..fd95745 100644 --- a/ssd300/train.py +++ b/ssd300/train.py @@ -66,20 +66,20 @@ def valid_fn(val_data_loader, model, device): output=model(images) - #print(f'target : {targets[0]}') + for out,target in zip(output,targets): scores=out['scores'].detach().cpu().numpy() boxes=out['boxes'].detach().cpu().numpy() labels=out['labels'].detach().cpu().numpy() - # keep_idx=nms(boxes,scores,iou_threshold=0.1) + keep_idx=nms(boxes,scores,iou_threshold=0.1) - #boxes=boxes[keep_idx] - #scores=scores[keep_idx] - #labels=labels[keep_idx] - # label을 모두 int 형으로 변환 + boxes=boxes[keep_idx] + scores=scores[keep_idx] + labels=labels[keep_idx] + - outputs.append({'boxes': boxes, # 2중 리스트일 수도 + outputs.append({'boxes': boxes, 'scores': scores, 'labels': labels}) @@ -87,7 +87,7 @@ def valid_fn(val_data_loader, model, device): gt_boxes=target['boxes'].cpu().numpy() gt_labels=target['labels'].cpu().numpy() - #ground_truths.append(target['boxes'].cpu().numpy()) + ground_truths.append(list(zip(gt_labels,gt_boxes))) @@ -112,14 +112,14 @@ def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimiz total_precision=metrics['total']['precision'] total_f1_score= metrics['total']['f1_score'] - #wandb.log({"epoch": epoch, "recall": metrics['recall']}) # Recall을 W&B에 로그합니다. + wandb.log({"epoch": epoch, "total_recall": total_recall, "total_precision": total_precision,"total_f1_score":total_f1_score}) categories={2: 'Porosity', 3: 'Slag'} class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} # 각 클래스별 성능 로그 for class_label,class_metrics in metrics['per_class'].items(): - #class_label=class_label.item() + if class_label==2 or class_label==3: wandb.log({ @@ -139,7 +139,7 @@ def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimiz 'optimizer_state_dict': optimizer.state_dict(), 'lr_scheduler_state_dict': lr_scheduler.state_dict() }, model_save_path) - #wandb.save(model_save_path) # 모델 파일을 W&B에 저장합니다. + return_outputs=metrics['total'] return return_outputs,class_result @@ -158,8 +158,8 @@ def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimiz result_dir_path=f'/content/drive/MyDrive/result/{Config["MODEL"]}' os.makedirs(result_dir_path,exist_ok=True) - train_dataset=RT_Dataset(train_df,image_dir,transforms=get_ssd_transform(train=True)) - valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_ssd_transform(train=False)) + train_dataset=RT_Dataset(train_df,image_dir,transforms=get_transform(train=True)) + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) train_dataloader=torch.utils.data.DataLoader( train_dataset, @@ -185,7 +185,7 @@ def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimiz params = [p for p in model.parameters() if p.requires_grad] - #optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) + optimizer = torch.optim.Adam(params, lr=Config['LR']) From 863c752fe0765ab5be14ddbd2a06d57e40e18bc6 Mon Sep 17 00:00:00 2001 From: Chaelsy-kim <80692512+Chaelsy-kim@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:26:12 +0900 Subject: [PATCH 07/10] Modify inference.py --- efficientdet_/inference.py | 103 +++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 44 deletions(-) diff --git a/efficientdet_/inference.py b/efficientdet_/inference.py index a54d9e7..0a7f7db 100644 --- a/efficientdet_/inference.py +++ b/efficientdet_/inference.py @@ -67,7 +67,7 @@ def get_test_transform(): # Effdet config를 통해 모델 불러오기 + ckpt load def load_net(checkpoint_path, device): config = get_efficientdet_config('tf_efficientdet_d3') - config.num_classes = 3 + config.num_classes = 4 config.image_size = (512,512) config.soft_nms = False @@ -89,14 +89,25 @@ def load_net(checkpoint_path, device): def valid_fn(val_data_loader,test_ann, model, device): outputs = [] ground_truths=[] + labels=[] + for images, image_ids in tqdm(val_data_loader): # gpu 계산을 위해 image.to(device) + image_dir='./data_test/01. data/Image' + record=test_ann.iloc[image_ids] + image_name=record['image_name'] + image_path=os.path.join(image_dir,record['dataset']) + test=cv2.imread(os.path.join(image_path,image_name)) + h, w, c = test.shape + + a=images images = torch.stack(images) # bs, ch, w, h images = images.to(device).float() - print(image_ids) + output = model(images) + for out in output: - outputs.append({'boxes': out.detach().cpu().numpy()[:,:4], + outputs.append({'boxes': out.detach().cpu().numpy()[:,:4]*np.array([w,h,w,h])/512, 'scores': out.detach().cpu().numpy()[:,4], 'labels': out.detach().cpu().numpy()[:,-1]}) for image_id in image_ids: @@ -105,7 +116,7 @@ def valid_fn(val_data_loader,test_ann, model, device): ground_truths.append(boxes) - return outputs,ground_truths + return outputs,ground_truths,labels def collate_fn(batch): return tuple(zip(*batch)) @@ -137,59 +148,63 @@ def calculate_iou(box1, box2): return iou # 메트릭 계산 함수 -def calculate_metrics(predictions, ground_truths, iou_threshold=0.0001): +def calculate_metrics(predictions_boxes,predictions_labels, ground_truths,labels, iou_threshold=0.2): """ Calculate precision, recall, and IoU score for a set of predictions and ground truth boxes. """ - total_true_positives = 0 - total_false_positives = 0 - total_false_negatives = 0 - total_iou_score = 0 - - for pred_boxes, gt_boxes in zip(predictions, ground_truths): - matched = [False] * len(gt_boxes) - - for pred_box in pred_boxes: # 모든 prediction에 대해 + result=np.zeros((4,4)) + + for pred_boxes, pred_labels, gt_boxes,gt_labels in zip(predictions_boxes, predictions_labels, ground_truths, labels): + if len(pred_labels)==0: + pred_labels=np.append(pred_labels,3) + pred_boxes=np.append(pred_boxes,0) + + for pred_box,pred_label in zip(pred_boxes,pred_labels): # 모든 prediction에 대해 best_iou = 0 - best_match = None - - for i, gt_box in enumerate(gt_boxes): - iou = calculate_iou(pred_box, gt_box) - - if iou > best_iou: - best_iou = iou - best_match = i + for gt_box, gt_label in zip(gt_boxes,gt_labels): + if pred_label==3: + x=int(pred_label) + else: + iou = calculate_iou(pred_box, gt_box) + if iou > best_iou: + best_iou = iou + x=int(gt_label) + y=int(pred_label) - if best_iou > iou_threshold: - if not matched[best_match]: - total_true_positives += 1 - total_iou_score += best_iou - matched[best_match] = True + if best_iou > iou_threshold : + result[x][y]+=1 + else: - total_false_positives += 1 - - total_false_negatives += len(gt_boxes) - sum(matched) - - precision = total_true_positives / (total_true_positives + total_false_positives) if (total_true_positives + total_false_positives) != 0 else 0 - recall = total_true_positives / (total_true_positives + total_false_negatives) if (total_true_positives + total_false_negatives) != 0 else 0 - average_iou = total_iou_score / total_true_positives if total_true_positives != 0 else 0 - - return {'precision': precision, 'recall': recall, 'average_iou': average_iou} + if len(gt_boxes)==0: + result[3][int(pred_label)]+=1 + else: + result[x][3]+=1 + + print(result) + Porosity_precision = result[1][1]/sum(result[:,1]) + Porosity_recall = result[1][1]/sum(result[1,:]) + Porosity_F1_score = 2*Porosity_precision*Porosity_recall/(Porosity_precision+Porosity_recall) + slag_precision = result[2][2]/sum(result[:,2]) + slag_recall = result[2][2]/sum(result[2,:]) + slag_F1_score = 2*slag_precision*slag_recall/(slag_precision+slag_recall) + + return {'Porosity_precision': Porosity_precision, 'Porosity_recall': Porosity_recall, 'Porosity_F1_score': Porosity_F1_score, + 'slag_precision': slag_precision, 'slag_recall': slag_recall, 'slag_F1_score': slag_F1_score} def main(): - test_ann_dir='.annotations/test_total.csv' - image_dir='/home/irteam/junghye-dcloud-dir/pathfinder/data/Image' + test_ann_dir='./data_test/01. data/annotations_v2/test_total.csv' + image_dir='./data_test/01. data/Image' test_dataset=RT_dataset(image_dir,test_ann_dir,transforms=get_test_transform()) test_ann=pd.read_csv(test_ann_dir) #epoch=50 checkpoint_path=f'/home/irteam/junghye-dcloud-dir/pathfinder/models/effdet_best_loss_modifiedann.pth' - score_threshold=0.01 # score : 모델이 해당 객체를 올바르게 감지했다고 확신하는 정도 + score_threshold=0.2 # score : 모델이 해당 객체를 올바르게 감지했다고 확신하는 정도 test_data_loader=DataLoader( test_dataset, - batch_size=2, + batch_size=1, shuffle=False, - num_workers=4, + num_workers=0, collate_fn=collate_fn ) @@ -198,7 +213,7 @@ def main(): model=load_net(checkpoint_path,device) - outputs,ground_truths=valid_fn(test_data_loader,test_ann,model,device) + outputs,gt_boxes,gt_labels=valid_fn(test_data_loader,test_ann,model,device) # calculate precision, recall, average_iou scores # 테스트 데이터에서 상위 점수를 가진 bounding box만 선택 @@ -208,8 +223,8 @@ def main(): predictions.append(valid_boxes) # calculate precision, recall, average_iou scores - metrics = calculate_metrics(predictions, ground_truths) + metrics = calculate_metrics(predictions_boxes,predictions_labels,gt_boxes,gt_labels) print(metrics) if __name__ == "__main__": - main() \ No newline at end of file + main() From 8046fe069a580ee24296817a28d78696637d6a33 Mon Sep 17 00:00:00 2001 From: Chaelsy-kim <80692512+Chaelsy-kim@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:27:30 +0900 Subject: [PATCH 08/10] Modify inference.py --- efficientdet_/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/efficientdet_/inference.py b/efficientdet_/inference.py index 0a7f7db..df2af34 100644 --- a/efficientdet_/inference.py +++ b/efficientdet_/inference.py @@ -198,7 +198,7 @@ def main(): test_dataset=RT_dataset(image_dir,test_ann_dir,transforms=get_test_transform()) test_ann=pd.read_csv(test_ann_dir) #epoch=50 - checkpoint_path=f'/home/irteam/junghye-dcloud-dir/pathfinder/models/effdet_best_loss_modifiedann.pth' + checkpoint_path=f'./models/effdet_best_loss_modifiedann.pth' score_threshold=0.2 # score : 모델이 해당 객체를 올바르게 감지했다고 확신하는 정도 test_data_loader=DataLoader( test_dataset, From 1d240e6eef2742f7cf674e155cd46ec87f393a05 Mon Sep 17 00:00:00 2001 From: Chaelsy-kim <80692512+Chaelsy-kim@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:28:41 +0900 Subject: [PATCH 09/10] Modify model.py --- efficientdet_/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/efficientdet_/model.py b/efficientdet_/model.py index 37c80cd..22eb068 100644 --- a/efficientdet_/model.py +++ b/efficientdet_/model.py @@ -5,7 +5,7 @@ def get_net(checkpoint_path=None): config = get_efficientdet_config('tf_efficientdet_d3') - config.num_classes = 3 + config.num_classes = 4 config.image_size = (512,512) config.soft_nms = False @@ -18,4 +18,4 @@ def get_net(checkpoint_path=None): checkpoint = torch.load(checkpoint_path) net.load_state_dict(checkpoint['model_state_dict']) - return DetBenchTrain(net) \ No newline at end of file + return DetBenchTrain(net) From 908cb6da8c2b494f966382df3a55744f774c53a0 Mon Sep 17 00:00:00 2001 From: Chaelsy-kim <80692512+Chaelsy-kim@users.noreply.github.com> Date: Fri, 5 Apr 2024 13:50:12 +0900 Subject: [PATCH 10/10] Update config.py --- efficientdet_/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/efficientdet_/config.py b/efficientdet_/config.py index 9d52298..b0d2433 100644 --- a/efficientdet_/config.py +++ b/efficientdet_/config.py @@ -1,9 +1,9 @@ num_epochs = 150 -batch_size=4 +batch_size=16 -num_workers=4 +num_workers=0 lr=0.005