diff --git a/efficientdet_/config.py b/efficientdet_/config.py index 9d52298..b0d2433 100644 --- a/efficientdet_/config.py +++ b/efficientdet_/config.py @@ -1,9 +1,9 @@ num_epochs = 150 -batch_size=4 +batch_size=16 -num_workers=4 +num_workers=0 lr=0.005 diff --git a/efficientdet_/inference.py b/efficientdet_/inference.py index a54d9e7..df2af34 100644 --- a/efficientdet_/inference.py +++ b/efficientdet_/inference.py @@ -67,7 +67,7 @@ def get_test_transform(): # Effdet config를 통해 모델 불러오기 + ckpt load def load_net(checkpoint_path, device): config = get_efficientdet_config('tf_efficientdet_d3') - config.num_classes = 3 + config.num_classes = 4 config.image_size = (512,512) config.soft_nms = False @@ -89,14 +89,25 @@ def load_net(checkpoint_path, device): def valid_fn(val_data_loader,test_ann, model, device): outputs = [] ground_truths=[] + labels=[] + for images, image_ids in tqdm(val_data_loader): # gpu 계산을 위해 image.to(device) + image_dir='./data_test/01. data/Image' + record=test_ann.iloc[image_ids] + image_name=record['image_name'] + image_path=os.path.join(image_dir,record['dataset']) + test=cv2.imread(os.path.join(image_path,image_name)) + h, w, c = test.shape + + a=images images = torch.stack(images) # bs, ch, w, h images = images.to(device).float() - print(image_ids) + output = model(images) + for out in output: - outputs.append({'boxes': out.detach().cpu().numpy()[:,:4], + outputs.append({'boxes': out.detach().cpu().numpy()[:,:4]*np.array([w,h,w,h])/512, 'scores': out.detach().cpu().numpy()[:,4], 'labels': out.detach().cpu().numpy()[:,-1]}) for image_id in image_ids: @@ -105,7 +116,7 @@ def valid_fn(val_data_loader,test_ann, model, device): ground_truths.append(boxes) - return outputs,ground_truths + return outputs,ground_truths,labels def collate_fn(batch): return tuple(zip(*batch)) @@ -137,59 +148,63 @@ def calculate_iou(box1, box2): return iou # 메트릭 계산 함수 -def calculate_metrics(predictions, ground_truths, iou_threshold=0.0001): +def 
calculate_metrics(predictions_boxes,predictions_labels, ground_truths,labels, iou_threshold=0.2): """ Calculate precision, recall, and IoU score for a set of predictions and ground truth boxes. """ - total_true_positives = 0 - total_false_positives = 0 - total_false_negatives = 0 - total_iou_score = 0 - - for pred_boxes, gt_boxes in zip(predictions, ground_truths): - matched = [False] * len(gt_boxes) - - for pred_box in pred_boxes: # 모든 prediction에 대해 + result=np.zeros((4,4)) + + for pred_boxes, pred_labels, gt_boxes,gt_labels in zip(predictions_boxes, predictions_labels, ground_truths, labels): + if len(pred_labels)==0: + pred_labels=np.append(pred_labels,3) + pred_boxes=np.append(pred_boxes,0) + + for pred_box,pred_label in zip(pred_boxes,pred_labels): # 모든 prediction에 대해 best_iou = 0 - best_match = None - - for i, gt_box in enumerate(gt_boxes): - iou = calculate_iou(pred_box, gt_box) - - if iou > best_iou: - best_iou = iou - best_match = i + for gt_box, gt_label in zip(gt_boxes,gt_labels): + if pred_label==3: + x=int(pred_label) + else: + iou = calculate_iou(pred_box, gt_box) + if iou > best_iou: + best_iou = iou + x=int(gt_label) + y=int(pred_label) - if best_iou > iou_threshold: - if not matched[best_match]: - total_true_positives += 1 - total_iou_score += best_iou - matched[best_match] = True + if best_iou > iou_threshold : + result[x][y]+=1 + else: - total_false_positives += 1 - - total_false_negatives += len(gt_boxes) - sum(matched) - - precision = total_true_positives / (total_true_positives + total_false_positives) if (total_true_positives + total_false_positives) != 0 else 0 - recall = total_true_positives / (total_true_positives + total_false_negatives) if (total_true_positives + total_false_negatives) != 0 else 0 - average_iou = total_iou_score / total_true_positives if total_true_positives != 0 else 0 - - return {'precision': precision, 'recall': recall, 'average_iou': average_iou} + if len(gt_boxes)==0: + result[3][int(pred_label)]+=1 + else: + 
result[x][3]+=1 + + print(result) + Porosity_precision = result[1][1]/sum(result[:,1]) + Porosity_recall = result[1][1]/sum(result[1,:]) + Porosity_F1_score = 2*Porosity_precision*Porosity_recall/(Porosity_precision+Porosity_recall) + slag_precision = result[2][2]/sum(result[:,2]) + slag_recall = result[2][2]/sum(result[2,:]) + slag_F1_score = 2*slag_precision*slag_recall/(slag_precision+slag_recall) + + return {'Porosity_precision': Porosity_precision, 'Porosity_recall': Porosity_recall, 'Porosity_F1_score': Porosity_F1_score, + 'slag_precision': slag_precision, 'slag_recall': slag_recall, 'slag_F1_score': slag_F1_score} def main(): - test_ann_dir='.annotations/test_total.csv' - image_dir='/home/irteam/junghye-dcloud-dir/pathfinder/data/Image' + test_ann_dir='./data_test/01. data/annotations_v2/test_total.csv' + image_dir='./data_test/01. data/Image' test_dataset=RT_dataset(image_dir,test_ann_dir,transforms=get_test_transform()) test_ann=pd.read_csv(test_ann_dir) #epoch=50 - checkpoint_path=f'/home/irteam/junghye-dcloud-dir/pathfinder/models/effdet_best_loss_modifiedann.pth' - score_threshold=0.01 # score : 모델이 해당 객체를 올바르게 감지했다고 확신하는 정도 + checkpoint_path=f'./models/effdet_best_loss_modifiedann.pth' + score_threshold=0.2 # score : 모델이 해당 객체를 올바르게 감지했다고 확신하는 정도 test_data_loader=DataLoader( test_dataset, - batch_size=2, + batch_size=1, shuffle=False, - num_workers=4, + num_workers=0, collate_fn=collate_fn ) @@ -198,7 +213,7 @@ def main(): model=load_net(checkpoint_path,device) - outputs,ground_truths=valid_fn(test_data_loader,test_ann,model,device) + outputs,gt_boxes,gt_labels=valid_fn(test_data_loader,test_ann,model,device) # calculate precision, recall, average_iou scores # 테스트 데이터에서 상위 점수를 가진 bounding box만 선택 @@ -208,8 +223,8 @@ def main(): predictions.append(valid_boxes) # calculate precision, recall, average_iou scores - metrics = calculate_metrics(predictions, ground_truths) + metrics = calculate_metrics(predictions_boxes,predictions_labels,gt_boxes,gt_labels) 
print(metrics) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/efficientdet_/model.py b/efficientdet_/model.py index 37c80cd..22eb068 100644 --- a/efficientdet_/model.py +++ b/efficientdet_/model.py @@ -5,7 +5,7 @@ def get_net(checkpoint_path=None): config = get_efficientdet_config('tf_efficientdet_d3') - config.num_classes = 3 + config.num_classes = 4 config.image_size = (512,512) config.soft_nms = False @@ -18,4 +18,4 @@ def get_net(checkpoint_path=None): checkpoint = torch.load(checkpoint_path) net.load_state_dict(checkpoint['model_state_dict']) - return DetBenchTrain(net) \ No newline at end of file + return DetBenchTrain(net) diff --git a/faster-rcnn/config.py b/faster-rcnn/config.py index 91297a3..6a49ccb 100644 --- a/faster-rcnn/config.py +++ b/faster-rcnn/config.py @@ -1,13 +1,13 @@ Config=dict( - EPOCHS=150, - LR=2e-4, - IMG_SIZE=224, + EPOCHS=50, + LR=5e-4, + IMG_SIZE=640, DR_RATE=0.35, NUM_CLASSES=4, - TRAIN_BS=4, - VALID_BS=2, + TRAIN_BS=8, + VALID_BS=4, NUM_WORKERS=4, WEIGHT_DECAY=0.0005, - CONTRAST='AFTER' + CONTRAST='AFTER', ) \ No newline at end of file diff --git a/faster-rcnn/inference.py b/faster-rcnn/inference.py new file mode 100644 index 0000000..9f689cd --- /dev/null +++ b/faster-rcnn/inference.py @@ -0,0 +1,166 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') +import os +import numpy as np +import pandas as pd + +# for ignoring warnings +import warnings +warnings.filterwarnings('ignore') + +import cv2 + +import torch +import torchvision +from torchvision import transforms as torchtrans +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.ops import nms + + +from tqdm import tqdm +import utils +import transforms as T +from dataset import RT_Dataset +from config import Config +from augment import get_transform + +from torchvision.models.detection.ssd import SSDClassificationHead +from torchvision.models.detection import _utils +from 
torchvision.models.detection import SSD300_VGG16_Weights + + + +# model +def get_object_detection_model(num_classes): + + # load a model pre-trained pre-trained on COCO + model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=True) + + # get number of input features for the classifier + in_features = model.roi_heads.box_predictor.cls_score.in_features + # replace the pre-trained head with a new one + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + + return model + +# valid function +def valid_fn(val_data_loader, model, device): + model.eval() + outputs = [] + ground_truths=[] + + for images,targets in tqdm(val_data_loader): + + images=list(img.to(device) for img in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + output=model(images) + + + for out,target in zip(output,targets): + scores=out['scores'].detach().cpu().numpy() + boxes=out['boxes'].detach().cpu().numpy() + labels=out['labels'].detach().cpu().numpy() + + keep_idx=nms(boxes,scores,iou_threshold=0.1) + + boxes=boxes[keep_idx] + scores=scores[keep_idx] + labels=labels[keep_idx] + + + outputs.append({'boxes': boxes, + 'scores': scores, + 'labels': labels}) + + # ground truth 에 label 추가 + gt_boxes=target['boxes'].cpu().numpy() + gt_labels=target['labels'].cpu().numpy() + + ground_truths.append(list(zip(gt_labels,gt_boxes))) + + + + + return outputs,ground_truths + + +def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2): + global best_recall + outputs, ground_truths = valid_fn(valid_dataloader, model, device) + predictions = [] + + + for output in outputs: + valid_scores=output['scores']>score_threshold + + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + + + predictions.append(list(zip(valid_labels,valid_boxes))) + + + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = 
utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} + + return_outputs=metrics['total'] + return return_outputs,class_result + +if __name__=='__main__': + + + if torch.cuda.is_available(): + device=torch.device('cuda') + + + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/test_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + + + + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) + + + + valid_dataloader=torch.utils.data.DataLoader( + valid_dataset, + batch_size=Config['VALID_BS'], + shuffle=False, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + + ) + + + model=get_object_detection_model(Config['NUM_CLASSES']) + + + model_save_path = "/content/drive/MyDrive/models/fasterrcnn_resnet50_fpnv2_8_4.pth" + saved_state=torch.load(model_save_path,map_location=device) + + model.load_state_dict(saved_state['model_state_dict']) + + model.to(device) + + + + params = [p for p in model.parameters() if p.requires_grad] + + optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) + + lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9) + + # valid data + return_outputs,class_result = validate_and_save_best_model(0, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'test Total result : {return_outputs} , class result : {class_result} ') \ No newline at end of file diff --git a/faster-rcnn/test.ipynb b/faster-rcnn/test.ipynb deleted file mode 100644 index 4b49849..0000000 --- a/faster-rcnn/test.ipynb +++ /dev/null @@ -1,48 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - 
"data": { - "text/plain": [ - "array([], shape=(0, 5), dtype=float64)" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "\n", - "\n", - "np.zeros((0,5))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "pixt", - "language": "python", - "name": "pixt" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/faster-rcnn/train.py b/faster-rcnn/train.py index 24c702a..8323510 100644 --- a/faster-rcnn/train.py +++ b/faster-rcnn/train.py @@ -1,3 +1,5 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') import os import numpy as np import pandas as pd @@ -10,10 +12,12 @@ import torch import torchvision -from torchvision import transforms as torchtrans +from torchvision import transforms as torchtrans from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.ops import nms from engine import train_one_epoch,evaluate +from tqdm import tqdm import utils import transforms as T from dataset import RT_Dataset @@ -22,33 +26,132 @@ import wandb -# model - def get_object_detection_model(num_classes): # load a model pre-trained pre-trained on COCO - model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) - + model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=True) + # get number of input features for the classifier in_features = model.roi_heads.box_predictor.cls_score.in_features # replace the pre-trained head with a new one - model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) return model - +# valid function +def 
valid_fn(val_data_loader, model, device): + model.eval() + outputs = [] + ground_truths=[] + + for images,targets in tqdm(val_data_loader): + + images=list(img.to(device) for img in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + output=model(images) + + + for out,target in zip(output,targets): + scores=out['scores'].detach().cpu().numpy() + boxes=out['boxes'].detach().cpu().numpy() + labels=out['labels'].detach().cpu().numpy() + + keep_idx=nms(boxes,scores,iou_threshold=0.1) + + boxes=boxes[keep_idx] + scores=scores[keep_idx] + labels=labels[keep_idx] + + + outputs.append({'boxes': boxes, + 'scores': scores, + 'labels': labels}) + + # ground truth 에 label 추가 + gt_boxes=target['boxes'].cpu().numpy() + gt_labels=target['labels'].cpu().numpy() + + ground_truths.append(list(zip(gt_labels,gt_boxes))) + + + + + return outputs,ground_truths + + +def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.1): + global best_recall + outputs, ground_truths = valid_fn(valid_dataloader, model, device) + predictions = [] + + + for output in outputs: + valid_scores=output['scores']>score_threshold + + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + + + predictions.append(list(zip(valid_labels,valid_boxes))) + + print(f'pred : {predictions[0:5]}\n') + print(f'gt : {ground_truths[0:5]}') + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + wandb.log({"epoch": epoch, "total_recall": total_recall, "total_precision": total_precision,"total_f1_score":total_f1_score}) + + categories={2: 'Porosity', 3: 'Slag'} + class_result = {class_label: metrics_val for class_label, metrics_val in 
metrics['per_class'].items() if class_label != 0} + # 각 클래스별 성능 로그 + for class_label,class_metrics in metrics['per_class'].items(): + + if class_label==2 or class_label==3: + + wandb.log({ + f"class_{categories[class_label]}_recall" : class_metrics['recall'], + f"class_{categories[class_label]}_precision" : class_metrics['precision'], + f"class_{categories[class_label]}_f1_score" : class_metrics['f1_score'], + f"class_{categories[class_label]}_average_iou": class_metrics['average_iou'], + + }) + if total_recall > best_recall: + best_recall = total_recall + + model_save_path = f"/content/drive/MyDrive/models/fasterrcnn_resnet50_fpnv2_{Config['TRAIN_BS']}_{Config['VALID_BS']}.pth" + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'lr_scheduler_state_dict': lr_scheduler.state_dict() + },model_save_path) + + return_outputs=metrics['total'] + return return_outputs,class_result + if __name__=='__main__': wandb.init(project='capstone',name='faster-rcnn',reinit=True) - + if torch.cuda.is_available(): device=torch.device('cuda') - - - train_df=pd.read_csv('../../data/annotations/train_total.csv') - valid_df=pd.read_csv('../../data/annotations/valid_total.csv') - image_dir='../../data_contrast/after/Image' + + + train_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/train_total.csv') + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/valid_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + + result_dir_path=f'/content/drive/MyDrive/result/Faster-RCNN' + os.makedirs(result_dir_path,exist_ok=True) train_dataset=RT_Dataset(train_df,image_dir,transforms=get_transform(train=True)) valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) @@ -67,27 +170,32 @@ def get_object_detection_model(num_classes): shuffle=False, num_workers=Config['NUM_WORKERS'], collate_fn=utils.collate_fn, - + ) model=get_object_detection_model(Config['NUM_CLASSES']) 
model.to(device) - + wandb.watch(model) - + params = [p for p in model.parameters() if p.requires_grad] - + optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) - - lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1) - - + + lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9) + + best_recall=-100 + for epoch in range(Config['EPOCHS']): train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=10) - + lr_scheduler.step() - - evaluate(model, valid_dataloader, device=device) + + # valid data + return_outputs,class_result = validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'epoch : {epoch}, output : {return_outputs}') + diff --git a/retinanet_/dataset.py b/retinanet_/dataset.py deleted file mode 100644 index 13513d8..0000000 --- a/retinanet_/dataset.py +++ /dev/null @@ -1,39 +0,0 @@ -import torch -from torch.utils.data import DataLoader, Dataset -import cv2 -import numpy as np - - -class RT_Dataset(Dataset): - def __init__(self,dataframe,image_dir,transforms=None): - super().__init__() - self.image_ids=dataframe['image_number'].unique() - self.df=dataframe - self.image_dir=image_dir - self.transforms=transforms - - def __getitem__(self,index:int): - image_id=self.image_ids[index] - records=self.df[self.df['image_number']==image_id] - - image=cv2.imread(f'{self.image_dir}/{records["dataset"].values[0]}/{records["image_name"].values[0]}',cv2.IMREAD_COLOR) - image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB).astype(np.float32) - image/=255.0 - - bndbox=np.array(eval(records['bndbox'].values[0])) - labels=np.array(eval(records['labels'].values[0])) - boxes=np.zeros((bndbox.shape[0],5)) - - if bndbox.size>0: - boxes[:,0:4]=bndbox - boxes[:,4]=labels - - sample={'img':image, 'annot':boxes} - - if self.transforms: - sample=self.transforms(sample) - - return sample - - def __len__(self) -> int: - return 
self.image_ids.shape[0] \ No newline at end of file diff --git a/retinanet_/inference.py b/retinanet_/inference.py new file mode 100644 index 0000000..ddf8176 --- /dev/null +++ b/retinanet_/inference.py @@ -0,0 +1,166 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') +import os +import numpy as np +import pandas as pd + +# for ignoring warnings +import warnings +warnings.filterwarnings('ignore') + +import cv2 + +import torch +import torchvision +from torchvision import transforms as torchtrans +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.ops import nms + + +from tqdm import tqdm +import utils +import transforms as T +from dataset import RT_Dataset +from config import Config +from augment import get_transform + +from torchvision.models.detection.ssd import SSDClassificationHead +from torchvision.models.detection import _utils +from torchvision.models.detection import SSD300_VGG16_Weights + + + + +# model +def get_object_detection_model(num_classes): + + # load a model pre-trained pre-trained on COCO + model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=True) + + # get number of input features for the classifier + in_features = model.roi_heads.box_predictor.cls_score.in_features + # replace the pre-trained head with a new one + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + + return model + +# valid function +def valid_fn(val_data_loader, model, device): + model.eval() + outputs = [] + ground_truths=[] + + for images,targets in tqdm(val_data_loader): + + images=list(img.to(device) for img in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + output=model(images) + + + for out,target in zip(output,targets): + scores=out['scores'].detach().cpu().numpy() + boxes=out['boxes'].detach().cpu().numpy() + labels=out['labels'].detach().cpu().numpy() + + keep_idx=nms(boxes,scores,iou_threshold=0.1) + + 
boxes=boxes[keep_idx] + scores=scores[keep_idx] + labels=labels[keep_idx] + + + outputs.append({'boxes': boxes, + 'scores': scores, + 'labels': labels}) + + # ground truth 에 label 추가 + gt_boxes=target['boxes'].cpu().numpy() + gt_labels=target['labels'].cpu().numpy() + + ground_truths.append(list(zip(gt_labels,gt_boxes))) + + + + return outputs,ground_truths + + +def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2): + global best_recall + outputs, ground_truths = valid_fn(valid_dataloader, model, device) + predictions = [] + + + for output in outputs: + valid_scores=output['scores']>score_threshold + + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + + + predictions.append(list(zip(valid_labels,valid_boxes))) + + + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} + + return_outputs=metrics['total'] + return return_outputs,class_result + +if __name__=='__main__': + + + if torch.cuda.is_available(): + device=torch.device('cuda') + + + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/test_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + + + + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) + + + + valid_dataloader=torch.utils.data.DataLoader( + valid_dataset, + batch_size=Config['VALID_BS'], + shuffle=False, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + + ) + + + model=torchvision.models.detection.retinanet_resnet50_fpn_v2(num_classes=4,pretrained=False,pretrained_backbone=True) + + + model_save_path = 
"/content/drive/MyDrive/models/retinanet_resnet50_fpnv2_8_4.pth" + saved_state=torch.load(model_save_path,map_location=device) + + model.load_state_dict(saved_state['model_state_dict']) + + model.to(device) + + + + params = [p for p in model.parameters() if p.requires_grad] + + optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) + + lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9) + + # valid data + return_outputs,class_result = validate_and_save_best_model(0, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'test Total result : {return_outputs} , class result : {class_result} ') \ No newline at end of file diff --git a/retinanet_/test.ipynb b/retinanet_/test.ipynb deleted file mode 100644 index 6c7d865..0000000 --- a/retinanet_/test.ipynb +++ /dev/null @@ -1,346 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n", - "\n", - "import re\n", - "import cv2\n", - "import time\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "\n", - "import torch\n", - "import torch.optim as optim\n", - "import torchvision.transforms as T\n", - "from torchvision.utils import make_grid \n", - "from torch.utils.data import DataLoader, Dataset\n", - "\n", - "from retinanet.csv_eval_original import evaluate \n", - "from retinanet import model\n", - "from retinanet.dataloader import collater, Resizer_512, Augmenter, Normalizer, UnNormalizer,CSVDataset\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "test_dataset=CSVDataset('../annotations_v2/retinanet_test.csv','../annotations_v2/classes.csv',transform=T.Compose([Normalizer(),Resizer_512()]))" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - 
"\n", - "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n", - "torch.cuda.empty_cache()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DataParallel(\n", - " (module): ResNet(\n", - " (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", - " (layer1): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer2): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(128, 512, 
kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (3): Bottleneck(\n", - " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer3): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (3): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), 
stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (4): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (5): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer4): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (fpn): PyramidFeatures(\n", - " (P5_1): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))\n", - " (P5_upsampled): Upsample(scale_factor=2.0, mode='nearest')\n", - " (P5_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (P4_1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))\n", - " (P4_upsampled): Upsample(scale_factor=2.0, mode='nearest')\n", - " (P4_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), 
padding=(1, 1))\n", - " (P3_1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))\n", - " (P3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (P6): Conv2d(2048, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", - " (P7_1): ReLU()\n", - " (P7_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", - " )\n", - " (regressionModel): RegressionModel(\n", - " (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act1): ReLU()\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act2): ReLU()\n", - " (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act3): ReLU()\n", - " (conv4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act4): ReLU()\n", - " (output): Conv2d(256, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " )\n", - " (classificationModel): ClassificationModel(\n", - " (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act1): ReLU()\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act2): ReLU()\n", - " (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act3): ReLU()\n", - " (conv4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (act4): ReLU()\n", - " (output): Conv2d(256, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (output_act): Sigmoid()\n", - " )\n", - " (anchors): Anchors()\n", - " (regressBoxes): BBoxTransform()\n", - " (clipBoxes): ClipBoxes()\n", - " (focalLoss): FocalLoss()\n", - " )\n", - ")" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "retinanet = torch.load(\"/home/irteam/junghye-dcloud-dir/pathfinder/pathfinder_ai/models/retinanet_50_epoch3.pt\", map_location=device)\n", - "\n", - "# 모델을 평가 모드로 전환합니다.\n", - 
"retinanet.eval()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "698/698\n", - "mAP:\n", - "Others: 0.0\n", - "detect된 거 없음\n", - "Porosity: 0.0\n", - "detect된 거 없음\n", - "Slag: 0.0\n", - "detect된 거 없음\n", - "Normal: 0\n", - "detect된 거 없음\n" - ] - }, - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click here for more info. View Jupyter log for further details." - ] - } - ], - "source": [ - "average_precisions = evaluate(test_dataset, retinanet, iou_threshold=0.000000001, score_threshold=0.0000000001, max_detections=1000000,save_path='./')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "pixt", - "language": "python", - "name": "pixt" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/retinanet_/train.py b/retinanet_/train.py index 66f3760..ee45437 100644 --- a/retinanet_/train.py +++ b/retinanet_/train.py @@ -1,193 +1,191 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') import os - -os.environ["CUDA_LAUNCH_BLOCKING"] = "1" - -import re -import cv2 -import time import numpy as np import pandas as pd +# for ignoring warnings +import warnings +warnings.filterwarnings('ignore') -import torch -import torch.optim as optim -import torchvision.transforms as T -from torchvision.utils import make_grid -from torch.utils.data import DataLoader, Dataset +import cv2 -from retinanet import model -from retinanet.dataloader import collater, 
Resizer, Augmenter, Normalizer, UnNormalizer,CSVDataset,AspectRatioBasedSampler -from config import Config +import torch +import torchvision +from torchvision import transforms as torchtrans +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.ops import nms + +from engine import train_one_epoch,evaluate +from tqdm import tqdm +import utils +import transforms as T from dataset import RT_Dataset +from config import Config +from augment import get_transform import wandb - - - -best_loss=float('inf') - -def train_one_epoch(retinanet,epoch_num,optimizer,scheduler, train_data_loader,device): - - print("Epoch - {} Started".format(epoch_num)) - st = time.time() - - retinanet.train() - - epoch_loss = [] - - for iter_num, data in enumerate(train_data_loader): - - # Reseting gradients after each iter - optimizer.zero_grad() - - # Forward - #print(f'{data["annot"]}') - classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda().float()]) - - # Calculating Loss - classification_loss = classification_loss.mean() - regression_loss = regression_loss.mean() - - loss = classification_loss + regression_loss - - if bool(loss == 0): - continue - - # Calculating Gradients - loss.backward() - - # Gradient Clipping - torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) - - # Updating Weights - optimizer.step() - - #Epoch Loss - epoch_loss.append(float(loss)) - - print( - 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format( - epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(epoch_loss))) - - del classification_loss - del regression_loss - - wandb.log({ - 'avg_train_loss':round(np.mean(epoch_loss),4) - }) - # Update the learning rate - if lr_scheduler is not None: - lr_scheduler.step(np.mean(epoch_loss)) - - et = time.time() - print("\n Total Time - {}\n".format(int(et - st))) - - - -def 
valid_one_epoch(retinanet,epoch_num, valid_data_loader,device): - global best_loss #가장 좋은 손실값 업데이트하기 위해 global 변수로 선언 - - print("Epoch - {} Started".format(epoch_num)) - st = time.time() - - epoch_loss = [] - - for iter_num, data in enumerate(valid_data_loader): - - with torch.no_grad(): - - # Forward - classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda().float()]) - - # Calculating Loss - classification_loss = classification_loss.mean() - regression_loss = regression_loss.mean() - loss = classification_loss + regression_loss - - #Epoch Loss - epoch_loss.append(float(loss)) - - print( - 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format( - epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(epoch_loss))) - - del classification_loss - del regression_loss - - avg_epoch_loss=np.mean(epoch_loss) - wandb.log({ - 'avg_val_loss': round(avg_epoch_loss,4) - }) - - if avg_epoch_loss score_threshold + + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + + + predictions.append(list(zip(valid_labels,valid_boxes))) + + print(f'pred : {predictions[0:5]}\n') + print(f'gt : {ground_truths[0:5]}') + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + #wandb.log({"epoch": epoch, "recall": metrics['recall']}) # Recall을 W&B에 로그합니다. 
+ wandb.log({"epoch": epoch, "total_recall": total_recall, "total_precision": total_precision,"total_f1_score":total_f1_score}) + + categories={2: 'Porosity', 3: 'Slag'} + class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} + # 각 클래스별 성능 로그 + for class_label,class_metrics in metrics['per_class'].items(): + #class_label=class_label.item() + if class_label==2 or class_label==3: + + wandb.log({ + f"class_{categories[class_label]}_recall" : class_metrics['recall'], + f"class_{categories[class_label]}_precision" : class_metrics['precision'], + f"class_{categories[class_label]}_f1_score" : class_metrics['f1_score'], + f"class_{categories[class_label]}_average_iou": class_metrics['average_iou'], + + }) + if total_recall > best_recall: + best_recall = total_recall + + model_save_path = f"/content/drive/MyDrive/models/retinanet_resnet50_fpnv2_{Config['TRAIN_BS']}_{Config['VALID_BS']}.pth" + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'lr_scheduler_state_dict': lr_scheduler.state_dict() + }, model_save_path) + # wandb.save(model_save_path) # 모델 파일을 W&B에 저장합니다. 
+ + return_outputs=metrics['total'] + return return_outputs,class_result + if __name__=='__main__': - # wandb project - wandb.init(project='capstone',name='retinanet_1123',reinit=True) - device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') - torch.cuda.empty_cache() - - # dataset - #train_df=pd.read_csv('/home/irteam/junghye-dcloud-dir/pathfinder/pathfinder_ai/annotations_v2/train_total.csv') - #valid_df=pd.read_csv('/home/irteam/junghye-dcloud-dir/pathfinder/pathfinder_ai/annotations_v2/valid_total.csv') - #image_dir='/home/irteam/junghye-dcloud-dir/pathfinder/data_contrast/before/Image' - - #train_dataset=RT_Dataset(train_df,image_dir,transforms=T.Compose([Augmenter(),Normalizer(),Resizer()])) - #valid_dataset=RT_Dataset(valid_df,image_dir,transforms=T.Compose([Normalizer(),Resizer()])) - - # csv dataset - - train_dataset=CSVDataset('./annotations_v2/retinanet_train.csv','./annotations_v2/classes.csv',transform=T.Compose([Augmenter(),Normalizer()])) - valid_dataset=CSVDataset('./annotations_v2/retinanet_valid.csv','./annotations_v2/classes.csv',transform=T.Compose([Normalizer()])) - - sampler=AspectRatioBasedSampler(train_dataset,batch_size=Config['TRAIN_BS'],drop_last=False) - sampler_val=AspectRatioBasedSampler(valid_dataset,batch_size=Config['VALID_BS'],drop_last=False) - - - train_data_loader = DataLoader( + wandb.init(project='capstone',name='RetinaNet',reinit=True) + + if torch.cuda.is_available(): + device=torch.device('cuda') + + + train_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/train_total.csv') + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/valid_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + + result_dir_path=f'/content/drive/MyDrive/result/RetinaNet' + os.makedirs(result_dir_path,exist_ok=True) + + train_dataset=RT_Dataset(train_df,image_dir,transforms=get_transform(train=True)) + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) + + 
train_dataloader=torch.utils.data.DataLoader( train_dataset, - num_workers = Config['NUM_WORKERS'], - collate_fn = collater, - batch_sampler=sampler, + batch_size=Config['TRAIN_BS'], + shuffle=True, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, ) - valid_data_loader = DataLoader( + valid_dataloader=torch.utils.data.DataLoader( valid_dataset, - num_workers = Config['NUM_WORKERS'], - collate_fn = collater, - batch_sampler=sampler_val, + batch_size=Config['VALID_BS'], + shuffle=False, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + ) - # load model - retinanet = model.resnet50(num_classes = Config['NUM_CLASSES'], pretrained = True) - retinanet.to(device) - wandb.watch(retinanet) - - - #optimizer = torch.optim.Adam(retinanet.parameters(), lr = Config['LR'],weight_decay=Config['WEIGHT_DECAY']) - optimizer=torch.optim.Adam(retinanet.parameters(),lr=Config['LR']) - - #lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 5, gamma=0.5) - lr_scheduler=torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,patience=3,verbose=True) - - ### Training Loop - for epoch in range(Config['EPOCHS']): - - # Call train function - train_one_epoch(retinanet,epoch, optimizer,lr_scheduler,train_data_loader,device=device) - # Call valid function - valid_one_epoch(retinanet,epoch, valid_data_loader,device=device) + + #model=get_object_detection_model(Config['NUM_CLASSES']) + model=torchvision.models.detection.retinanet_resnet50_fpn_v2(num_classes=4,pretrained=False,pretrained_backbone=True) + + model.to(device) + + wandb.watch(model) + + params = [p for p in model.parameters() if p.requires_grad] + + optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) + + #lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.9) + lr_scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15) + + best_recall=-100 + with 
open(f'{result_dir_path}/{Config["TRAIN_BS"]}_{Config["VALID_BS"]}_{Config["EPOCHS"]}.txt','w') as f: + for epoch in range(Config['EPOCHS']): + train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=10) + + lr_scheduler.step() + + # valid data + return_outputs,class_result = validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'epoch : {epoch}, output : {return_outputs}') + f.write(f"Epoch {epoch} Total result:{return_outputs}, class_result : {class_result}\n") \ No newline at end of file diff --git a/ssd300/config.py b/ssd300/config.py new file mode 100644 index 0000000..0978d46 --- /dev/null +++ b/ssd300/config.py @@ -0,0 +1,14 @@ +Config=dict( + EPOCHS=150, + LR=0.0001, + IMG_SIZE=512, + DR_RATE=0.35, + NUM_CLASSES=3, + TRAIN_BS=4, + VALID_BS=2, + NUM_WORKERS=4, + WEIGHT_DECAY=0.0005, + CONTRAST='AFTER' + +) + diff --git a/ssd300/inference.py b/ssd300/inference.py new file mode 100644 index 0000000..1399fe4 --- /dev/null +++ b/ssd300/inference.py @@ -0,0 +1,173 @@ +import sys +sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') +import os +import numpy as np +import pandas as pd + +# for ignoring warnings +import warnings +warnings.filterwarnings('ignore') + +import cv2 + +import torch +import torchvision +from torchvision import transforms as torchtrans +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.ops import nms + + +from tqdm import tqdm +import utils +import transforms as T +from dataset import RT_Dataset +from config import Config +from augment import get_transform + +from torchvision.models.detection.ssd import SSDClassificationHead +from torchvision.models.detection import _utils +from torchvision.models.detection import SSD300_VGG16_Weights + + +# model +def get_object_detection_model(num_classes=4,size=300): + # Load the Torchvision pretrained model. 
+ model = torchvision.models.detection.ssd300_vgg16( + weights=SSD300_VGG16_Weights.COCO_V1 + ) + # Retrieve the list of input channels. + in_channels = _utils.retrieve_out_channels(model.backbone, (size, size)) + # List containing number of anchors based on aspect ratios. + num_anchors = model.anchor_generator.num_anchors_per_location() + # The classification head. + model.head.classification_head = SSDClassificationHead( + in_channels=in_channels, + num_anchors=num_anchors, + num_classes=num_classes, + ) + # Image size for transforms. + model.transform.min_size = (size,) + model.transform.max_size = size + return model + +# valid function +def valid_fn(val_data_loader, model, device): + model.eval() + outputs = [] + ground_truths=[] + + for images,targets in tqdm(val_data_loader): + + images=list(img.to(device) for img in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + output=model(images) + + + for out,target in zip(output,targets): + scores=out['scores'].detach().cpu().numpy() + boxes=out['boxes'].detach().cpu().numpy() + labels=out['labels'].detach().cpu().numpy() + + keep_idx=nms(boxes,scores,iou_threshold=0.1) + + boxes=boxes[keep_idx] + scores=scores[keep_idx] + labels=labels[keep_idx] + + + outputs.append({'boxes': boxes, + 'scores': scores, + 'labels': labels}) + + # ground truth 에 label 추가 + gt_boxes=target['boxes'].cpu().numpy() + gt_labels=target['labels'].cpu().numpy() + + ground_truths.append(list(zip(gt_labels,gt_boxes))) + + + + + return outputs,ground_truths + + +def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2): + global best_recall + outputs, ground_truths = valid_fn(valid_dataloader, model, device) + predictions = [] + + + for output in outputs: + valid_scores=output['scores']>score_threshold + + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + + + 
predictions.append(list(zip(valid_labels,valid_boxes))) + + + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0} + + return_outputs=metrics['total'] + return return_outputs,class_result + +if __name__=='__main__': + + + if torch.cuda.is_available(): + device=torch.device('cuda') + + + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/test_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + + + + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) + + + + valid_dataloader=torch.utils.data.DataLoader( + valid_dataset, + batch_size=Config['VALID_BS'], + shuffle=False, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + + ) + + + model=get_object_detection_model(Config['NUM_CLASSES'],Config['IMG_SIZE']) + + + model_save_path = "/content/drive/MyDrive/models/SSD300_8_4.pth" + saved_state=torch.load(model_save_path,map_location=device) + + model.load_state_dict(saved_state['model_state_dict']) + + model.to(device) + + + + params = [p for p in model.parameters() if p.requires_grad] + + optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY']) + + lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9) + + # valid data + return_outputs,class_result = validate_and_save_best_model(0, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'test Total result : {return_outputs} , class result : {class_result} ') \ No newline at end of file diff --git a/ssd300/train.py b/ssd300/train.py new file mode 100644 index 0000000..fd95745 --- /dev/null +++ b/ssd300/train.py @@ -0,0 +1,205 @@ +import sys 
+sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils') +import os +import numpy as np +import pandas as pd + +# for ignoring warnings +import warnings +warnings.filterwarnings('ignore') + +import cv2 + +import torch +import torchvision +from torchvision import transforms as torchtrans +#from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +import torchvision.models.detection.ssd as ssd +from torchvision.ops import nms + +from engine import train_one_epoch,evaluate +from tqdm import tqdm +import utils +import transforms as T +from dataset import RT_Dataset +from config import Config +from augment import get_transform,get_ssd_transform +import wandb + +from torchvision.models.detection.ssd import SSDClassificationHead +from torchvision.models.detection import _utils +from torchvision.models.detection import SSD300_VGG16_Weights + + +# model +def get_object_detection_model(num_classes=4,size=300): + # Load the Torchvision pretrained model. + model = torchvision.models.detection.ssd300_vgg16( + weights=SSD300_VGG16_Weights.COCO_V1 + ) + # Retrieve the list of input channels. + in_channels = _utils.retrieve_out_channels(model.backbone, (size, size)) + # List containing number of anchors based on aspect ratios. + num_anchors = model.anchor_generator.num_anchors_per_location() + # The classification head. + model.head.classification_head = SSDClassificationHead( + in_channels=in_channels, + num_anchors=num_anchors, + num_classes=num_classes, + ) + # Image size for transforms. 
+ model.transform.min_size = (size,) + model.transform.max_size = size + return model + + +# valid function +def valid_fn(val_data_loader, model, device): + model.eval() + outputs = [] + ground_truths=[] + + for images,targets in tqdm(val_data_loader): + + images=list(img.to(device) for img in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + output=model(images) + + + for out,target in zip(output,targets): + scores=out['scores'].detach().cpu().numpy() + boxes=out['boxes'].detach().cpu().numpy() + labels=out['labels'].detach().cpu().numpy() + + keep_idx=nms(boxes,scores,iou_threshold=0.1) + + boxes=boxes[keep_idx] + scores=scores[keep_idx] + labels=labels[keep_idx] + + + outputs.append({'boxes': boxes, + 'scores': scores, + 'labels': labels}) + + # label 포함시켜 ground truths에 추가 + gt_boxes=target['boxes'].cpu().numpy() + gt_labels=target['labels'].cpu().numpy() + + + ground_truths.append(list(zip(gt_labels,gt_boxes))) + + + return outputs,ground_truths + + +def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2): + global best_recall + outputs, ground_truths = valid_fn(valid_dataloader, model, device) + predictions = [] + for output in outputs: + valid_boxes = output['boxes'][output['scores'] > score_threshold] + valid_labels=output['labels'][output['scores']> score_threshold] + #predictions.append(valid_boxes) + predictions.append(list(zip(valid_labels,valid_boxes))) + + # utils 모듈에 있는 calculate_metrics 함수를 사용 + metrics = utils.calculate_metrics(predictions, ground_truths) + + # 전체 성능 + total_recall=metrics['total']['recall'] + total_precision=metrics['total']['precision'] + total_f1_score= metrics['total']['f1_score'] + + + wandb.log({"epoch": epoch, "total_recall": total_recall, "total_precision": total_precision,"total_f1_score":total_f1_score}) + + categories={2: 'Porosity', 3: 'Slag'} + class_result = {class_label: metrics_val for class_label, metrics_val in 
metrics['per_class'].items() if class_label != 0} + # 각 클래스별 성능 로그 + for class_label,class_metrics in metrics['per_class'].items(): + + if class_label==2 or class_label==3: + + wandb.log({ + f"class_{categories[class_label]}_recall" : class_metrics['recall'], + f"class_{categories[class_label]}_precision" : class_metrics['precision'], + f"class_{categories[class_label]}_f1_score" : class_metrics['f1_score'], + f"class_{categories[class_label]}_average_iou": class_metrics['average_iou'], + + }) + if total_recall > best_recall: + best_recall = total_recall + + model_save_path = f"/content/drive/MyDrive/models/ssd300_real_{Config['TRAIN_BS']}_{Config['VALID_BS']}.pth" + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'lr_scheduler_state_dict': lr_scheduler.state_dict() + }, model_save_path) + + + return_outputs=metrics['total'] + return return_outputs,class_result + +if __name__=='__main__': + wandb.init(project='capstone',name='SSD300_VGG16',reinit=True) + + if torch.cuda.is_available(): + device=torch.device('cuda') + + + train_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/train_total.csv') + valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/valid_total.csv') + image_dir='/content/drive/MyDrive/data/Image' + + result_dir_path=f'/content/drive/MyDrive/result/{Config["MODEL"]}' + os.makedirs(result_dir_path,exist_ok=True) + + train_dataset=RT_Dataset(train_df,image_dir,transforms=get_transform(train=True)) + valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False)) + + train_dataloader=torch.utils.data.DataLoader( + train_dataset, + batch_size=Config['TRAIN_BS'], + shuffle=True, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + ) + + valid_dataloader=torch.utils.data.DataLoader( + valid_dataset, + batch_size=Config['VALID_BS'], + shuffle=False, + num_workers=Config['NUM_WORKERS'], + collate_fn=utils.collate_fn, + + ) + + 
model=get_object_detection_model(Config['NUM_CLASSES'],Config['IMG_SIZE']) + model.to(device) + + wandb.watch(model) + + params = [p for p in model.parameters() if p.requires_grad] + + + + optimizer = torch.optim.Adam(params, lr=Config['LR']) + + lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9) # 3epoch마다 학습률 10%씩 감소 + + best_recall=-100 + with open(f'{result_dir_path}/{Config["TRAIN_BS"]}_{Config["VALID_BS"]}_{Config["EPOCHS"]}.txt','w') as f: + for epoch in range(Config['EPOCHS']): + train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=10) + + lr_scheduler.step() + + # valid data + return_outputs,class_result = validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler) + + print(f'epoch : {epoch}, output : {return_outputs}') + f.write(f"Epoch {epoch} Total result:{return_outputs}, class_result : {class_result}\n") \ No newline at end of file diff --git a/faster-rcnn/augment.py b/utils/augment.py similarity index 58% rename from faster-rcnn/augment.py rename to utils/augment.py index c40b351..7dc1442 100644 --- a/faster-rcnn/augment.py +++ b/utils/augment.py @@ -7,14 +7,18 @@ def get_transform(train): if train: return A.Compose([ - - A.HorizontalFlip(p=0.5), - A.RandomRotate90(p=0.5), + A.Resize(512,512), + A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5), + A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5), + + A.RandomGamma(gamma_limit=(80, 120), p=0.5), A.VerticalFlip(p=0.5), # ToTensorV2 converts image to pytorch tensor without div by 255 ToTensorV2(p=1.0) ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) else: return A.Compose([ + A.Resize(512,512), ToTensorV2(p=1.0) - ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) \ No newline at end of file + ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) + diff --git a/faster-rcnn/coco_eval.py b/utils/coco_eval.py 
similarity index 100% rename from faster-rcnn/coco_eval.py rename to utils/coco_eval.py diff --git a/faster-rcnn/coco_utils.py b/utils/coco_utils.py similarity index 100% rename from faster-rcnn/coco_utils.py rename to utils/coco_utils.py diff --git a/faster-rcnn/dataset.py b/utils/dataset.py similarity index 93% rename from faster-rcnn/dataset.py rename to utils/dataset.py index 7b27186..3b01eb3 100644 --- a/faster-rcnn/dataset.py +++ b/utils/dataset.py @@ -33,16 +33,20 @@ def __getitem__(self,index:int): if len(bndboxes)>0: boxes = [[box[0] , box[1], box[2], box[3]] for box in bndboxes] - labels=[int(label)+1 for label in labels] + #labels=[int(label) for label in labels] boxes=torch.as_tensor(boxes,dtype=torch.float32) area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) - labels = torch.as_tensor(labels, dtype=torch.int64) + labels=[int(label)+1 for label in labels] + #labels = torch.as_tensor(labels, dtype=torch.int64) else: boxes = torch.zeros((0,4),dtype=torch.float32) labels = torch.zeros(0, dtype=torch.int64) # 더미 라벨 area = torch.zeros(0, dtype=torch.float32) # 더미 면적 + + + labels = torch.as_tensor(labels, dtype=torch.int64) #다 crowd x iscrowd = torch.zeros((len(boxes),), dtype=torch.int64) diff --git a/faster-rcnn/engine.py b/utils/engine.py similarity index 91% rename from faster-rcnn/engine.py rename to utils/engine.py index bc50e60..9de2ae7 100644 --- a/faster-rcnn/engine.py +++ b/utils/engine.py @@ -10,22 +10,22 @@ import utils import wandb -def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): +def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq,warmup_epochs=4): model.train() metric_logger = utils.MetricLogger(delimiter=" ") metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) header = 'Epoch: [{}]'.format(epoch) lr_scheduler = None - if epoch == 0: + if epoch best_iou: + best_iou = iou + best_match = i + + # 모든 gt를 돌고 난 후 best_iou > iou_threshold이고 
def update_class_stats(stats, cls, update_type, iou_score=0):
    """Record one detection outcome for class ``cls`` in ``stats`` (in place).

    Args:
        stats: Dict mapping a class key to a counter dict with the keys
            ``'TP'``, ``'FP'``, ``'FN'`` and ``'total_iou'``. A zeroed entry
            is created the first time a class is seen.
        cls: Class key the outcome belongs to.
        update_type: One of ``'TP'``, ``'FP'`` or ``'FN'``.
        iou_score: IoU of the matched pair; only accumulated for ``'TP'``.

    Raises:
        ValueError: If ``update_type`` is not a known outcome. Previously an
            unknown value was silently ignored, which hides typos and
            quietly skews the metrics.
    """
    if update_type not in ('TP', 'FP', 'FN'):
        raise ValueError(
            f"update_type must be 'TP', 'FP' or 'FN', got {update_type!r}"
        )

    counts = stats.setdefault(cls, {'TP': 0, 'FP': 0, 'FN': 0, 'total_iou': 0})
    counts[update_type] += 1
    if update_type == 'TP':
        # Only true positives have a meaningful matched IoU to average later.
        counts['total_iou'] += iou_score


def _metrics_from_counts(tp, fp, fn, iou_sum):
    """Return precision, recall, F1 and mean IoU for one set of counts.

    Every ratio falls back to 0 when its denominator is not positive.
    """
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1_score = (
        2 * precision * recall / (precision + recall)
        if (precision + recall) > 0
        else 0
    )
    average_iou = iou_sum / tp if tp > 0 else 0
    return {
        'precision': precision,
        'recall': recall,
        'f1_score': f1_score,
        'average_iou': average_iou,
    }


def calculate_classwise_metrics(stats):
    """Turn per-class TP/FP/FN counters into per-class and overall metrics.

    Args:
        stats: Dict mapping a class key to a dict with the keys ``'TP'``,
            ``'FP'``, ``'FN'`` and ``'total_iou'``.

    Returns:
        ``{'total': {...}, 'per_class': {cls: {...}}}`` where every inner
        dict has the keys ``'precision'``, ``'recall'``, ``'f1_score'`` and
        ``'average_iou'``. The ``'total'`` entry is computed from the counts
        summed over all classes (micro-averaged), not from the mean of the
        per-class values.
    """
    class_metrics = {}
    total_tp = total_fp = total_fn = total_iou = 0

    for cls, counts in stats.items():
        class_metrics[cls] = _metrics_from_counts(
            counts['TP'], counts['FP'], counts['FN'], counts['total_iou']
        )
        total_tp += counts['TP']
        total_fp += counts['FP']
        total_fn += counts['FN']
        total_iou += counts['total_iou']

    # Overall performance: same formulas applied to the summed counts.
    return {
        'total': _metrics_from_counts(total_tp, total_fp, total_fn, total_iou),
        'per_class': class_metrics,
    }


# IoU computation
def calculate_iou(box1, box2):
    """Calculate the Intersection over Union (IoU) of two bounding boxes.

    Args:
        box1: Four values unpacked as ``(x1, y1, x2, y2)``.
        box2: Four values unpacked as ``(x1, y1, x2, y2)``.

    Returns:
        Intersection area divided by union area, or 0 when the union area
        is zero.
    """
    x1, y1, x2, y2 = box1
    x1g, y1g, x2g, y2g = box2

    # Intersection rectangle; clamping to 0 handles non-overlapping boxes.
    inter_w = max(min(x2, x2g) - max(x1, x1g), 0)
    inter_h = max(min(y2, y2g) - max(y1, y1g), 0)
    intersection_area = inter_w * inter_h

    box1_area = (x2 - x1) * (y2 - y1)
    box2_area = (x2g - x1g) * (y2g - y1g)
    union_area = box1_area + box2_area - intersection_area

    return intersection_area / union_area if union_area != 0 else 0