diff --git a/efficientdet_/config.py b/efficientdet_/config.py
index 9d52298..b0d2433 100644
--- a/efficientdet_/config.py
+++ b/efficientdet_/config.py
@@ -1,9 +1,9 @@
num_epochs = 150
-batch_size=4
+batch_size=16
-num_workers=4
+num_workers=0
lr=0.005
diff --git a/efficientdet_/inference.py b/efficientdet_/inference.py
index a54d9e7..df2af34 100644
--- a/efficientdet_/inference.py
+++ b/efficientdet_/inference.py
@@ -67,7 +67,7 @@ def get_test_transform():
# Effdet config를 통해 모델 불러오기 + ckpt load
def load_net(checkpoint_path, device):
config = get_efficientdet_config('tf_efficientdet_d3')
- config.num_classes = 3
+ config.num_classes = 4
config.image_size = (512,512)
config.soft_nms = False
@@ -89,14 +89,25 @@ def load_net(checkpoint_path, device):
def valid_fn(val_data_loader,test_ann, model, device):
outputs = []
ground_truths=[]
+ labels=[]
+
for images, image_ids in tqdm(val_data_loader):
# gpu 계산을 위해 image.to(device)
+ image_dir='./data_test/01. data/Image'
+ record=test_ann.iloc[image_ids]
+ image_name=record['image_name']
+ image_path=os.path.join(image_dir,record['dataset'])
+ test=cv2.imread(os.path.join(image_path,image_name))
+ h, w, c = test.shape
+
+ a=images
images = torch.stack(images) # bs, ch, w, h
images = images.to(device).float()
- print(image_ids)
+
output = model(images)
+
for out in output:
- outputs.append({'boxes': out.detach().cpu().numpy()[:,:4],
+ outputs.append({'boxes': out.detach().cpu().numpy()[:,:4]*np.array([w,h,w,h])/512,
'scores': out.detach().cpu().numpy()[:,4],
'labels': out.detach().cpu().numpy()[:,-1]})
for image_id in image_ids:
@@ -105,7 +116,7 @@ def valid_fn(val_data_loader,test_ann, model, device):
ground_truths.append(boxes)
- return outputs,ground_truths
+ return outputs,ground_truths,labels
def collate_fn(batch):
return tuple(zip(*batch))
@@ -137,59 +148,63 @@ def calculate_iou(box1, box2):
return iou
# 메트릭 계산 함수
-def calculate_metrics(predictions, ground_truths, iou_threshold=0.0001):
+def calculate_metrics(predictions_boxes,predictions_labels, ground_truths,labels, iou_threshold=0.2):
"""
Calculate precision, recall, and IoU score for a set of predictions and ground truth boxes.
"""
- total_true_positives = 0
- total_false_positives = 0
- total_false_negatives = 0
- total_iou_score = 0
-
- for pred_boxes, gt_boxes in zip(predictions, ground_truths):
- matched = [False] * len(gt_boxes)
-
- for pred_box in pred_boxes: # 모든 prediction에 대해
+ result=np.zeros((4,4))
+
+ for pred_boxes, pred_labels, gt_boxes,gt_labels in zip(predictions_boxes, predictions_labels, ground_truths, labels):
+ if len(pred_labels)==0:
+ pred_labels=np.append(pred_labels,3)
+ pred_boxes=np.append(pred_boxes,0)
+
+ for pred_box,pred_label in zip(pred_boxes,pred_labels): # 모든 prediction에 대해
best_iou = 0
- best_match = None
-
- for i, gt_box in enumerate(gt_boxes):
- iou = calculate_iou(pred_box, gt_box)
-
- if iou > best_iou:
- best_iou = iou
- best_match = i
+ for gt_box, gt_label in zip(gt_boxes,gt_labels):
+ if pred_label==3:
+ x=int(pred_label)
+ else:
+ iou = calculate_iou(pred_box, gt_box)
+ if iou > best_iou:
+ best_iou = iou
+ x=int(gt_label)
+ y=int(pred_label)
- if best_iou > iou_threshold:
- if not matched[best_match]:
- total_true_positives += 1
- total_iou_score += best_iou
- matched[best_match] = True
+ if best_iou > iou_threshold :
+ result[x][y]+=1
+
else:
- total_false_positives += 1
-
- total_false_negatives += len(gt_boxes) - sum(matched)
-
- precision = total_true_positives / (total_true_positives + total_false_positives) if (total_true_positives + total_false_positives) != 0 else 0
- recall = total_true_positives / (total_true_positives + total_false_negatives) if (total_true_positives + total_false_negatives) != 0 else 0
- average_iou = total_iou_score / total_true_positives if total_true_positives != 0 else 0
-
- return {'precision': precision, 'recall': recall, 'average_iou': average_iou}
+ if len(gt_boxes)==0:
+ result[3][int(pred_label)]+=1
+ else:
+ result[x][3]+=1
+
+ print(result)
+ Porosity_precision = result[1][1]/sum(result[:,1])
+ Porosity_recall = result[1][1]/sum(result[1,:])
+ Porosity_F1_score = 2*Porosity_precision*Porosity_recall/(Porosity_precision+Porosity_recall)
+ slag_precision = result[2][2]/sum(result[:,2])
+ slag_recall = result[2][2]/sum(result[2,:])
+ slag_F1_score = 2*slag_precision*slag_recall/(slag_precision+slag_recall)
+
+ return {'Porosity_precision': Porosity_precision, 'Porosity_recall': Porosity_recall, 'Porosity_F1_score': Porosity_F1_score,
+ 'slag_precision': slag_precision, 'slag_recall': slag_recall, 'slag_F1_score': slag_F1_score}
def main():
- test_ann_dir='.annotations/test_total.csv'
- image_dir='/home/irteam/junghye-dcloud-dir/pathfinder/data/Image'
+ test_ann_dir='./data_test/01. data/annotations_v2/test_total.csv'
+ image_dir='./data_test/01. data/Image'
test_dataset=RT_dataset(image_dir,test_ann_dir,transforms=get_test_transform())
test_ann=pd.read_csv(test_ann_dir)
#epoch=50
- checkpoint_path=f'/home/irteam/junghye-dcloud-dir/pathfinder/models/effdet_best_loss_modifiedann.pth'
- score_threshold=0.01 # score : 모델이 해당 객체를 올바르게 감지했다고 확신하는 정도
+ checkpoint_path=f'./models/effdet_best_loss_modifiedann.pth'
+ score_threshold=0.2 # score : 모델이 해당 객체를 올바르게 감지했다고 확신하는 정도
test_data_loader=DataLoader(
test_dataset,
- batch_size=2,
+ batch_size=1,
shuffle=False,
- num_workers=4,
+ num_workers=0,
collate_fn=collate_fn
)
@@ -198,7 +213,7 @@ def main():
model=load_net(checkpoint_path,device)
- outputs,ground_truths=valid_fn(test_data_loader,test_ann,model,device)
+ outputs,gt_boxes,gt_labels=valid_fn(test_data_loader,test_ann,model,device)
# calculate precision, recall, average_iou scores
# 테스트 데이터에서 상위 점수를 가진 bounding box만 선택
@@ -208,8 +223,8 @@ def main():
predictions.append(valid_boxes)
# calculate precision, recall, average_iou scores
- metrics = calculate_metrics(predictions, ground_truths)
+ metrics = calculate_metrics(predictions_boxes,predictions_labels,gt_boxes,gt_labels)
print(metrics)
if __name__ == "__main__":
- main()
\ No newline at end of file
+ main()
diff --git a/efficientdet_/model.py b/efficientdet_/model.py
index 37c80cd..22eb068 100644
--- a/efficientdet_/model.py
+++ b/efficientdet_/model.py
@@ -5,7 +5,7 @@
def get_net(checkpoint_path=None):
config = get_efficientdet_config('tf_efficientdet_d3')
- config.num_classes = 3
+ config.num_classes = 4
config.image_size = (512,512)
config.soft_nms = False
@@ -18,4 +18,4 @@ def get_net(checkpoint_path=None):
checkpoint = torch.load(checkpoint_path)
net.load_state_dict(checkpoint['model_state_dict'])
- return DetBenchTrain(net)
\ No newline at end of file
+ return DetBenchTrain(net)
diff --git a/faster-rcnn/config.py b/faster-rcnn/config.py
index 91297a3..6a49ccb 100644
--- a/faster-rcnn/config.py
+++ b/faster-rcnn/config.py
@@ -1,13 +1,13 @@
Config=dict(
- EPOCHS=150,
- LR=2e-4,
- IMG_SIZE=224,
+ EPOCHS=50,
+ LR=5e-4,
+ IMG_SIZE=640,
DR_RATE=0.35,
NUM_CLASSES=4,
- TRAIN_BS=4,
- VALID_BS=2,
+ TRAIN_BS=8,
+ VALID_BS=4,
NUM_WORKERS=4,
WEIGHT_DECAY=0.0005,
- CONTRAST='AFTER'
+ CONTRAST='AFTER',
)
\ No newline at end of file
diff --git a/faster-rcnn/inference.py b/faster-rcnn/inference.py
new file mode 100644
index 0000000..9f689cd
--- /dev/null
+++ b/faster-rcnn/inference.py
@@ -0,0 +1,166 @@
+import sys
+sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils')
+import os
+import numpy as np
+import pandas as pd
+
+# for ignoring warnings
+import warnings
+warnings.filterwarnings('ignore')
+
+import cv2
+
+import torch
+import torchvision
+from torchvision import transforms as torchtrans
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from torchvision.ops import nms
+
+
+from tqdm import tqdm
+import utils
+import transforms as T
+from dataset import RT_Dataset
+from config import Config
+from augment import get_transform
+
+from torchvision.models.detection.ssd import SSDClassificationHead
+from torchvision.models.detection import _utils
+from torchvision.models.detection import SSD300_VGG16_Weights
+
+
+
+# model
+def get_object_detection_model(num_classes):
+
+    # load a model pre-trained on COCO
+    model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=True)
+
+ # get number of input features for the classifier
+ in_features = model.roi_heads.box_predictor.cls_score.in_features
+ # replace the pre-trained head with a new one
+ model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
+
+ return model
+
+# valid function
+def valid_fn(val_data_loader, model, device):
+ model.eval()
+ outputs = []
+ ground_truths=[]
+
+ for images,targets in tqdm(val_data_loader):
+
+ images=list(img.to(device) for img in images)
+ targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
+
+ output=model(images)
+
+
+ for out,target in zip(output,targets):
+ scores=out['scores'].detach().cpu().numpy()
+ boxes=out['boxes'].detach().cpu().numpy()
+ labels=out['labels'].detach().cpu().numpy()
+
+ keep_idx=nms(boxes,scores,iou_threshold=0.1)
+
+ boxes=boxes[keep_idx]
+ scores=scores[keep_idx]
+ labels=labels[keep_idx]
+
+
+ outputs.append({'boxes': boxes,
+ 'scores': scores,
+ 'labels': labels})
+
+ # ground truth 에 label 추가
+ gt_boxes=target['boxes'].cpu().numpy()
+ gt_labels=target['labels'].cpu().numpy()
+
+ ground_truths.append(list(zip(gt_labels,gt_boxes)))
+
+
+
+
+ return outputs,ground_truths
+
+
+def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2):
+ global best_recall
+ outputs, ground_truths = valid_fn(valid_dataloader, model, device)
+ predictions = []
+
+
+ for output in outputs:
+ valid_scores=output['scores']>score_threshold
+
+ valid_boxes = output['boxes'][output['scores'] > score_threshold]
+ valid_labels=output['labels'][output['scores']> score_threshold]
+
+
+ predictions.append(list(zip(valid_labels,valid_boxes)))
+
+
+
+ # utils 모듈에 있는 calculate_metrics 함수를 사용
+ metrics = utils.calculate_metrics(predictions, ground_truths)
+
+ # 전체 성능
+ total_recall=metrics['total']['recall']
+ total_precision=metrics['total']['precision']
+ total_f1_score= metrics['total']['f1_score']
+
+ class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0}
+
+ return_outputs=metrics['total']
+ return return_outputs,class_result
+
+if __name__=='__main__':
+
+
+ if torch.cuda.is_available():
+ device=torch.device('cuda')
+
+
+ valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/test_total.csv')
+ image_dir='/content/drive/MyDrive/data/Image'
+
+
+
+
+ valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False))
+
+
+
+ valid_dataloader=torch.utils.data.DataLoader(
+ valid_dataset,
+ batch_size=Config['VALID_BS'],
+ shuffle=False,
+ num_workers=Config['NUM_WORKERS'],
+ collate_fn=utils.collate_fn,
+
+ )
+
+
+ model=get_object_detection_model(Config['NUM_CLASSES'])
+
+
+ model_save_path = "/content/drive/MyDrive/models/fasterrcnn_resnet50_fpnv2_8_4.pth"
+ saved_state=torch.load(model_save_path,map_location=device)
+
+ model.load_state_dict(saved_state['model_state_dict'])
+
+ model.to(device)
+
+
+
+ params = [p for p in model.parameters() if p.requires_grad]
+
+ optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY'])
+
+ lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9)
+
+ # valid data
+ return_outputs,class_result = validate_and_save_best_model(0, model, valid_dataloader, device, optimizer, lr_scheduler)
+
+ print(f'test Total result : {return_outputs} , class result : {class_result} ')
\ No newline at end of file
diff --git a/faster-rcnn/test.ipynb b/faster-rcnn/test.ipynb
deleted file mode 100644
index 4b49849..0000000
--- a/faster-rcnn/test.ipynb
+++ /dev/null
@@ -1,48 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([], shape=(0, 5), dtype=float64)"
- ]
- },
- "execution_count": 1,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import numpy as np\n",
- "\n",
- "\n",
- "np.zeros((0,5))"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "pixt",
- "language": "python",
- "name": "pixt"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/faster-rcnn/train.py b/faster-rcnn/train.py
index 24c702a..8323510 100644
--- a/faster-rcnn/train.py
+++ b/faster-rcnn/train.py
@@ -1,3 +1,5 @@
+import sys
+sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils')
import os
import numpy as np
import pandas as pd
@@ -10,10 +12,12 @@
import torch
import torchvision
-from torchvision import transforms as torchtrans
+from torchvision import transforms as torchtrans
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from torchvision.ops import nms
from engine import train_one_epoch,evaluate
+from tqdm import tqdm
import utils
import transforms as T
from dataset import RT_Dataset
@@ -22,33 +26,132 @@
import wandb
-# model
-
def get_object_detection_model(num_classes):
# load a model pre-trained pre-trained on COCO
- model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
-
+ model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=True)
+
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
- model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
+ model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
return model
-
+# valid function
+def valid_fn(val_data_loader, model, device):
+ model.eval()
+ outputs = []
+ ground_truths=[]
+
+ for images,targets in tqdm(val_data_loader):
+
+ images=list(img.to(device) for img in images)
+ targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
+
+ output=model(images)
+
+
+ for out,target in zip(output,targets):
+ scores=out['scores'].detach().cpu().numpy()
+ boxes=out['boxes'].detach().cpu().numpy()
+ labels=out['labels'].detach().cpu().numpy()
+
+ keep_idx=nms(boxes,scores,iou_threshold=0.1)
+
+ boxes=boxes[keep_idx]
+ scores=scores[keep_idx]
+ labels=labels[keep_idx]
+
+
+ outputs.append({'boxes': boxes,
+ 'scores': scores,
+ 'labels': labels})
+
+ # ground truth 에 label 추가
+ gt_boxes=target['boxes'].cpu().numpy()
+ gt_labels=target['labels'].cpu().numpy()
+
+ ground_truths.append(list(zip(gt_labels,gt_boxes)))
+
+
+
+
+ return outputs,ground_truths
+
+
+def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.1):
+ global best_recall
+ outputs, ground_truths = valid_fn(valid_dataloader, model, device)
+ predictions = []
+
+
+ for output in outputs:
+ valid_scores=output['scores']>score_threshold
+
+ valid_boxes = output['boxes'][output['scores'] > score_threshold]
+ valid_labels=output['labels'][output['scores']> score_threshold]
+
+
+ predictions.append(list(zip(valid_labels,valid_boxes)))
+
+ print(f'pred : {predictions[0:5]}\n')
+ print(f'gt : {ground_truths[0:5]}')
+
+ # utils 모듈에 있는 calculate_metrics 함수를 사용
+ metrics = utils.calculate_metrics(predictions, ground_truths)
+
+ # 전체 성능
+ total_recall=metrics['total']['recall']
+ total_precision=metrics['total']['precision']
+ total_f1_score= metrics['total']['f1_score']
+
+ wandb.log({"epoch": epoch, "total_recall": total_recall, "total_precision": total_precision,"total_f1_score":total_f1_score})
+
+ categories={2: 'Porosity', 3: 'Slag'}
+ class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0}
+ # 각 클래스별 성능 로그
+ for class_label,class_metrics in metrics['per_class'].items():
+
+ if class_label==2 or class_label==3:
+
+ wandb.log({
+ f"class_{categories[class_label]}_recall" : class_metrics['recall'],
+ f"class_{categories[class_label]}_precision" : class_metrics['precision'],
+ f"class_{categories[class_label]}_f1_score" : class_metrics['f1_score'],
+ f"class_{categories[class_label]}_average_iou": class_metrics['average_iou'],
+
+ })
+ if total_recall > best_recall:
+ best_recall = total_recall
+
+ model_save_path = f"/content/drive/MyDrive/models/fasterrcnn_resnet50_fpnv2_{Config['TRAIN_BS']}_{Config['VALID_BS']}.pth"
+ torch.save({
+ 'epoch': epoch,
+ 'model_state_dict': model.state_dict(),
+ 'optimizer_state_dict': optimizer.state_dict(),
+ 'lr_scheduler_state_dict': lr_scheduler.state_dict()
+ },model_save_path)
+
+ return_outputs=metrics['total']
+ return return_outputs,class_result
+
if __name__=='__main__':
wandb.init(project='capstone',name='faster-rcnn',reinit=True)
-
+
if torch.cuda.is_available():
device=torch.device('cuda')
-
-
- train_df=pd.read_csv('../../data/annotations/train_total.csv')
- valid_df=pd.read_csv('../../data/annotations/valid_total.csv')
- image_dir='../../data_contrast/after/Image'
+
+
+ train_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/train_total.csv')
+ valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/valid_total.csv')
+ image_dir='/content/drive/MyDrive/data/Image'
+
+
+ result_dir_path=f'/content/drive/MyDrive/result/Faster-RCNN'
+ os.makedirs(result_dir_path,exist_ok=True)
train_dataset=RT_Dataset(train_df,image_dir,transforms=get_transform(train=True))
valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False))
@@ -67,27 +170,32 @@ def get_object_detection_model(num_classes):
shuffle=False,
num_workers=Config['NUM_WORKERS'],
collate_fn=utils.collate_fn,
-
+
)
model=get_object_detection_model(Config['NUM_CLASSES'])
model.to(device)
-
+
wandb.watch(model)
-
+
params = [p for p in model.parameters() if p.requires_grad]
-
+
optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY'])
-
- lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1)
-
-
+
+ lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9)
+
+ best_recall=-100
+
for epoch in range(Config['EPOCHS']):
train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=10)
-
+
lr_scheduler.step()
-
- evaluate(model, valid_dataloader, device=device)
+
+ # valid data
+ return_outputs,class_result = validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler)
+
+ print(f'epoch : {epoch}, output : {return_outputs}')
+
diff --git a/retinanet_/dataset.py b/retinanet_/dataset.py
deleted file mode 100644
index 13513d8..0000000
--- a/retinanet_/dataset.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import torch
-from torch.utils.data import DataLoader, Dataset
-import cv2
-import numpy as np
-
-
-class RT_Dataset(Dataset):
- def __init__(self,dataframe,image_dir,transforms=None):
- super().__init__()
- self.image_ids=dataframe['image_number'].unique()
- self.df=dataframe
- self.image_dir=image_dir
- self.transforms=transforms
-
- def __getitem__(self,index:int):
- image_id=self.image_ids[index]
- records=self.df[self.df['image_number']==image_id]
-
- image=cv2.imread(f'{self.image_dir}/{records["dataset"].values[0]}/{records["image_name"].values[0]}',cv2.IMREAD_COLOR)
- image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB).astype(np.float32)
- image/=255.0
-
- bndbox=np.array(eval(records['bndbox'].values[0]))
- labels=np.array(eval(records['labels'].values[0]))
- boxes=np.zeros((bndbox.shape[0],5))
-
- if bndbox.size>0:
- boxes[:,0:4]=bndbox
- boxes[:,4]=labels
-
- sample={'img':image, 'annot':boxes}
-
- if self.transforms:
- sample=self.transforms(sample)
-
- return sample
-
- def __len__(self) -> int:
- return self.image_ids.shape[0]
\ No newline at end of file
diff --git a/retinanet_/inference.py b/retinanet_/inference.py
new file mode 100644
index 0000000..ddf8176
--- /dev/null
+++ b/retinanet_/inference.py
@@ -0,0 +1,166 @@
+import sys
+sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils')
+import os
+import numpy as np
+import pandas as pd
+
+# for ignoring warnings
+import warnings
+warnings.filterwarnings('ignore')
+
+import cv2
+
+import torch
+import torchvision
+from torchvision import transforms as torchtrans
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from torchvision.ops import nms
+
+
+from tqdm import tqdm
+import utils
+import transforms as T
+from dataset import RT_Dataset
+from config import Config
+from augment import get_transform
+
+from torchvision.models.detection.ssd import SSDClassificationHead
+from torchvision.models.detection import _utils
+from torchvision.models.detection import SSD300_VGG16_Weights
+
+
+
+
+# model
+def get_object_detection_model(num_classes):
+
+    # load a model pre-trained on COCO
+ model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=True)
+
+ # get number of input features for the classifier
+ in_features = model.roi_heads.box_predictor.cls_score.in_features
+ # replace the pre-trained head with a new one
+ model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
+
+ return model
+
+# valid function
+def valid_fn(val_data_loader, model, device):
+ model.eval()
+ outputs = []
+ ground_truths=[]
+
+ for images,targets in tqdm(val_data_loader):
+
+ images=list(img.to(device) for img in images)
+ targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
+
+ output=model(images)
+
+
+ for out,target in zip(output,targets):
+ scores=out['scores'].detach().cpu().numpy()
+ boxes=out['boxes'].detach().cpu().numpy()
+ labels=out['labels'].detach().cpu().numpy()
+
+ keep_idx=nms(boxes,scores,iou_threshold=0.1)
+
+ boxes=boxes[keep_idx]
+ scores=scores[keep_idx]
+ labels=labels[keep_idx]
+
+
+ outputs.append({'boxes': boxes,
+ 'scores': scores,
+ 'labels': labels})
+
+ # ground truth 에 label 추가
+ gt_boxes=target['boxes'].cpu().numpy()
+ gt_labels=target['labels'].cpu().numpy()
+
+ ground_truths.append(list(zip(gt_labels,gt_boxes)))
+
+
+
+ return outputs,ground_truths
+
+
+def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2):
+ global best_recall
+ outputs, ground_truths = valid_fn(valid_dataloader, model, device)
+ predictions = []
+
+
+ for output in outputs:
+ valid_scores=output['scores']>score_threshold
+
+ valid_boxes = output['boxes'][output['scores'] > score_threshold]
+ valid_labels=output['labels'][output['scores']> score_threshold]
+
+
+ predictions.append(list(zip(valid_labels,valid_boxes)))
+
+
+
+ # utils 모듈에 있는 calculate_metrics 함수를 사용
+ metrics = utils.calculate_metrics(predictions, ground_truths)
+
+ # 전체 성능
+ total_recall=metrics['total']['recall']
+ total_precision=metrics['total']['precision']
+ total_f1_score= metrics['total']['f1_score']
+
+ class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0}
+
+ return_outputs=metrics['total']
+ return return_outputs,class_result
+
+if __name__=='__main__':
+
+
+ if torch.cuda.is_available():
+ device=torch.device('cuda')
+
+
+ valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/test_total.csv')
+ image_dir='/content/drive/MyDrive/data/Image'
+
+
+
+
+ valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False))
+
+
+
+ valid_dataloader=torch.utils.data.DataLoader(
+ valid_dataset,
+ batch_size=Config['VALID_BS'],
+ shuffle=False,
+ num_workers=Config['NUM_WORKERS'],
+ collate_fn=utils.collate_fn,
+
+ )
+
+
+ model=torchvision.models.detection.retinanet_resnet50_fpn_v2(num_classes=4,pretrained=False,pretrained_backbone=True)
+
+
+ model_save_path = "/content/drive/MyDrive/models/retinanet_resnet50_fpnv2_8_4.pth"
+ saved_state=torch.load(model_save_path,map_location=device)
+
+ model.load_state_dict(saved_state['model_state_dict'])
+
+ model.to(device)
+
+
+
+ params = [p for p in model.parameters() if p.requires_grad]
+
+ optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY'])
+
+ lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9)
+
+ # valid data
+ return_outputs,class_result = validate_and_save_best_model(0, model, valid_dataloader, device, optimizer, lr_scheduler)
+
+ print(f'test Total result : {return_outputs} , class result : {class_result} ')
\ No newline at end of file
diff --git a/retinanet_/test.ipynb b/retinanet_/test.ipynb
deleted file mode 100644
index 6c7d865..0000000
--- a/retinanet_/test.ipynb
+++ /dev/null
@@ -1,346 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "\n",
- "os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n",
- "\n",
- "import re\n",
- "import cv2\n",
- "import time\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "\n",
- "\n",
- "import torch\n",
- "import torch.optim as optim\n",
- "import torchvision.transforms as T\n",
- "from torchvision.utils import make_grid \n",
- "from torch.utils.data import DataLoader, Dataset\n",
- "\n",
- "from retinanet.csv_eval_original import evaluate \n",
- "from retinanet import model\n",
- "from retinanet.dataloader import collater, Resizer_512, Augmenter, Normalizer, UnNormalizer,CSVDataset\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "test_dataset=CSVDataset('../annotations_v2/retinanet_test.csv','../annotations_v2/classes.csv',transform=T.Compose([Normalizer(),Resizer_512()]))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "\n",
- "\n",
- "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
- "torch.cuda.empty_cache()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "DataParallel(\n",
- " (module): ResNet(\n",
- " (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
- " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
- " (layer1): Sequential(\n",
- " (0): Bottleneck(\n",
- " (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " (downsample): Sequential(\n",
- " (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " )\n",
- " )\n",
- " (1): Bottleneck(\n",
- " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " (2): Bottleneck(\n",
- " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " )\n",
- " (layer2): Sequential(\n",
- " (0): Bottleneck(\n",
- " (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " (downsample): Sequential(\n",
- " (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
- " (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " )\n",
- " )\n",
- " (1): Bottleneck(\n",
- " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " (2): Bottleneck(\n",
- " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " (3): Bottleneck(\n",
- " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " )\n",
- " (layer3): Sequential(\n",
- " (0): Bottleneck(\n",
- " (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " (downsample): Sequential(\n",
- " (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
- " (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " )\n",
- " )\n",
- " (1): Bottleneck(\n",
- " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " (2): Bottleneck(\n",
- " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " (3): Bottleneck(\n",
- " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " (4): Bottleneck(\n",
- " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " (5): Bottleneck(\n",
- " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " )\n",
- " (layer4): Sequential(\n",
- " (0): Bottleneck(\n",
- " (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " (downsample): Sequential(\n",
- " (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
- " (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " )\n",
- " )\n",
- " (1): Bottleneck(\n",
- " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " (2): Bottleneck(\n",
- " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
- " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
- " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (relu): ReLU(inplace=True)\n",
- " )\n",
- " )\n",
- " (fpn): PyramidFeatures(\n",
- " (P5_1): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))\n",
- " (P5_upsampled): Upsample(scale_factor=2.0, mode='nearest')\n",
- " (P5_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (P4_1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))\n",
- " (P4_upsampled): Upsample(scale_factor=2.0, mode='nearest')\n",
- " (P4_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (P3_1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))\n",
- " (P3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (P6): Conv2d(2048, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
- " (P7_1): ReLU()\n",
- " (P7_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
- " )\n",
- " (regressionModel): RegressionModel(\n",
- " (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (act1): ReLU()\n",
- " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (act2): ReLU()\n",
- " (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (act3): ReLU()\n",
- " (conv4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (act4): ReLU()\n",
- " (output): Conv2d(256, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " )\n",
- " (classificationModel): ClassificationModel(\n",
- " (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (act1): ReLU()\n",
- " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (act2): ReLU()\n",
- " (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (act3): ReLU()\n",
- " (conv4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (act4): ReLU()\n",
- " (output): Conv2d(256, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (output_act): Sigmoid()\n",
- " )\n",
- " (anchors): Anchors()\n",
- " (regressBoxes): BBoxTransform()\n",
- " (clipBoxes): ClipBoxes()\n",
- " (focalLoss): FocalLoss()\n",
- " )\n",
- ")"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "retinanet = torch.load(\"/home/irteam/junghye-dcloud-dir/pathfinder/pathfinder_ai/models/retinanet_50_epoch3.pt\", map_location=device)\n",
- "\n",
- "# 모델을 평가 모드로 전환합니다.\n",
- "retinanet.eval()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "698/698\n",
- "mAP:\n",
- "Others: 0.0\n",
- "detect된 거 없음\n",
- "Porosity: 0.0\n",
- "detect된 거 없음\n",
- "Slag: 0.0\n",
- "detect된 거 없음\n",
- "Normal: 0\n",
- "detect된 거 없음\n"
- ]
- },
- {
- "ename": "",
- "evalue": "",
- "output_type": "error",
- "traceback": [
- "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click here for more info. View Jupyter log for further details."
- ]
- }
- ],
- "source": [
- "average_precisions = evaluate(test_dataset, retinanet, iou_threshold=0.000000001, score_threshold=0.0000000001, max_detections=1000000,save_path='./')"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "pixt",
- "language": "python",
- "name": "pixt"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/retinanet_/train.py b/retinanet_/train.py
index 66f3760..ee45437 100644
--- a/retinanet_/train.py
+++ b/retinanet_/train.py
@@ -1,193 +1,191 @@
+import sys
+sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils')
import os
-
-os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
-
-import re
-import cv2
-import time
import numpy as np
import pandas as pd
+# for ignoring warnings
+import warnings
+warnings.filterwarnings('ignore')
-import torch
-import torch.optim as optim
-import torchvision.transforms as T
-from torchvision.utils import make_grid
-from torch.utils.data import DataLoader, Dataset
+import cv2
-from retinanet import model
-from retinanet.dataloader import collater, Resizer, Augmenter, Normalizer, UnNormalizer,CSVDataset,AspectRatioBasedSampler
-from config import Config
+import torch
+import torchvision
+from torchvision import transforms as torchtrans
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from torchvision.ops import nms
+
+from engine import train_one_epoch,evaluate
+from tqdm import tqdm
+import utils
+import transforms as T
from dataset import RT_Dataset
+from config import Config
+from augment import get_transform
import wandb
-
-
-
-best_loss=float('inf')
-
-def train_one_epoch(retinanet,epoch_num,optimizer,scheduler, train_data_loader,device):
-
- print("Epoch - {} Started".format(epoch_num))
- st = time.time()
-
- retinanet.train()
-
- epoch_loss = []
-
- for iter_num, data in enumerate(train_data_loader):
-
- # Reseting gradients after each iter
- optimizer.zero_grad()
-
- # Forward
- #print(f'{data["annot"]}')
- classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda().float()])
-
- # Calculating Loss
- classification_loss = classification_loss.mean()
- regression_loss = regression_loss.mean()
-
- loss = classification_loss + regression_loss
-
- if bool(loss == 0):
- continue
-
- # Calculating Gradients
- loss.backward()
-
- # Gradient Clipping
- torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
-
- # Updating Weights
- optimizer.step()
-
- #Epoch Loss
- epoch_loss.append(float(loss))
-
- print(
- 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
- epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(epoch_loss)))
-
- del classification_loss
- del regression_loss
-
- wandb.log({
- 'avg_train_loss':round(np.mean(epoch_loss),4)
- })
- # Update the learning rate
- if lr_scheduler is not None:
- lr_scheduler.step(np.mean(epoch_loss))
-
- et = time.time()
- print("\n Total Time - {}\n".format(int(et - st)))
-
-
-
-def valid_one_epoch(retinanet,epoch_num, valid_data_loader,device):
- global best_loss #가장 좋은 손실값 업데이트하기 위해 global 변수로 선언
-
- print("Epoch - {} Started".format(epoch_num))
- st = time.time()
-
- epoch_loss = []
-
- for iter_num, data in enumerate(valid_data_loader):
-
- with torch.no_grad():
-
- # Forward
- classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda().float()])
-
- # Calculating Loss
- classification_loss = classification_loss.mean()
- regression_loss = regression_loss.mean()
- loss = classification_loss + regression_loss
-
- #Epoch Loss
- epoch_loss.append(float(loss))
-
- print(
- 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
- epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(epoch_loss)))
-
- del classification_loss
- del regression_loss
-
- avg_epoch_loss=np.mean(epoch_loss)
- wandb.log({
- 'avg_val_loss': round(avg_epoch_loss,4)
- })
-
- if avg_epoch_loss score_threshold
+
+ valid_boxes = output['boxes'][output['scores'] > score_threshold]
+ valid_labels=output['labels'][output['scores']> score_threshold]
+
+
+ predictions.append(list(zip(valid_labels,valid_boxes)))
+
+ print(f'pred : {predictions[0:5]}\n')
+ print(f'gt : {ground_truths[0:5]}')
+
+ # utils 모듈에 있는 calculate_metrics 함수를 사용
+ metrics = utils.calculate_metrics(predictions, ground_truths)
+
+ # 전체 성능
+ total_recall=metrics['total']['recall']
+ total_precision=metrics['total']['precision']
+ total_f1_score= metrics['total']['f1_score']
+
+ #wandb.log({"epoch": epoch, "recall": metrics['recall']}) # Recall을 W&B에 로그합니다.
+ wandb.log({"epoch": epoch, "total_recall": total_recall, "total_precision": total_precision,"total_f1_score":total_f1_score})
+
+ categories={2: 'Porosity', 3: 'Slag'}
+ class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0}
+ # 각 클래스별 성능 로그
+ for class_label,class_metrics in metrics['per_class'].items():
+ #class_label=class_label.item()
+ if class_label==2 or class_label==3:
+
+ wandb.log({
+ f"class_{categories[class_label]}_recall" : class_metrics['recall'],
+ f"class_{categories[class_label]}_precision" : class_metrics['precision'],
+ f"class_{categories[class_label]}_f1_score" : class_metrics['f1_score'],
+ f"class_{categories[class_label]}_average_iou": class_metrics['average_iou'],
+
+ })
+ if total_recall > best_recall:
+ best_recall = total_recall
+
+ model_save_path = f"/content/drive/MyDrive/models/retinanet_resnet50_fpnv2_{Config['TRAIN_BS']}_{Config['VALID_BS']}.pth"
+ torch.save({
+ 'epoch': epoch,
+ 'model_state_dict': model.state_dict(),
+ 'optimizer_state_dict': optimizer.state_dict(),
+ 'lr_scheduler_state_dict': lr_scheduler.state_dict()
+ }, model_save_path)
+ # wandb.save(model_save_path) # 모델 파일을 W&B에 저장합니다.
+
+ return_outputs=metrics['total']
+ return return_outputs,class_result
+
if __name__=='__main__':
- # wandb project
- wandb.init(project='capstone',name='retinanet_1123',reinit=True)
- device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
- torch.cuda.empty_cache()
-
- # dataset
- #train_df=pd.read_csv('/home/irteam/junghye-dcloud-dir/pathfinder/pathfinder_ai/annotations_v2/train_total.csv')
- #valid_df=pd.read_csv('/home/irteam/junghye-dcloud-dir/pathfinder/pathfinder_ai/annotations_v2/valid_total.csv')
- #image_dir='/home/irteam/junghye-dcloud-dir/pathfinder/data_contrast/before/Image'
-
- #train_dataset=RT_Dataset(train_df,image_dir,transforms=T.Compose([Augmenter(),Normalizer(),Resizer()]))
- #valid_dataset=RT_Dataset(valid_df,image_dir,transforms=T.Compose([Normalizer(),Resizer()]))
-
- # csv dataset
-
- train_dataset=CSVDataset('./annotations_v2/retinanet_train.csv','./annotations_v2/classes.csv',transform=T.Compose([Augmenter(),Normalizer()]))
- valid_dataset=CSVDataset('./annotations_v2/retinanet_valid.csv','./annotations_v2/classes.csv',transform=T.Compose([Normalizer()]))
-
- sampler=AspectRatioBasedSampler(train_dataset,batch_size=Config['TRAIN_BS'],drop_last=False)
- sampler_val=AspectRatioBasedSampler(valid_dataset,batch_size=Config['VALID_BS'],drop_last=False)
-
-
- train_data_loader = DataLoader(
+ wandb.init(project='capstone',name='RetinaNet',reinit=True)
+
+ if torch.cuda.is_available():
+ device=torch.device('cuda')
+
+
+ train_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/train_total.csv')
+ valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/valid_total.csv')
+ image_dir='/content/drive/MyDrive/data/Image'
+
+
+ result_dir_path=f'/content/drive/MyDrive/result/RetinaNet'
+ os.makedirs(result_dir_path,exist_ok=True)
+
+ train_dataset=RT_Dataset(train_df,image_dir,transforms=get_transform(train=True))
+ valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False))
+
+ train_dataloader=torch.utils.data.DataLoader(
train_dataset,
- num_workers = Config['NUM_WORKERS'],
- collate_fn = collater,
- batch_sampler=sampler,
+ batch_size=Config['TRAIN_BS'],
+ shuffle=True,
+ num_workers=Config['NUM_WORKERS'],
+ collate_fn=utils.collate_fn,
)
- valid_data_loader = DataLoader(
+ valid_dataloader=torch.utils.data.DataLoader(
valid_dataset,
- num_workers = Config['NUM_WORKERS'],
- collate_fn = collater,
- batch_sampler=sampler_val,
+ batch_size=Config['VALID_BS'],
+ shuffle=False,
+ num_workers=Config['NUM_WORKERS'],
+ collate_fn=utils.collate_fn,
+
)
- # load model
- retinanet = model.resnet50(num_classes = Config['NUM_CLASSES'], pretrained = True)
- retinanet.to(device)
- wandb.watch(retinanet)
-
-
- #optimizer = torch.optim.Adam(retinanet.parameters(), lr = Config['LR'],weight_decay=Config['WEIGHT_DECAY'])
- optimizer=torch.optim.Adam(retinanet.parameters(),lr=Config['LR'])
-
- #lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 5, gamma=0.5)
- lr_scheduler=torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,patience=3,verbose=True)
-
- ### Training Loop
- for epoch in range(Config['EPOCHS']):
-
- # Call train function
- train_one_epoch(retinanet,epoch, optimizer,lr_scheduler,train_data_loader,device=device)
- # Call valid function
- valid_one_epoch(retinanet,epoch, valid_data_loader,device=device)
+
+ #model=get_object_detection_model(Config['NUM_CLASSES'])
+ model=torchvision.models.detection.retinanet_resnet50_fpn_v2(num_classes=4,pretrained=False,pretrained_backbone=True)
+
+ model.to(device)
+
+ wandb.watch(model)
+
+ params = [p for p in model.parameters() if p.requires_grad]
+
+ optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY'])
+
+ #lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.9)
+ lr_scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15)
+
+ best_recall=-100
+ with open(f'{result_dir_path}/{Config["TRAIN_BS"]}_{Config["VALID_BS"]}_{Config["EPOCHS"]}.txt','w') as f:
+ for epoch in range(Config['EPOCHS']):
+ train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=10)
+
+ lr_scheduler.step()
+
+ # valid data
+ return_outputs,class_result = validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler)
+
+ print(f'epoch : {epoch}, output : {return_outputs}')
+ f.write(f"Epoch {epoch} Total result:{return_outputs}, class_result : {class_result}\n")
\ No newline at end of file
diff --git a/ssd300/config.py b/ssd300/config.py
new file mode 100644
index 0000000..0978d46
--- /dev/null
+++ b/ssd300/config.py
@@ -0,0 +1,14 @@
+Config=dict(
+ EPOCHS=150,
+ LR=0.0001,
+ IMG_SIZE=512,
+ DR_RATE=0.35,
+ NUM_CLASSES=3,
+ TRAIN_BS=4,
+ VALID_BS=2,
+ NUM_WORKERS=4,
+ WEIGHT_DECAY=0.0005,
+ CONTRAST='AFTER'
+
+)
+
diff --git a/ssd300/inference.py b/ssd300/inference.py
new file mode 100644
index 0000000..1399fe4
--- /dev/null
+++ b/ssd300/inference.py
@@ -0,0 +1,173 @@
+import sys
+sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils')
+import os
+import numpy as np
+import pandas as pd
+
+# for ignoring warnings
+import warnings
+warnings.filterwarnings('ignore')
+
+import cv2
+
+import torch
+import torchvision
+from torchvision import transforms as torchtrans
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from torchvision.ops import nms
+
+
+from tqdm import tqdm
+import utils
+import transforms as T
+from dataset import RT_Dataset
+from config import Config
+from augment import get_transform
+
+from torchvision.models.detection.ssd import SSDClassificationHead
+from torchvision.models.detection import _utils
+from torchvision.models.detection import SSD300_VGG16_Weights
+
+
+# model
+def get_object_detection_model(num_classes=4,size=300):
+ # Load the Torchvision pretrained model.
+ model = torchvision.models.detection.ssd300_vgg16(
+ weights=SSD300_VGG16_Weights.COCO_V1
+ )
+ # Retrieve the list of input channels.
+ in_channels = _utils.retrieve_out_channels(model.backbone, (size, size))
+ # List containing number of anchors based on aspect ratios.
+ num_anchors = model.anchor_generator.num_anchors_per_location()
+ # The classification head.
+ model.head.classification_head = SSDClassificationHead(
+ in_channels=in_channels,
+ num_anchors=num_anchors,
+ num_classes=num_classes,
+ )
+ # Image size for transforms.
+ model.transform.min_size = (size,)
+ model.transform.max_size = size
+ return model
+
+# valid function
+def valid_fn(val_data_loader, model, device):
+ model.eval()
+ outputs = []
+ ground_truths=[]
+
+ for images,targets in tqdm(val_data_loader):
+
+ images=list(img.to(device) for img in images)
+ targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
+
+ output=model(images)
+
+
+ for out,target in zip(output,targets):
+ scores=out['scores'].detach().cpu().numpy()
+ boxes=out['boxes'].detach().cpu().numpy()
+ labels=out['labels'].detach().cpu().numpy()
+
+ keep_idx=nms(boxes,scores,iou_threshold=0.1)
+
+ boxes=boxes[keep_idx]
+ scores=scores[keep_idx]
+ labels=labels[keep_idx]
+
+
+ outputs.append({'boxes': boxes,
+ 'scores': scores,
+ 'labels': labels})
+
+ # ground truth 에 label 추가
+ gt_boxes=target['boxes'].cpu().numpy()
+ gt_labels=target['labels'].cpu().numpy()
+
+ ground_truths.append(list(zip(gt_labels,gt_boxes)))
+
+
+
+
+ return outputs,ground_truths
+
+
+def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2):
+ global best_recall
+ outputs, ground_truths = valid_fn(valid_dataloader, model, device)
+ predictions = []
+
+
+ for output in outputs:
+ valid_scores=output['scores']>score_threshold
+
+ valid_boxes = output['boxes'][output['scores'] > score_threshold]
+ valid_labels=output['labels'][output['scores']> score_threshold]
+
+
+ predictions.append(list(zip(valid_labels,valid_boxes)))
+
+
+
+ # utils 모듈에 있는 calculate_metrics 함수를 사용
+ metrics = utils.calculate_metrics(predictions, ground_truths)
+
+ # 전체 성능
+ total_recall=metrics['total']['recall']
+ total_precision=metrics['total']['precision']
+ total_f1_score= metrics['total']['f1_score']
+
+ class_result = {class_label: metrics_val for class_label, metrics_val in metrics['per_class'].items() if class_label != 0}
+
+ return_outputs=metrics['total']
+ return return_outputs,class_result
+
+if __name__=='__main__':
+
+
+ if torch.cuda.is_available():
+ device=torch.device('cuda')
+
+
+ valid_df=pd.read_csv('/content/drive/MyDrive/data/annotations_v2/test_total.csv')
+ image_dir='/content/drive/MyDrive/data/Image'
+
+
+
+
+ valid_dataset=RT_Dataset(valid_df,image_dir,transforms=get_transform(train=False))
+
+
+
+ valid_dataloader=torch.utils.data.DataLoader(
+ valid_dataset,
+ batch_size=Config['VALID_BS'],
+ shuffle=False,
+ num_workers=Config['NUM_WORKERS'],
+ collate_fn=utils.collate_fn,
+
+ )
+
+
+ model=get_object_detection_model(Config['NUM_CLASSES'],Config['IMG_SIZE'])
+
+
+ model_save_path = "/content/drive/MyDrive/models/SSD300_8_4.pth"
+ saved_state=torch.load(model_save_path,map_location=device)
+
+ model.load_state_dict(saved_state['model_state_dict'])
+
+ model.to(device)
+
+
+
+ params = [p for p in model.parameters() if p.requires_grad]
+
+ optimizer = torch.optim.SGD(params, lr=Config['LR'], momentum=0.9, weight_decay=Config['WEIGHT_DECAY'])
+
+ lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.9)
+
+ # valid data
+ return_outputs,class_result = validate_and_save_best_model(0, model, valid_dataloader, device, optimizer, lr_scheduler)
+
+ print(f'test Total result : {return_outputs} , class result : {class_result} ')
\ No newline at end of file
diff --git a/ssd300/train.py b/ssd300/train.py
new file mode 100644
index 0000000..fd95745
--- /dev/null
+++ b/ssd300/train.py
@@ -0,0 +1,205 @@
+import sys
+sys.path.append('/content/drive/MyDrive/Pathfinder-Models/utils')
+import os
+import numpy as np
+import pandas as pd
+
+# for ignoring warnings
+import warnings
+warnings.filterwarnings('ignore')
+
+import cv2
+
+import torch
+import torchvision
+from torchvision import transforms as torchtrans
+#from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+import torchvision.models.detection.ssd as ssd
+from torchvision.ops import nms
+
+from engine import train_one_epoch,evaluate
+from tqdm import tqdm
+import utils
+import transforms as T
+from dataset import RT_Dataset
+from config import Config
+from augment import get_transform,get_ssd_transform
+import wandb
+
+from torchvision.models.detection.ssd import SSDClassificationHead
+from torchvision.models.detection import _utils
+from torchvision.models.detection import SSD300_VGG16_Weights
+
+
# model
def get_object_detection_model(num_classes=4, size=300):
    """Build an SSD300-VGG16 detector whose classification head is resized
    for `num_classes`, starting from COCO-pretrained weights.

    Args:
        num_classes: number of output classes (including background).
        size: square input resolution used by the model's internal transform.

    Returns:
        A torchvision SSD model ready for fine-tuning.
    """
    detector = torchvision.models.detection.ssd300_vgg16(
        weights=SSD300_VGG16_Weights.COCO_V1
    )
    # Channel counts of the backbone feature maps that feed the heads.
    feature_channels = _utils.retrieve_out_channels(detector.backbone, (size, size))
    # Anchors per spatial location, one entry per feature map.
    anchors_per_loc = detector.anchor_generator.num_anchors_per_location()
    # Replace the pretrained head with one matching our class count.
    detector.head.classification_head = SSDClassificationHead(
        in_channels=feature_channels,
        num_anchors=anchors_per_loc,
        num_classes=num_classes,
    )
    # Make the model's internal transform resize inputs to `size` x `size`.
    detector.transform.min_size = (size,)
    detector.transform.max_size = size
    return detector
+
+
# valid function
def valid_fn(val_data_loader, model, device):
    """Run the model over the validation loader and collect post-NMS
    predictions together with the ground-truth (label, box) pairs.

    Args:
        val_data_loader: yields (images, targets) batches.
        model: detection model returning per-image dicts with
            'boxes'/'scores'/'labels'.
        device: device to run inference on.

    Returns:
        outputs: list of dicts with numpy 'boxes', 'scores', 'labels' per image.
        ground_truths: list of [(label, box), ...] pairs per image.
    """
    model.eval()
    outputs = []
    ground_truths = []

    with torch.no_grad():  # inference only — skip autograd bookkeeping
        for images, targets in tqdm(val_data_loader):
            images = list(img.to(device) for img in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            output = model(images)

            for out, target in zip(output, targets):
                # BUG FIX: torchvision.ops.nms requires torch.Tensors, but the
                # original converted to numpy *before* calling nms, which fails
                # at runtime.  Filter on tensors first, convert afterwards.
                boxes_t = out['boxes'].detach().cpu()
                scores_t = out['scores'].detach().cpu()
                labels_t = out['labels'].detach().cpu()

                keep_idx = nms(boxes_t, scores_t, iou_threshold=0.1)

                outputs.append({'boxes': boxes_t[keep_idx].numpy(),
                                'scores': scores_t[keep_idx].numpy(),
                                'labels': labels_t[keep_idx].numpy()})

                # Pair each ground-truth label with its box for metric matching.
                gt_boxes = target['boxes'].cpu().numpy()
                gt_labels = target['labels'].cpu().numpy()
                ground_truths.append(list(zip(gt_labels, gt_boxes)))

    return outputs, ground_truths
+
+
def validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler, score_threshold=0.2):
    """Validate, log metrics to wandb, and checkpoint when total recall improves.

    Args:
        epoch: current epoch number (logged and stored in the checkpoint).
        score_threshold: minimum detection score kept for metric computation.

    Returns:
        The overall metrics dict and the per-class results (background excluded).
    """
    global best_recall
    outputs, ground_truths = valid_fn(valid_dataloader, model, device)

    # Keep only detections above the threshold, paired as (label, box).
    predictions = []
    for output in outputs:
        keep = output['scores'] > score_threshold
        predictions.append(list(zip(output['labels'][keep], output['boxes'][keep])))

    # Metrics come from the shared utils module.
    metrics = utils.calculate_metrics(predictions, ground_truths)
    totals = metrics['total']

    wandb.log({"epoch": epoch, "total_recall": totals['recall'], "total_precision": totals['precision'], "total_f1_score": totals['f1_score']})

    categories = {2: 'Porosity', 3: 'Slag'}
    class_result = {cls: vals for cls, vals in metrics['per_class'].items() if cls != 0}

    # Per-class logging for the defect classes only.
    for cls, cls_metrics in metrics['per_class'].items():
        if cls in categories:
            wandb.log({
                f"class_{categories[cls]}_recall": cls_metrics['recall'],
                f"class_{categories[cls]}_precision": cls_metrics['precision'],
                f"class_{categories[cls]}_f1_score": cls_metrics['f1_score'],
                f"class_{categories[cls]}_average_iou": cls_metrics['average_iou'],
            })

    # Checkpoint whenever total recall sets a new best.
    if totals['recall'] > best_recall:
        best_recall = totals['recall']
        model_save_path = f"/content/drive/MyDrive/models/ssd300_real_{Config['TRAIN_BS']}_{Config['VALID_BS']}.pth"
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'lr_scheduler_state_dict': lr_scheduler.state_dict()
        }, model_save_path)

    return totals, class_result
+
if __name__ == '__main__':
    wandb.init(project='capstone', name='SSD300_VGG16', reinit=True)

    # Fall back to CPU so `device` is always bound.  The original assigned it
    # only when CUDA was available, which raised NameError on CPU-only hosts.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Annotation CSVs and image root.
    train_df = pd.read_csv('/content/drive/MyDrive/data/annotations_v2/train_total.csv')
    valid_df = pd.read_csv('/content/drive/MyDrive/data/annotations_v2/valid_total.csv')
    image_dir = '/content/drive/MyDrive/data/Image'

    result_dir_path = f'/content/drive/MyDrive/result/{Config["MODEL"]}'
    os.makedirs(result_dir_path, exist_ok=True)

    train_dataset = RT_Dataset(train_df, image_dir, transforms=get_transform(train=True))
    valid_dataset = RT_Dataset(valid_df, image_dir, transforms=get_transform(train=False))

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=Config['TRAIN_BS'],
        shuffle=True,
        num_workers=Config['NUM_WORKERS'],
        collate_fn=utils.collate_fn,
    )

    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=Config['VALID_BS'],
        shuffle=False,
        num_workers=Config['NUM_WORKERS'],
        collate_fn=utils.collate_fn,
    )

    model = get_object_detection_model(Config['NUM_CLASSES'], Config['IMG_SIZE'])
    model.to(device)

    wandb.watch(model)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=Config['LR'])
    # Decay the learning rate by 10% every 5 epochs.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

    best_recall = -100
    with open(f'{result_dir_path}/{Config["TRAIN_BS"]}_{Config["VALID_BS"]}_{Config["EPOCHS"]}.txt', 'w') as f:
        for epoch in range(Config['EPOCHS']):
            train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=10)

            lr_scheduler.step()

            # Validate and checkpoint on best recall.
            return_outputs, class_result = validate_and_save_best_model(epoch, model, valid_dataloader, device, optimizer, lr_scheduler)

            print(f'epoch : {epoch}, output : {return_outputs}')
            f.write(f"Epoch {epoch} Total result:{return_outputs}, class_result : {class_result}\n")
\ No newline at end of file
diff --git a/faster-rcnn/augment.py b/utils/augment.py
similarity index 58%
rename from faster-rcnn/augment.py
rename to utils/augment.py
index c40b351..7dc1442 100644
--- a/faster-rcnn/augment.py
+++ b/utils/augment.py
@@ -7,14 +7,18 @@ def get_transform(train):
if train:
return A.Compose([
-
- A.HorizontalFlip(p=0.5),
- A.RandomRotate90(p=0.5),
+ A.Resize(512,512),
+ A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
+ A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5),
+
+ A.RandomGamma(gamma_limit=(80, 120), p=0.5),
A.VerticalFlip(p=0.5),
# ToTensorV2 converts image to pytorch tensor without div by 255
ToTensorV2(p=1.0)
], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
else:
return A.Compose([
+ A.Resize(512,512),
ToTensorV2(p=1.0)
- ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
\ No newline at end of file
+ ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
+
diff --git a/faster-rcnn/coco_eval.py b/utils/coco_eval.py
similarity index 100%
rename from faster-rcnn/coco_eval.py
rename to utils/coco_eval.py
diff --git a/faster-rcnn/coco_utils.py b/utils/coco_utils.py
similarity index 100%
rename from faster-rcnn/coco_utils.py
rename to utils/coco_utils.py
diff --git a/faster-rcnn/dataset.py b/utils/dataset.py
similarity index 93%
rename from faster-rcnn/dataset.py
rename to utils/dataset.py
index 7b27186..3b01eb3 100644
--- a/faster-rcnn/dataset.py
+++ b/utils/dataset.py
@@ -33,16 +33,20 @@ def __getitem__(self,index:int):
if len(bndboxes)>0:
boxes = [[box[0] , box[1], box[2], box[3]] for box in bndboxes]
- labels=[int(label)+1 for label in labels]
+ #labels=[int(label) for label in labels]
boxes=torch.as_tensor(boxes,dtype=torch.float32)
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
- labels = torch.as_tensor(labels, dtype=torch.int64)
+ labels=[int(label)+1 for label in labels]
+ #labels = torch.as_tensor(labels, dtype=torch.int64)
else:
boxes = torch.zeros((0,4),dtype=torch.float32)
labels = torch.zeros(0, dtype=torch.int64) # 더미 라벨
area = torch.zeros(0, dtype=torch.float32) # 더미 면적
+
+
+ labels = torch.as_tensor(labels, dtype=torch.int64)
#다 crowd x
iscrowd = torch.zeros((len(boxes),), dtype=torch.int64)
diff --git a/faster-rcnn/engine.py b/utils/engine.py
similarity index 91%
rename from faster-rcnn/engine.py
rename to utils/engine.py
index bc50e60..9de2ae7 100644
--- a/faster-rcnn/engine.py
+++ b/utils/engine.py
@@ -10,22 +10,22 @@
import utils
import wandb
-def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
+def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq,warmup_epochs=4):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
header = 'Epoch: [{}]'.format(epoch)
lr_scheduler = None
- if epoch == 0:
+ if epoch best_iou:
+ best_iou = iou
+ best_match = i
+
+ # 모든 gt를 돌고 난 후 best_iou > iou_threshold이고 best_match일 경우
+ # 1) 해당 gt가 best_match가 없을 경우
+ if best_iou > iou_threshold and best_match is not None:
+ if not matched_gt[best_match]:
+ update_class_stats(class_stats, pred_class, 'TP', best_iou)
+ matched_gt[best_match] = True
+ else:
+ update_class_stats(class_stats, pred_class, 'FP') # 이미 임자가 있는데 잘못고름
+ else:
+ update_class_stats(class_stats, pred_class, 'FP')
+
+ for i, (gt_class, _) in enumerate(gt_boxes):
+ if not matched_gt[i]:
+ update_class_stats(class_stats, gt_class, 'FN')
+
+ print(f'class_stats : {class_stats}')
+ return calculate_classwise_metrics(class_stats)
+
def update_class_stats(stats, cls, update_type, iou_score=0):
    """Accumulate one 'TP', 'FP', or 'FN' event for `cls` in `stats`.

    For a 'TP', `iou_score` is also added to the class's running IoU total so
    an average IoU can be derived later.  The class entry is created on first
    use; unrecognized update types leave the counters untouched.
    """
    entry = stats.setdefault(cls, {'TP': 0, 'FP': 0, 'FN': 0, 'total_iou': 0})
    if update_type in ('TP', 'FP', 'FN'):
        entry[update_type] += 1
        if update_type == 'TP':
            entry['total_iou'] += iou_score
+
def calculate_classwise_metrics(stats):
    """Turn per-class TP/FP/FN/IoU tallies into precision/recall/F1/avg-IoU.

    Args:
        stats: {class: {'TP': int, 'FP': int, 'FN': int, 'total_iou': float}}

    Returns:
        {'total': {...micro-averaged metrics...}, 'per_class': {class: {...}}}
        with every ratio defaulting to 0 when its denominator is zero.
    """
    class_metrics = {}
    total_TP, total_FP, total_FN, total_iou = 0, 0, 0, 0
    for cls, counts in stats.items():
        tp, fp, fn = counts['TP'], counts['FP'], counts['FN']
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
        average_iou = counts['total_iou'] / tp if tp > 0 else 0

        class_metrics[cls] = {'precision': precision, 'recall': recall, 'f1_score': f1_score, 'average_iou': average_iou}

        total_TP += tp
        total_FP += fp
        total_FN += fn
        total_iou += counts['total_iou']

    # Micro-averaged overall metrics (pooled counts across classes).
    total_precision = total_TP / (total_TP + total_FP) if (total_TP + total_FP) > 0 else 0
    total_recall = total_TP / (total_TP + total_FN) if (total_TP + total_FN) > 0 else 0
    # The original wrapped this in an extra if/else on the same condition,
    # making the conditional expression's else-branch unreachable; one
    # zero-denominator guard suffices.
    total_f1_score = 2 * total_precision * total_recall / (total_precision + total_recall) if (total_precision + total_recall) > 0 else 0
    total_average_iou = total_iou / total_TP if total_TP > 0 else 0

    return {'total': {'precision': total_precision, 'recall': total_recall, 'f1_score': total_f1_score, 'average_iou': total_average_iou}, 'per_class': class_metrics}
+
+
+
# IoU helper
def calculate_iou(box1, box2):
    """Return the Intersection over Union of two (x1, y1, x2, y2) boxes."""
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Overlap rectangle; width/height clamp to zero when the boxes are disjoint.
    inter_w = max(min(ax2, bx2) - max(ax1, bx1), 0)
    inter_h = max(min(ay2, by2) - max(ay1, by1), 0)
    intersection_area = inter_w * inter_h

    # Union = sum of both areas minus the doubly-counted overlap.
    union_area = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - intersection_area

    # Guard against a zero-area union (degenerate boxes).
    return intersection_area / union_area if union_area != 0 else 0
+
+
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.