Commit 690b1aa

- update code
1 parent faf5bee commit 690b1aa

File tree

9 files changed: +512 -1 lines changed
Lines changed: 41 additions & 1 deletion
@@ -1,7 +1,47 @@
import torch
import torch.nn as nn

from evo_science.entities.models.abstract_model import AbstractModel


class AbstractTorchModel(nn.Module, AbstractModel):

    def load_weight(self, checkpoint_path: str):
        """
        Load weights from a checkpoint file.

        Args:
            checkpoint_path (str): Path to the checkpoint file.

        Returns:
            self: The model instance with loaded weights.
        """
        # Load the current model state
        model_state = self.state_dict()

        # Load the checkpoint
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        checkpoint_state = checkpoint["model"].float().state_dict()

        # Filter and load matching weights
        compatible_weights = {
            k: v for k, v in checkpoint_state.items() if k in model_state and v.shape == model_state[k].shape
        }

        # Update the model with compatible weights
        self.load_state_dict(compatible_weights, strict=False)

        return self

    def get_criterion(self):
        raise NotImplementedError("This method must be implemented in the subclass.")

    def clip_gradients(self, max_norm=10.0):
        """
        Clip gradients of the model's parameters.

        Args:
            max_norm (float): The maximum norm value for gradient clipping. Default is 10.0.
        """
        parameters = self.parameters()
        nn.utils.clip_grad_norm_(parameters, max_norm=max_norm)
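
A minimal usage sketch for context. TinyModel and the checkpoint path are hypothetical, and the sketch assumes AbstractModel imposes no additional abstract methods; the only format assumption about the checkpoint is the one load_weight itself makes, namely that the "model" key holds a full nn.Module.

import torch
import torch.nn as nn

# Hypothetical concrete subclass; AbstractTorchModel itself defines no layers.
class TinyModel(AbstractTorchModel):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 2)

    def forward(self, x):
        return self.fc(x)

model = TinyModel()
# load_weight expects a checkpoint whose "model" key holds an nn.Module:
# model = model.load_weight("weights/best.pt")  # hypothetical path

loss = model(torch.randn(4, 8)).sum()
loss.backward()
model.clip_gradients(max_norm=10.0)  # clip before the optimizer step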

evo_science/entities/metrics/iou.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
from evo_science.entities.metrics.base_metric import BaseMetric
import torch
import numpy as np


class IOU(BaseMetric):
    name = "Intersection over Union"

    def _calculate_np(self, y_true, y_pred):
        return np.mean(np.diag(y_true @ y_pred.T))

    @staticmethod
    def compute_iou(box1, box2, eps=1e-7):
        # Returns Complete Intersection over Union (CIoU) of box1(1,4) to box2(n,4)

        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1.unbind(-1)
        b2_x1, b2_y1, b2_x2, b2_y2 = box2.unbind(-1)

        # Calculate width and height of boxes
        w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
        w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1

        # Calculate intersection area
        inter = torch.clamp((torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)), min=0) * torch.clamp(
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)), min=0
        )

        # Calculate union area
        union = w1 * h1 + w2 * h2 - inter + eps

        # Calculate IoU
        iou = inter / union

        # Calculate the convex (smallest enclosing box) width and height
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)

        # Calculate squared diagonal of the enclosing box
        c2 = cw.pow(2) + ch.pow(2) + eps

        # Calculate squared distance between box centers
        rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2).pow(2) + (b2_y1 + b2_y2 - b1_y1 - b1_y2).pow(2)) / 4

        # Calculate aspect ratio consistency term
        v = (4 / (torch.pi**2)) * (torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps))).pow(2)

        # Calculate alpha for CIoU
        with torch.no_grad():
            alpha = v / (v - iou + (1 + eps))

        # Return CIoU
        return iou - (rho2 / c2 + v * alpha)
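
A quick worked check of compute_iou (a sketch; the xyxy boxes are made up): for two identical boxes the IoU term is 1 and both penalty terms vanish, so CIoU is 1; for disjoint boxes the result goes negative because the center-distance penalty remains even when the overlap is zero.

import torch

box = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
same = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
far = torch.tensor([[4.0, 4.0, 6.0, 6.0]])

print(IOU.compute_iou(box, same))  # ~1.0: perfect overlap, no penalties
print(IOU.compute_iou(box, far))   # < 0: zero overlap plus center-distance penalty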
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
import torch
from torch import nn
import copy
from typing import Callable


class ExponentialMovingAverage:
    """
    Exponential Moving Average (EMA) implementation.

    Maintains a moving average of the model's parameters and buffers.
    Reference:
        - https://github.com/rwightman/pytorch-image-models
        - https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    """

    def __init__(self, model: nn.Module, decay: float = 0.9999, tau: float = 2000, updates: int = 0):
        self.ema_model = copy.deepcopy(model).eval()
        self.update_count = updates
        self.decay_fn = self._create_decay_function(decay, tau)
        self._freeze_ema_params()

    def _create_decay_function(self, decay: float, tau: float) -> Callable[[int], float]:
        # Ramp the decay from 0 toward `decay` so early updates track the model closely.
        return lambda x: decay * (1 - torch.exp(torch.tensor(-x / tau)).item())

    def _freeze_ema_params(self):
        for param in self.ema_model.parameters():
            param.requires_grad_(False)

    def update(self, model: nn.Module):
        # Unwrap DistributedDataParallel / DataParallel models.
        if hasattr(model, "module"):
            model = model.module

        with torch.no_grad():
            self.update_count += 1
            current_decay = self.decay_fn(self.update_count)

            for ema_param, model_param in zip(self.ema_model.state_dict().values(), model.state_dict().values()):
                if ema_param.dtype.is_floating_point:
                    ema_param.mul_(current_decay).add_(model_param.detach(), alpha=1 - current_decay)
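
A minimal training-loop sketch (the model, data, and optimizer are hypothetical stand-ins): the EMA shadow is updated once per optimizer step, and the averaged copy is the one you would evaluate or export.

import torch
from torch import nn

model = nn.Linear(8, 2)                          # hypothetical model
opt = torch.optim.SGD(model.parameters(), lr=0.1)
ema = ExponentialMovingAverage(model)

for step in range(100):
    x, y = torch.randn(4, 8), torch.randn(4, 2)  # hypothetical batch
    loss = nn.functional.mse_loss(model(x), y)
    opt.zero_grad()
    loss.backward()
    opt.step()
    ema.update(model)                            # update the shadow weights after each step

eval_model = ema.ema_model                       # evaluate/export the averaged copy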
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
import numpy as np


class AverageMeter:
    def __init__(self):
        self.reset()

    def reset(self):
        self.count = 0
        self.total = 0
        self.average = 0

    def update(self, value, n=1):
        # Skip NaN values so a bad batch does not poison the running average.
        if not np.isnan(value):
            self.count += n
            self.total += value * n
            self.average = self.total / self.count

    @property
    def avg(self):
        return self.average
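
A usage sketch (the loss values are made up): each update is weighted by n, typically the batch size, so avg is a per-sample rather than a per-batch average.

meter = AverageMeter()
meter.update(0.9, n=32)   # batch of 32, mean loss 0.9
meter.update(0.7, n=16)   # batch of 16, mean loss 0.7
print(meter.avg)          # (0.9 * 32 + 0.7 * 16) / 48 ≈ 0.833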

evo_science/entities/utils/nms.py

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
import torch
import torchvision
from time import time


class NonMaxSuppression:
    def __init__(self, conf_threshold, iou_threshold, max_wh=7680, max_det=300, max_nms=30000):
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.max_wh = max_wh
        self.max_det = max_det
        self.max_nms = max_nms

    def __call__(self, outputs):
        bs = outputs.shape[0]
        nc = outputs.shape[1] - 4
        # Candidate mask: anchors whose best class score clears the confidence threshold.
        xc = outputs[:, 4 : 4 + nc].amax(1) > self.conf_threshold

        # Time budget: bail out if NMS runs longer than 0.5 s + 0.05 s per image.
        start = time()
        limit = 0.5 + 0.05 * bs

        output = [torch.zeros((0, 6), device=outputs.device)] * bs
        for index, x in enumerate(outputs):
            x = x.transpose(0, -1)[xc[index]]

            if not x.shape[0]:
                continue

            x = self._process_candidates(x, nc)

            if not x.shape[0]:
                continue
            # Keep at most max_nms candidates, sorted by confidence.
            x = x[x[:, 4].argsort(descending=True)[: self.max_nms]]

            x = self._batched_nms(x)

            output[index] = x
            if (time() - start) > limit:
                break

        return output

    def _process_candidates(self, x, nc):
        box, cls = x.split((4, nc), 1)
        box = self._wh2xy(box)
        if nc > 1:
            # Multi-label: one detection row per (box, class) pair above the threshold.
            i, j = (cls > self.conf_threshold).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float()), 1)
        else:
            conf, j = cls.max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > self.conf_threshold]
        return x

    def _batched_nms(self, x):
        # Offset boxes by class index so NMS never suppresses across classes.
        c = x[:, 5:6] * self.max_wh
        boxes, scores = x[:, :4] + c, x[:, 4]
        i = torchvision.ops.nms(boxes, scores, self.iou_threshold)
        return x[i[: self.max_det]]

    @staticmethod
    def _wh2xy(x):
        # Convert (center x, center y, width, height) to (x1, y1, x2, y2).
        y = x.clone()
        y[:, 0] = x[:, 0] - x[:, 2] / 2
        y[:, 1] = x[:, 1] - x[:, 3] / 2
        y[:, 2] = x[:, 0] + x[:, 2] / 2
        y[:, 3] = x[:, 1] + x[:, 3] / 2
        return y
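
A shape-contract sketch with random stand-in data (84 = 4 box coordinates + 80 classes is only a COCO-style assumption): the callable takes raw predictions of shape (batch, 4 + nc, anchors) and returns one (n, 6) tensor per image with rows (x1, y1, x2, y2, confidence, class).

import torch

nms = NonMaxSuppression(conf_threshold=0.25, iou_threshold=0.45)
preds = torch.rand(2, 84, 8400)   # (batch, 4 + nc, anchors), random stand-in
detections = nms(preds)
for det in detections:
    print(det.shape)              # (n, 6): x1, y1, x2, y2, conf, cls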
Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
from torch import nn
import torch

from evo_science.entities.metrics.iou import IOU


class Assigner(nn.Module):
    def __init__(self, top_k=13, nc=80, alpha=1.0, beta=6.0, eps=1e-9):
        super().__init__()
        self.top_k = top_k
        self.nc = nc
        self.alpha = alpha
        self.beta = beta
        self.eps = eps

    @torch.no_grad()
    def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt):
        batch_size = pd_scores.size(0)
        num_max_boxes = gt_bboxes.size(1)

        if num_max_boxes == 0:
            # No ground-truth boxes: return empty targets matching the normal return signature.
            return (
                torch.zeros_like(pd_bboxes),
                torch.zeros_like(pd_scores),
                torch.zeros_like(pd_scores[..., 0], dtype=torch.bool),
            )

        # Mask of anchor points that fall inside each ground-truth box.
        num_anchors = anc_points.shape[0]
        shape = gt_bboxes.shape
        lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2)
        mask_in_gts = torch.cat((anc_points[None] - lt, rb - anc_points[None]), dim=2)
        mask_in_gts = mask_in_gts.view(shape[0], shape[1], num_anchors, -1).amin(3).gt_(self.eps)
        na = pd_bboxes.shape[-2]
        gt_mask = (mask_in_gts * mask_gt).bool()  # b, max_num_obj, h*w
        overlaps = torch.zeros([batch_size, num_max_boxes, na], dtype=pd_bboxes.dtype, device=pd_bboxes.device)
        bbox_scores = torch.zeros([batch_size, num_max_boxes, na], dtype=pd_scores.dtype, device=pd_scores.device)

        ind = torch.zeros([2, batch_size, num_max_boxes], dtype=torch.long)  # 2, b, max_num_obj
        ind[0] = torch.arange(end=batch_size).view(-1, 1).expand(-1, num_max_boxes)  # b, max_num_obj
        ind[1] = gt_labels.squeeze(-1)  # b, max_num_obj
        bbox_scores[gt_mask] = pd_scores[ind[0], :, ind[1]][gt_mask]  # b, max_num_obj, h*w

        pd_boxes = pd_bboxes.unsqueeze(1).expand(-1, num_max_boxes, -1, -1)[gt_mask]
        gt_boxes = gt_bboxes.unsqueeze(2).expand(-1, -1, na, -1)[gt_mask]
        overlaps[gt_mask] = IOU.compute_iou(gt_boxes, pd_boxes).squeeze(-1).clamp_(0)

        # Alignment metric: classification score weighted by localization quality.
        align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta)

        # Select the top-k anchors per ground-truth box by alignment metric.
        top_k_mask = mask_gt.expand(-1, -1, self.top_k).bool()
        top_k_metrics, top_k_indices = torch.topk(align_metric, self.top_k, dim=-1, largest=True)
        top_k_indices.masked_fill_(~top_k_mask, 0)

        mask_top_k = torch.zeros(align_metric.shape, dtype=torch.int8, device=top_k_indices.device)
        ones = torch.ones_like(top_k_indices[:, :, :1], dtype=torch.int8, device=top_k_indices.device)
        for k in range(self.top_k):
            mask_top_k.scatter_add_(-1, top_k_indices[:, :, k : k + 1], ones)
        mask_top_k.masked_fill_(mask_top_k > 1, 0)  # drop anchors selected more than once
        mask_top_k = mask_top_k.to(align_metric.dtype)
        mask_pos = mask_top_k * mask_in_gts * mask_gt

        # Resolve anchors assigned to multiple ground-truth boxes: keep the highest overlap.
        fg_mask = mask_pos.sum(-2)
        if fg_mask.max() > 1:
            mask_multi_gts = (fg_mask.unsqueeze(1) > 1).expand(-1, num_max_boxes, -1)
            max_overlaps_idx = overlaps.argmax(1)

            is_max_overlaps = torch.zeros(mask_pos.shape, dtype=mask_pos.dtype, device=mask_pos.device)
            is_max_overlaps.scatter_(1, max_overlaps_idx.unsqueeze(1), 1)

            mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos).float()
            fg_mask = mask_pos.sum(-2)
        target_gt_idx = mask_pos.argmax(-2)

        # Assigned target
        index = torch.arange(end=batch_size, dtype=torch.int64, device=gt_labels.device)[..., None]
        target_index = target_gt_idx + index * num_max_boxes
        target_labels = gt_labels.long().flatten()[target_index]

        target_bboxes = gt_bboxes.view(-1, gt_bboxes.shape[-1])[target_index]

        # Assigned target scores
        target_labels.clamp_(0)

        target_scores = torch.zeros(
            (target_labels.shape[0], target_labels.shape[1], self.nc), dtype=torch.int64, device=target_labels.device
        )
        target_scores.scatter_(2, target_labels.unsqueeze(-1), 1)

        fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.nc)
        target_scores = torch.where(fg_scores_mask > 0, target_scores, 0)

        # Normalize
        align_metric *= mask_pos
        pos_align_metrics = align_metric.amax(dim=-1, keepdim=True)
        pos_overlaps = (overlaps * mask_pos).amax(dim=-1, keepdim=True)
        norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1)
        target_scores = target_scores * norm_align_metric

        return target_bboxes, target_scores, fg_mask.bool()
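
A shape-contract sketch with random stand-in data (all sizes hypothetical): pd_scores is (b, anchors, nc), pd_bboxes is (b, anchors, 4) in xyxy, anc_points is (anchors, 2), and the ground-truth tensors are padded to a fixed max_boxes with mask_gt marking the real entries.

import torch

b, anchors, nc, max_boxes = 2, 64, 80, 4
assigner = Assigner(top_k=13, nc=nc)

pd_scores = torch.rand(b, anchors, nc)
pd_bboxes = torch.rand(b, anchors, 4) * 8          # xyxy predictions (random stand-in)
anc_points = torch.rand(anchors, 2) * 8            # anchor-point centers
gt_labels = torch.randint(0, nc, (b, max_boxes, 1))
gt_bboxes = torch.rand(b, max_boxes, 4) * 8
mask_gt = torch.ones(b, max_boxes, 1)              # all padded slots marked real here

target_bboxes, target_scores, fg_mask = assigner(
    pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt
)
print(target_bboxes.shape, target_scores.shape, fg_mask.shape)
# (b, anchors, 4), (b, anchors, nc), (b, anchors)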
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
from torch import nn
import torch
from torch.nn import functional as F
from evo_science.entities.metrics.iou import IOU


class BoxLoss(nn.Module):
    def __init__(self, dfl_ch):
        super().__init__()
        self.dfl_ch = dfl_ch

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        loss_iou = self._compute_iou_loss(pred_bboxes, target_bboxes, target_scores, target_scores_sum, fg_mask)
        loss_dfl = self._compute_dfl_loss(
            pred_dist, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
        )
        return loss_iou, loss_dfl

    def _compute_iou_loss(self, pred_bboxes, target_bboxes, target_scores, target_scores_sum, fg_mask):
        # Weight each foreground anchor by its assigned target score.
        weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1)
        iou = IOU.compute_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask])
        return ((1.0 - iou) * weight).sum() / target_scores_sum

    def _compute_dfl_loss(self, pred_dist, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        target = self._prepare_dfl_target(anchor_points, target_bboxes)
        loss_dfl = self._distribution_focal_loss(pred_dist[fg_mask].view(-1, self.dfl_ch + 1), target[fg_mask])
        weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1)
        return (loss_dfl * weight).sum() / target_scores_sum

    def _prepare_dfl_target(self, anchor_points, target_bboxes):
        # Express each box as distances (left, top, right, bottom) from its anchor point.
        a, b = target_bboxes.chunk(2, -1)
        target = torch.cat((anchor_points - a, b - anchor_points), -1)
        return target.clamp(0, self.dfl_ch - 0.01)

    @staticmethod
    def _distribution_focal_loss(pred_dist, target):
        # Distribution Focal Loss: cross-entropy on the two integer bins around
        # the continuous target, linearly weighted by proximity.
        tl = target.long()  # left (lower) bin
        tr = tl + 1  # right (upper) bin
        wl = tr - target  # weight of the left bin
        wr = 1 - wl  # weight of the right bin
        left_loss = F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape)
        right_loss = F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape)
        return (left_loss * wl + right_loss * wr).mean(-1, keepdim=True)
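
A standalone sketch of the DFL term with made-up numbers (dfl_ch = 15, so pred_dist has 16 bins per box side): a continuous target of, say, 3.7 is split between integer bins 3 and 4 with weights 0.3 and 0.7, so the loss pulls probability mass toward the two bins bracketing the true distance.

import torch

n, dfl_ch = 3, 15
pred_dist = torch.randn(n * 4, dfl_ch + 1)   # logits over 16 bins per box side
target = torch.rand(n, 4) * (dfl_ch - 0.01)  # continuous left/top/right/bottom distances

loss = BoxLoss._distribution_focal_loss(pred_dist, target)
print(loss.shape)  # (n, 1): one DFL value per foreground box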
