
Commit

- refactor csp and darknet
maycuatroi committed Oct 8, 2024
1 parent 690b1aa commit edc722d
Showing 3 changed files with 48 additions and 26 deletions.
3 changes: 2 additions & 1 deletion evo_science/packages/yolo/layers/conv.py
@@ -8,11 +8,12 @@ def __init__(
         out_channels: int,
         kernel_size: int = 3,
         stride: int = 1,
-        padding: int = 1,
+        padding: int = None,
         groups: int = 1,
         activation: bool = True,
     ):
         super().__init__()
+        padding = (kernel_size - 1) // 2 if padding is None else padding
         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False, groups=groups)
         self.bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.03)
         self.activation = nn.SiLU(inplace=True) if activation else nn.Identity()
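For reference, the new default reproduces "same" padding for odd kernel sizes: with padding = (kernel_size - 1) // 2 and stride 1, the spatial resolution is preserved, and callers can still pass an explicit padding to override it. A minimal sketch using plain torch.nn.Conv2d (not the repository's Conv wrapper) to illustrate the rule:

import torch
import torch.nn as nn

x = torch.randn(1, 16, 32, 32)
for kernel_size in (1, 3, 5):
    padding = (kernel_size - 1) // 2  # same rule as the refactored default
    conv = nn.Conv2d(16, 32, kernel_size, stride=1, padding=padding, bias=False)
    print(kernel_size, conv(x).shape)  # spatial size stays 32x32 for every odd kernel
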
43 changes: 33 additions & 10 deletions evo_science/packages/yolo/layers/csp.py
@@ -6,17 +6,40 @@


 class CSP(nn.Module):
+    """
+    CSPNet: A New Backbone that can Enhance Learning Capability of CNN
+    The CSP structure is defined as:
+        CSP(x) = Conv(Concat(Conv1(x), Conv2(x), Residual1(Conv2(x)), ..., ResidualN(Conv2(x))))
+    Where:
+        - Conv1, Conv2 are convolutional layers that split the input into two branches
+        - Residual1, ..., ResidualN are residual blocks applied to the right branch
+        - Concat concatenates the outputs of all branches
+        - Conv is a final convolutional layer to combine the features
+    Args:
+        in_channels (int): Number of input channels.
+        out_channels (int): Number of output channels.
+        num_residuals (int, optional): Number of residual blocks. Defaults to 1.
+        use_residual_add (bool, optional): Whether to use residual addition in Residual blocks. Defaults to True.
+    Reference: https://arxiv.org/abs/1911.11929
+    """

     def __init__(self, in_channels, out_channels, num_residuals=1, use_residual_add=True):
         super().__init__()
-        self.branch1 = Conv(in_channels, out_channels // 2)
-        self.branch2 = nn.Sequential(
-            Conv(in_channels, out_channels // 2),
-            *[Residual(out_channels // 2, use_residual_add) for _ in range(num_residuals)]
-        )
-        self.final_conv = Conv((2 + num_residuals) * out_channels // 2, out_channels)
+        self.conv_left = Conv(in_channels, out_channels // 2)
+        self.conv_right = Conv(in_channels, out_channels // 2)
+        self.conv_bottom = Conv((2 + num_residuals) * out_channels // 2, out_channels)
+        self.residuals = nn.ModuleList(Residual(out_channels // 2, use_residual_add) for _ in range(num_residuals))

     def forward(self, x):
-        branch1_output = self.branch1(x)
-        branch2_output = self.branch2(x)
-        combined = torch.cat([branch1_output, branch2_output], dim=1)
-        return self.final_conv(combined)
+        left_branch = self.conv_left(x)
+        right_branch = self.conv_right(x)
+
+        features = [left_branch, right_branch]
+        for residual in self.residuals:
+            features.append(residual(features[-1]))
+
+        return self.conv_bottom(torch.cat(features, dim=1))
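The refactor replaces the nn.Sequential right branch with an explicit nn.ModuleList so that every intermediate residual output can be concatenated, which is exactly the (2 + num_residuals) * out_channels // 2 input width that conv_bottom expects. A standalone sketch of that wiring using plain nn.Conv2d stand-ins for the repository's Conv and Residual blocks (the channel sizes and 1x1/3x3 kernel choices below are illustrative assumptions):

import torch
import torch.nn as nn

in_channels, out_channels, num_residuals = 64, 128, 2
half = out_channels // 2

conv_left = nn.Conv2d(in_channels, half, 1)   # stand-in for Conv
conv_right = nn.Conv2d(in_channels, half, 1)  # stand-in for Conv
residuals = nn.ModuleList(nn.Conv2d(half, half, 3, padding=1) for _ in range(num_residuals))
conv_bottom = nn.Conv2d((2 + num_residuals) * half, out_channels, 1)

x = torch.randn(1, in_channels, 40, 40)
features = [conv_left(x), conv_right(x)]
for residual in residuals:
    features.append(residual(features[-1]))    # each block feeds on the previous output
y = conv_bottom(torch.cat(features, dim=1))    # (2 + num_residuals) * half channels concatenated
print(y.shape)                                 # torch.Size([1, 128, 40, 40])
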
28 changes: 13 additions & 15 deletions evo_science/packages/yolo/layers/darknet.py
@@ -9,15 +9,11 @@
 class DarkNet(nn.Module):
     def __init__(self, width, depth):
         super().__init__()
-        self.layers = nn.ModuleDict(
-            {
-                "stage1": self._create_stage(width[0], width[1], 3, 2),
-                "stage2": self._create_stage(width[1], width[2], 3, 2, depth[0]),
-                "stage3": self._create_stage(width[2], width[3], 3, 2, depth[1]),
-                "stage4": self._create_stage(width[3], width[4], 3, 2, depth[2]),
-                "stage5": self._create_stage(width[4], width[5], 3, 2, depth[0], use_spp=True),
-            }
-        )
+        self.stage1 = self._create_stage(width[0], width[1], 3, 2)
+        self.stage2 = self._create_stage(width[1], width[2], 3, 2, depth[0])
+        self.stage3 = self._create_stage(width[2], width[3], 3, 2, depth[1])
+        self.stage4 = self._create_stage(width[3], width[4], 3, 2, depth[2])
+        self.stage5 = self._create_stage(width[4], width[5], 3, 2, depth[0], use_spp=True)

     def _create_stage(self, in_channels, out_channels, kernel_size, stride, num_csp_blocks=0, use_spp=False):
         layers = [Conv(in_channels, out_channels, kernel_size, stride)]
@@ -28,9 +24,11 @@ def _create_stage(self, in_channels, out_channels, kernel_size, stride, num_csp_
         return nn.Sequential(*layers)

     def forward(self, x):
-        outputs = []
-        for i, (stage_id, layer) in enumerate(self.layers.items()):
-            x = layer(x)
-            if i >= 2:
-                outputs.append(x)
-        return outputs
+        # Forward through each stage and track outputs from stage 3, 4, and 5
+        x = self.stage1(x)
+        x = self.stage2(x)
+        p3 = self.stage3(x)
+        p4 = self.stage4(p3)
+        p5 = self.stage5(p4)
+
+        return p3, p4, p5
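The rewritten forward makes the feature-pyramid taps explicit: instead of collecting stage outputs by index (i >= 2), it returns the stage-3/4/5 feature maps directly as (p3, p4, p5), the usual stride-8/16/32 pyramid levels a YOLO neck consumes. A standalone sketch of the staging with plain stride-2 convolutions (the channel widths and input size below are illustrative assumptions, not the repository's configuration):

import torch
import torch.nn as nn

widths = [3, 16, 32, 64, 128, 256]
stages = nn.ModuleList(
    nn.Conv2d(widths[i], widths[i + 1], kernel_size=3, stride=2, padding=1) for i in range(5)
)

x = torch.randn(1, 3, 256, 256)
x = stages[0](x)    # stage1, stride 2
x = stages[1](x)    # stage2, stride 4
p3 = stages[2](x)   # stage3, stride 8
p4 = stages[3](p3)  # stage4, stride 16
p5 = stages[4](p4)  # stage5, stride 32
print(p3.shape, p4.shape, p5.shape)
# torch.Size([1, 64, 32, 32]) torch.Size([1, 128, 16, 16]) torch.Size([1, 256, 8, 8])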
