Commit 6dd9096
Have to give up the model I had been working on for over a month, and broke my driver; sad to realize how easily it can be removed, and that it doesn't even seem to be a lot of work, for it was well tuned and packed beautifully. :(
Benteng Ma committed Jul 12, 2024
1 parent 471ee60 commit 6dd9096
Showing 4 changed files with 2 additions and 374 deletions.
common/vision/lasr_vision_feature_extraction/nodes/service (6 changes: 2 additions & 4 deletions)
@@ -1,5 +1,5 @@
from lasr_vision_msgs.srv import TorchFaceFeatureDetectionDescription, TorchFaceFeatureDetectionDescriptionRequest, TorchFaceFeatureDetectionDescriptionResponse
-from lasr_vision_feature_extraction.categories_and_attributes import CategoriesAndAttributes, CelebAMaskHQCategoriesAndAttributes, DeepFashion2GeneralizedCategoriesAndAttributes
+from lasr_vision_feature_extraction.categories_and_attributes import DeepFashion2GeneralizedCategoriesAndAttributes

from cv2_img import msg_to_cv2_img
from numpy2message import message2numpy
@@ -23,7 +23,7 @@ def detect(request: TorchFaceFeatureDetectionDescriptionRequest) -> TorchFaceFeatureDetectionDescriptionResponse:
head_frame = lasr_vision_feature_extraction.extract_mask_region(full_frame, head_mask.astype(np.uint8), expand_x=0.4, expand_y=0.5)
torso_frame = lasr_vision_feature_extraction.extract_mask_region(full_frame, torso_mask.astype(np.uint8), expand_x=0.2, expand_y=0.0)
rst_str = lasr_vision_feature_extraction.predict_frame(
-head_frame, torso_frame, full_frame, head_mask, torso_mask, head_predictor=head_predictor, cloth_predictor=cloth_predictor,
+head_frame, torso_frame, full_frame, head_mask, torso_mask, head_predictor=None, cloth_predictor=cloth_predictor,
)
response = TorchFaceFeatureDetectionDescriptionResponse()
response.description = rst_str
@@ -32,8 +32,6 @@ def detect(request: TorchFaceFeatureDetectionDescriptionRequest) -> TorchFaceFeatureDetectionDescriptionResponse:

if __name__ == '__main__':
# predictors are initialised as globals here, so they are used within the function above.
-head_model = lasr_vision_feature_extraction.load_face_classifier_model()
-head_predictor = lasr_vision_feature_extraction.Predictor(head_model, torch.device('cpu'), CelebAMaskHQCategoriesAndAttributes)
cloth_model = lasr_vision_feature_extraction.load_cloth_classifier_model()
cloth_predictor = lasr_vision_feature_extraction.ClothPredictor(cloth_model, torch.device('cpu'), DeepFashion2GeneralizedCategoriesAndAttributes)
rospy.init_node('torch_service')
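After this change the node loads only the cloth predictor and passes head_predictor=None, so the service's description comes solely from the clothing model. For orientation, a hypothetical client-side sketch; the service name here is assumed, not taken from this diff:

import rospy
from lasr_vision_msgs.srv import TorchFaceFeatureDetectionDescription

rospy.wait_for_service("/torch_service")  # assumed service name, not shown in this diff
describe = rospy.ServiceProxy("/torch_service", TorchFaceFeatureDetectionDescription)
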
@@ -9,12 +9,10 @@
import torchvision.models as models
from lasr_vision_feature_extraction.categories_and_attributes import (
CategoriesAndAttributes,
-CelebAMaskHQCategoriesAndAttributes,
DeepFashion2GeneralizedCategoriesAndAttributes,
)
from lasr_vision_feature_extraction.image_with_masks_and_attributes import (
ImageWithMasksAndAttributes,
-ImageOfPerson,
ImageOfCloth,
)

@@ -71,129 +69,6 @@ def forward(self, x_copy, x):
return x


class UNetWithResnetEncoder(nn.Module):
def __init__(self, num_classes, in_channels=3, freeze_bn=False, sigmoid=True):
super(UNetWithResnetEncoder, self).__init__()
self.sigmoid = sigmoid
self.resnet = models.resnet34(
pretrained=False
) # Initialize with a ResNet model
if in_channels != 3:
self.resnet.conv1 = nn.Conv2d(
in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
)

self.encoder1 = nn.Sequential(
self.resnet.conv1, self.resnet.bn1, self.resnet.relu
)
self.encoder2 = self.resnet.layer1
self.encoder3 = self.resnet.layer2
self.encoder4 = self.resnet.layer3
self.encoder5 = self.resnet.layer4

self.up1 = Decoder(512, 256, 256)
self.up2 = Decoder(256, 128, 128)
self.up3 = Decoder(128, 64, 64)
self.up4 = Decoder(64, 64, 64)

self.final_conv = nn.Conv2d(64, num_classes, kernel_size=1)
self._initialize_weights()

if freeze_bn:
self.freeze_bn()

def _initialize_weights(self):
for module in self.modules():
if isinstance(module, nn.Conv2d) or isinstance(module, nn.ConvTranspose2d):
nn.init.kaiming_normal_(module.weight)
if module.bias is not None:
module.bias.data.zero_()
elif isinstance(module, nn.BatchNorm2d):
module.weight.data.fill_(1)
module.bias.data.zero_()

def forward(self, x):
x1 = self.encoder1(x)
x2 = self.encoder2(x1)
x3 = self.encoder3(x2)
x4 = self.encoder4(x3)
x5 = self.encoder5(x4)

x = self.up1(x4, x5)
x = self.up2(x3, x)
x = self.up3(x2, x)
x = self.up4(x1, x)
x = F.interpolate(
x, size=(x.size(2) * 2, x.size(3) * 2), mode="bilinear", align_corners=True
)

x = self.final_conv(x)

if self.sigmoid:
x = torch.sigmoid(x)
return x

def freeze_bn(self):
for module in self.modules():
if isinstance(module, nn.BatchNorm2d):
module.eval()

def unfreeze_bn(self):
for module in self.modules():
if isinstance(module, nn.BatchNorm2d):
module.train()
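
For reference, a hypothetical usage sketch of this removed backbone (not part of the commit); the class count is illustrative, and the output shape assumes the Decoder block above upsamples its input to the skip connection's resolution:

model = UNetWithResnetEncoder(num_classes=12)
model.eval()
with torch.no_grad():
    masks = model(torch.randn(1, 3, 512, 512))
# masks: (1, 12, 512, 512), values in [0, 1] since sigmoid=True by default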


class MultiLabelResNet(nn.Module):
def __init__(self, num_labels, input_channels=3, sigmoid=True):
super(MultiLabelResNet, self).__init__()
self.model = models.resnet34(pretrained=False)
self.sigmoid = sigmoid

if input_channels != 3:
self.model.conv1 = nn.Conv2d(
input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
)

num_ftrs = self.model.fc.in_features

self.model.fc = nn.Linear(num_ftrs, num_labels)

def forward(self, x):
x = self.model(x)
if self.sigmoid:
x = torch.sigmoid(x)
return x
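
Likewise, a hypothetical sketch of the removed attribute head on its own; the label count and input size are illustrative:

clf = MultiLabelResNet(num_labels=40, input_channels=3)
probs = clf(torch.randn(2, 3, 224, 224))  # (2, 40), sigmoid scores in [0, 1]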


class CombinedModel(nn.Module):
def __init__(
self, segment_model: nn.Module, predict_model: nn.Module, cat_layers: int = None
):
super(CombinedModel, self).__init__()
self.segment_model = segment_model
self.predict_model = predict_model
self.cat_layers = cat_layers
self.freeze_seg = False

def forward(self, x: torch.Tensor):
seg_masks = self.segment_model(x)
seg_masks_ = seg_masks.detach()
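# keep only the first cat_layers mask channels (when set) before concatenating with the RGB input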
if self.cat_layers:
seg_masks_ = seg_masks_[:, 0 : self.cat_layers]
x = torch.cat((x, seg_masks_), dim=1)
else:
x = torch.cat((x, seg_masks_), dim=1)
logic_outputs = self.predict_model(x)
return seg_masks, logic_outputs

def freeze_segment_model(self):
self.segment_model.eval()

def unfreeze_segment_model(self):
self.segment_model.train()
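
A hypothetical sketch of how the two removed halves were wired together; the channel counts mirror the cat_layers + 3 arithmetic used by load_face_classifier_model below:

seg = UNetWithResnetEncoder(num_classes=12)
clf = MultiLabelResNet(num_labels=20, input_channels=12 + 3)
model = CombinedModel(seg, clf, cat_layers=12)
masks, attrs = model(torch.randn(1, 3, 512, 512))
# masks: (1, 12, 512, 512) from the segmenter; attrs: (1, 20) attribute scores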


class SegmentPredictor(nn.Module):
def __init__(self, num_masks, num_labels, in_channels=3, sigmoid=True):
super(SegmentPredictor, self).__init__()
@@ -461,32 +336,6 @@ def predict(self, rgb_image: np.ndarray) -> ImageWithMasksAndAttributes:
return image_obj


def load_face_classifier_model():
cat_layers = CelebAMaskHQCategoriesAndAttributes.merged_categories.keys().__len__()
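# with the merged_categories defined in categories_and_attributes.py this evaluates to 12 mask channels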
segment_model = UNetWithResnetEncoder(num_classes=cat_layers)
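# 40 attributes - 21 avoided attributes + 1 mask label ("hair") = 20 output labels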
predictions = (
len(CelebAMaskHQCategoriesAndAttributes.attributes)
- len(CelebAMaskHQCategoriesAndAttributes.avoided_attributes)
+ len(CelebAMaskHQCategoriesAndAttributes.mask_labels)
)
predict_model = MultiLabelResNet(
num_labels=predictions, input_channels=cat_layers + 3
)
model = CombinedModel(segment_model, predict_model, cat_layers=cat_layers)
model.eval()

r = rospkg.RosPack()
model, _, _, _ = load_torch_model(
model,
None,
path=path.join(
r.get_path("lasr_vision_feature_extraction"), "models", "face_model.pth"
),
cpu_only=True,
)
return model
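
The service node formerly consumed this loader exactly as in the two lines deleted from nodes/service above (assuming Predictor is defined in this module):

head_model = load_face_classifier_model()
head_predictor = Predictor(head_model, torch.device('cpu'), CelebAMaskHQCategoriesAndAttributes)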


def load_cloth_classifier_model():
num_classes = len(DeepFashion2GeneralizedCategoriesAndAttributes.attributes)
model = SegmentPredictorBbox(
@@ -573,15 +422,11 @@ def predict_frame(
head_frame = pad_image_to_even_dims(head_frame)
torso_frame = pad_image_to_even_dims(torso_frame)

-rst_person = ImageOfPerson.from_parent_instance(
-head_predictor.predict(head_frame)
-).describe()
rst_cloth = ImageOfCloth.from_parent_instance(
cloth_predictor.predict(torso_frame)
).describe()

result = {
-**rst_person,
**rst_cloth,
}

@@ -10,166 +10,6 @@ class CategoriesAndAttributes:
thresholds_pred: dict[str, float] = {}


class CelebAMaskHQCategoriesAndAttributes(CategoriesAndAttributes):
mask_categories = [
"cloth",
"r_ear",
"hair",
"l_brow",
"l_eye",
"l_lip",
"mouth",
"neck",
"nose",
"r_brow",
"r_ear",
"r_eye",
"skin",
"u_lip",
"hat",
"l_ear",
"neck_l",
"eye_g",
]
merged_categories = {
"ear": [
"l_ear",
"r_ear",
],
"brow": [
"l_brow",
"r_brow",
],
"eye": [
"l_eye",
"r_eye",
],
"mouth": [
"l_lip",
"u_lip",
"mouth",
],
}
_categories_to_merge = []
for key in sorted(list(merged_categories.keys())):
for cat in merged_categories[key]:
_categories_to_merge.append(cat)
for key in mask_categories:
if key not in _categories_to_merge:
merged_categories[key] = [key]
mask_labels = ["hair"]
selective_attributes = {
"facial_hair": [
"5_o_Clock_Shadow",
"Goatee",
"Mustache",
"No_Beard",
"Sideburns",
],
"hair_colour": [
"Black_Hair",
"Blond_Hair",
"Brown_Hair",
"Gray_Hair",
],
"hair_shape": [
"Straight_Hair",
"Wavy_Hair",
],
}
plane_attributes = [
"Bangs",
"Eyeglasses",
"Wearing_Earrings",
"Wearing_Hat",
"Wearing_Necklace",
"Wearing_Necktie",
"Male",
]
avoided_attributes = [
"Arched_Eyebrows",
"Bags_Under_Eyes",
"Big_Lips",
"Big_Nose",
"Bushy_Eyebrows",
"Chubby",
"Double_Chin",
"High_Cheekbones",
"Narrow_Eyes",
"Oval_Face",
"Pointy_Nose",
"Receding_Hairline",
"Rosy_Cheeks",
"Heavy_Makeup",
"Wearing_Lipstick",
"Attractive",
"Blurry",
"Mouth_Slightly_Open",
"Pale_Skin",
"Smiling",
"Young",
]
attributes = [
"5_o_Clock_Shadow",
"Arched_Eyebrows",
"Attractive",
"Bags_Under_Eyes",
"Bald",
"Bangs",
"Big_Lips",
"Big_Nose",
"Black_Hair",
"Blond_Hair",
"Blurry",
"Brown_Hair",
"Bushy_Eyebrows",
"Chubby",
"Double_Chin",
"Eyeglasses",
"Goatee",
"Gray_Hair",
"Heavy_Makeup",
"High_Cheekbones",
"Male",
"Mouth_Slightly_Open",
"Mustache",
"Narrow_Eyes",
"No_Beard",
"Oval_Face",
"Pale_Skin",
"Pointy_Nose",
"Receding_Hairline",
"Rosy_Cheeks",
"Sideburns",
"Smiling",
"Straight_Hair",
"Wavy_Hair",
"Wearing_Earrings",
"Wearing_Hat",
"Wearing_Lipstick",
"Wearing_Necklace",
"Wearing_Necktie",
"Young",
]

thresholds_mask: dict[str, float] = {}
thresholds_pred: dict[str, float] = {}

# set default thresholds:
for key in sorted(merged_categories.keys()):
thresholds_mask[key] = 0.5
for key in attributes + mask_labels:
if key not in avoided_attributes:
thresholds_pred[key] = 0.5

# set specific thresholds:
thresholds_mask["eye_g"] = 0.5
thresholds_pred["Eyeglasses"] = 0.5
thresholds_pred["Wearing_Earrings"] = 0.5
thresholds_pred["Wearing_Necklace"] = 0.5
thresholds_pred["Wearing_Necktie"] = 0.5


class DeepFashion2GeneralizedCategoriesAndAttributes(CategoriesAndAttributes):
mask_categories = [
"short sleeve top",
