Commit 6dd9096
Have to give up the model I had been working on for over a month, and broke my driver; sad to realize how easily it can be removed, and that it doesn't even seem to be a lot of work, for it was well tuned and packed beautifully. :(
Benteng Ma committed Jul 12, 2024
1 parent 471ee60 commit 6dd9096
Showing 4 changed files with 2 additions and 374 deletions.
common/vision/lasr_vision_feature_extraction/nodes/service (6 changes: 2 additions & 4 deletions)
@@ -1,5 +1,5 @@
from lasr_vision_msgs.srv import TorchFaceFeatureDetectionDescription, TorchFaceFeatureDetectionDescriptionRequest, TorchFaceFeatureDetectionDescriptionResponse
-from lasr_vision_feature_extraction.categories_and_attributes import CategoriesAndAttributes, CelebAMaskHQCategoriesAndAttributes, DeepFashion2GeneralizedCategoriesAndAttributes
+from lasr_vision_feature_extraction.categories_and_attributes import DeepFashion2GeneralizedCategoriesAndAttributes

from cv2_img import msg_to_cv2_img
from numpy2message import message2numpy
@@ -23,7 +23,7 @@ def detect(request: TorchFaceFeatureDetectionDescriptionRequest) -> TorchFaceFeatureDetectionDescriptionResponse:
head_frame = lasr_vision_feature_extraction.extract_mask_region(full_frame, head_mask.astype(np.uint8), expand_x=0.4, expand_y=0.5)
torso_frame = lasr_vision_feature_extraction.extract_mask_region(full_frame, torso_mask.astype(np.uint8), expand_x=0.2, expand_y=0.0)
rst_str = lasr_vision_feature_extraction.predict_frame(
-head_frame, torso_frame, full_frame, head_mask, torso_mask, head_predictor=head_predictor, cloth_predictor=cloth_predictor,
+head_frame, torso_frame, full_frame, head_mask, torso_mask, head_predictor=None, cloth_predictor=cloth_predictor,
)
response = TorchFaceFeatureDetectionDescriptionResponse()
response.description = rst_str
@@ -32,8 +32,6 @@ def detect(request: TorchFaceFeatureDetectionDescriptionRequest) -> TorchFaceFeatureDetectionDescriptionResponse:

if __name__ == '__main__':
# predictors are initialised as globals here, so they are used within the function above.
-head_model = lasr_vision_feature_extraction.load_face_classifier_model()
-head_predictor = lasr_vision_feature_extraction.Predictor(head_model, torch.device('cpu'), CelebAMaskHQCategoriesAndAttributes)
cloth_model = lasr_vision_feature_extraction.load_cloth_classifier_model()
cloth_predictor = lasr_vision_feature_extraction.ClothPredictor(cloth_model, torch.device('cpu'), DeepFashion2GeneralizedCategoriesAndAttributes)
rospy.init_node('torch_service')
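After this change the node loads only the cloth predictor and passes head_predictor=None, so the service's description comes solely from the clothing model. For orientation, a hypothetical client-side sketch; the service name here is assumed, not taken from this diff:

import rospy
from lasr_vision_msgs.srv import TorchFaceFeatureDetectionDescription

rospy.wait_for_service("/torch_service")  # assumed service name, not shown in this diff
describe = rospy.ServiceProxy("/torch_service", TorchFaceFeatureDetectionDescription)
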
@@ -9,12 +9,10 @@
import torchvision.models as models
from lasr_vision_feature_extraction.categories_and_attributes import (
CategoriesAndAttributes,
-CelebAMaskHQCategoriesAndAttributes,
DeepFashion2GeneralizedCategoriesAndAttributes,
)
from lasr_vision_feature_extraction.image_with_masks_and_attributes import (
ImageWithMasksAndAttributes,
-ImageOfPerson,
ImageOfCloth,
)

@@ -71,129 +69,6 @@ def forward(self, x_copy, x):
return x


class UNetWithResnetEncoder(nn.Module):
def __init__(self, num_classes, in_channels=3, freeze_bn=False, sigmoid=True):
super(UNetWithResnetEncoder, self).__init__()
self.sigmoid = sigmoid
self.resnet = models.resnet34(
pretrained=False
) # Initialize with a ResNet model
if in_channels != 3:
self.resnet.conv1 = nn.Conv2d(
in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
)

self.encoder1 = nn.Sequential(
self.resnet.conv1, self.resnet.bn1, self.resnet.relu
)
self.encoder2 = self.resnet.layer1
self.encoder3 = self.resnet.layer2
self.encoder4 = self.resnet.layer3
self.encoder5 = self.resnet.layer4

self.up1 = Decoder(512, 256, 256)
self.up2 = Decoder(256, 128, 128)
self.up3 = Decoder(128, 64, 64)
self.up4 = Decoder(64, 64, 64)

self.final_conv = nn.Conv2d(64, num_classes, kernel_size=1)
self._initialize_weights()

if freeze_bn:
self.freeze_bn()

def _initialize_weights(self):
for module in self.modules():
if isinstance(module, nn.Conv2d) or isinstance(module, nn.ConvTranspose2d):
nn.init.kaiming_normal_(module.weight)
if module.bias is not None:
module.bias.data.zero_()
elif isinstance(module, nn.BatchNorm2d):
module.weight.data.fill_(1)
module.bias.data.zero_()

def forward(self, x):
x1 = self.encoder1(x)
x2 = self.encoder2(x1)
x3 = self.encoder3(x2)
x4 = self.encoder4(x3)
x5 = self.encoder5(x4)

x = self.up1(x4, x5)
x = self.up2(x3, x)
x = self.up3(x2, x)
x = self.up4(x1, x)
x = F.interpolate(
x, size=(x.size(2) * 2, x.size(3) * 2), mode="bilinear", align_corners=True
)

x = self.final_conv(x)

if self.sigmoid:
x = torch.sigmoid(x)
return x

def freeze_bn(self):
for module in self.modules():
if isinstance(module, nn.BatchNorm2d):
module.eval()

def unfreeze_bn(self):
for module in self.modules():
if isinstance(module, nn.BatchNorm2d):
module.train()
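
For reference, a hypothetical usage sketch of this removed backbone (not part of the commit); the class count is illustrative, and the output shape assumes the Decoder block above upsamples its input to the skip connection's resolution:

model = UNetWithResnetEncoder(num_classes=12)
model.eval()
with torch.no_grad():
    masks = model(torch.randn(1, 3, 512, 512))
# masks: (1, 12, 512, 512), values in [0, 1] since sigmoid=True by default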


class MultiLabelResNet(nn.Module):
def __init__(self, num_labels, input_channels=3, sigmoid=True):
super(MultiLabelResNet, self).__init__()
self.model = models.resnet34(pretrained=False)
self.sigmoid = sigmoid

if input_channels != 3:
self.model.conv1 = nn.Conv2d(
input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
)

num_ftrs = self.model.fc.in_features

self.model.fc = nn.Linear(num_ftrs, num_labels)

def forward(self, x):
x = self.model(x)
if self.sigmoid:
x = torch.sigmoid(x)
return x
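
Likewise, a hypothetical sketch of the removed attribute head on its own; the label count and input size are illustrative:

clf = MultiLabelResNet(num_labels=40, input_channels=3)
probs = clf(torch.randn(2, 3, 224, 224))  # (2, 40), sigmoid scores in [0, 1]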


class CombinedModel(nn.Module):
def __init__(
self, segment_model: nn.Module, predict_model: nn.Module, cat_layers: int = None
):
super(CombinedModel, self).__init__()
self.segment_model = segment_model
self.predict_model = predict_model
self.cat_layers = cat_layers
self.freeze_seg = False

def forward(self, x: torch.Tensor):
seg_masks = self.segment_model(x)
seg_masks_ = seg_masks.detach()
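# keep only the first cat_layers mask channels (when set) before concatenating with the RGB input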
if self.cat_layers:
seg_masks_ = seg_masks_[:, 0 : self.cat_layers]
x = torch.cat((x, seg_masks_), dim=1)
else:
x = torch.cat((x, seg_masks_), dim=1)
logic_outputs = self.predict_model(x)
return seg_masks, logic_outputs

def freeze_segment_model(self):
self.segment_model.eval()

def unfreeze_segment_model(self):
self.segment_model.train()
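
A hypothetical sketch of how the two removed halves were wired together; the channel counts mirror the cat_layers + 3 arithmetic used by load_face_classifier_model below:

seg = UNetWithResnetEncoder(num_classes=12)
clf = MultiLabelResNet(num_labels=20, input_channels=12 + 3)
model = CombinedModel(seg, clf, cat_layers=12)
masks, attrs = model(torch.randn(1, 3, 512, 512))
# masks: (1, 12, 512, 512) from the segmenter; attrs: (1, 20) attribute scores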


class SegmentPredictor(nn.Module):
def __init__(self, num_masks, num_labels, in_channels=3, sigmoid=True):
super(SegmentPredictor, self).__init__()
@@ -461,32 +336,6 @@ def predict(self, rgb_image: np.ndarray) -> ImageWithMasksAndAttributes:
return image_obj


def load_face_classifier_model():
cat_layers = CelebAMaskHQCategoriesAndAttributes.merged_categories.keys().__len__()
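# with the merged_categories defined in categories_and_attributes.py this evaluates to 12 mask channels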
segment_model = UNetWithResnetEncoder(num_classes=cat_layers)
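# 40 attributes - 21 avoided attributes + 1 mask label ("hair") = 20 output labels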
predictions = (
len(CelebAMaskHQCategoriesAndAttributes.attributes)
- len(CelebAMaskHQCategoriesAndAttributes.avoided_attributes)
+ len(CelebAMaskHQCategoriesAndAttributes.mask_labels)
)
predict_model = MultiLabelResNet(
num_labels=predictions, input_channels=cat_layers + 3
)
model = CombinedModel(segment_model, predict_model, cat_layers=cat_layers)
model.eval()

r = rospkg.RosPack()
model, _, _, _ = load_torch_model(
model,
None,
path=path.join(
r.get_path("lasr_vision_feature_extraction"), "models", "face_model.pth"
),
cpu_only=True,
)
return model
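
The service node formerly consumed this loader exactly as in the two lines deleted from nodes/service above (assuming Predictor is defined in this module):

head_model = load_face_classifier_model()
head_predictor = Predictor(head_model, torch.device('cpu'), CelebAMaskHQCategoriesAndAttributes)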


def load_cloth_classifier_model():
num_classes = len(DeepFashion2GeneralizedCategoriesAndAttributes.attributes)
model = SegmentPredictorBbox(
@@ -573,15 +422,11 @@ def predict_frame(
head_frame = pad_image_to_even_dims(head_frame)
torso_frame = pad_image_to_even_dims(torso_frame)

-rst_person = ImageOfPerson.from_parent_instance(
-head_predictor.predict(head_frame)
-).describe()
rst_cloth = ImageOfCloth.from_parent_instance(
cloth_predictor.predict(torso_frame)
).describe()

result = {
-**rst_person,
**rst_cloth,
}

@@ -10,166 +10,6 @@ class CategoriesAndAttributes:
thresholds_pred: dict[str, float] = {}


class CelebAMaskHQCategoriesAndAttributes(CategoriesAndAttributes):
mask_categories = [
"cloth",
"r_ear",
"hair",
"l_brow",
"l_eye",
"l_lip",
"mouth",
"neck",
"nose",
"r_brow",
"r_ear",
"r_eye",
"skin",
"u_lip",
"hat",
"l_ear",
"neck_l",
"eye_g",
]
merged_categories = {
"ear": [
"l_ear",
"r_ear",
],
"brow": [
"l_brow",
"r_brow",
],
"eye": [
"l_eye",
"r_eye",
],
"mouth": [
"l_lip",
"u_lip",
"mouth",
],
}
_categories_to_merge = []
for key in sorted(list(merged_categories.keys())):
for cat in merged_categories[key]:
_categories_to_merge.append(cat)
for key in mask_categories:
if key not in _categories_to_merge:
merged_categories[key] = [key]
mask_labels = ["hair"]
selective_attributes = {
"facial_hair": [
"5_o_Clock_Shadow",
"Goatee",
"Mustache",
"No_Beard",
"Sideburns",
],
"hair_colour": [
"Black_Hair",
"Blond_Hair",
"Brown_Hair",
"Gray_Hair",
],
"hair_shape": [
"Straight_Hair",
"Wavy_Hair",
],
}
plane_attributes = [
"Bangs",
"Eyeglasses",
"Wearing_Earrings",
"Wearing_Hat",
"Wearing_Necklace",
"Wearing_Necktie",
"Male",
]
avoided_attributes = [
"Arched_Eyebrows",
"Bags_Under_Eyes",
"Big_Lips",
"Big_Nose",
"Bushy_Eyebrows",
"Chubby",
"Double_Chin",
"High_Cheekbones",
"Narrow_Eyes",
"Oval_Face",
"Pointy_Nose",
"Receding_Hairline",
"Rosy_Cheeks",
"Heavy_Makeup",
"Wearing_Lipstick",
"Attractive",
"Blurry",
"Mouth_Slightly_Open",
"Pale_Skin",
"Smiling",
"Young",
]
attributes = [
"5_o_Clock_Shadow",
"Arched_Eyebrows",
"Attractive",
"Bags_Under_Eyes",
"Bald",
"Bangs",
"Big_Lips",
"Big_Nose",
"Black_Hair",
"Blond_Hair",
"Blurry",
"Brown_Hair",
"Bushy_Eyebrows",
"Chubby",
"Double_Chin",
"Eyeglasses",
"Goatee",
"Gray_Hair",
"Heavy_Makeup",
"High_Cheekbones",
"Male",
"Mouth_Slightly_Open",
"Mustache",
"Narrow_Eyes",
"No_Beard",
"Oval_Face",
"Pale_Skin",
"Pointy_Nose",
"Receding_Hairline",
"Rosy_Cheeks",
"Sideburns",
"Smiling",
"Straight_Hair",
"Wavy_Hair",
"Wearing_Earrings",
"Wearing_Hat",
"Wearing_Lipstick",
"Wearing_Necklace",
"Wearing_Necktie",
"Young",
]

thresholds_mask: dict[str, float] = {}
thresholds_pred: dict[str, float] = {}

# set default thresholds:
for key in sorted(merged_categories.keys()):
thresholds_mask[key] = 0.5
for key in attributes + mask_labels:
if key not in avoided_attributes:
thresholds_pred[key] = 0.5

# set specific thresholds:
thresholds_mask["eye_g"] = 0.5
thresholds_pred["Eyeglasses"] = 0.5
thresholds_pred["Wearing_Earrings"] = 0.5
thresholds_pred["Wearing_Necklace"] = 0.5
thresholds_pred["Wearing_Necktie"] = 0.5


class DeepFashion2GeneralizedCategoriesAndAttributes(CategoriesAndAttributes):
mask_categories = [
"short sleeve top",
