@@ -10,8 +10,11 @@ import (
10
10
"github.com/NexusGPU/tensor-fusion/internal/constants"
11
11
"github.com/NexusGPU/tensor-fusion/internal/gpuallocator"
12
12
"github.com/NexusGPU/tensor-fusion/internal/utils"
13
+ appsv1 "k8s.io/api/apps/v1"
13
14
corev1 "k8s.io/api/core/v1"
15
+ "k8s.io/apimachinery/pkg/api/errors"
14
16
"k8s.io/apimachinery/pkg/api/resource"
17
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15
18
"sigs.k8s.io/controller-runtime/pkg/client"
16
19
)
17
20
@@ -47,8 +50,11 @@ func ParseTensorFusionInfo(
47
50
info .EnabledReplicas = & val32
48
51
}
49
52
50
- // Generate the workload name: if the Pod has no controller, use the Pod's name; otherwise, use the root controller's name.
51
- if controllerRef , err := utils .FindRootControllerRef (ctx , k8sClient , pod ); err == nil {
53
+ // Generate the workload name:
54
+ // If the Pod has no controller, use the Pod's name;
55
+ // if it is controlled by a Deployment, return the Deployment's name;
56
+ // otherwise, return the name of the first-level controller.
57
+ if controllerRef , err := getPodControllerRef (ctx , k8sClient , pod ); err == nil {
52
58
if controllerRef != nil {
53
59
info .WorkloadName = controllerRef .Name
54
60
} else {
@@ -254,3 +260,34 @@ func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile)
254
260
workloadProfile .Spec .Resources .Limits .Vram = resource .Vram
255
261
return nil
256
262
}
263
+
264
+ func getPodControllerRef (ctx context.Context , c client.Client , pod * corev1.Pod ) (* metav1.OwnerReference , error ) {
265
+ podControllerRef := metav1 .GetControllerOf (pod )
266
+ if podControllerRef == nil {
267
+ return nil , nil
268
+ }
269
+
270
+ switch podControllerRef .Kind {
271
+ case "ReplicaSet" :
272
+ {
273
+ // Special handling for Deployment resources
274
+ rs := & appsv1.ReplicaSet {}
275
+ if err := c .Get (ctx , client.ObjectKey {
276
+ Namespace : pod .Namespace ,
277
+ Name : podControllerRef .Name ,
278
+ }, rs ); err != nil {
279
+ if errors .IsNotFound (err ) {
280
+ return podControllerRef , nil
281
+ }
282
+ return nil , fmt .Errorf ("failed to get ReplicaSet: %w" , err )
283
+ }
284
+ rsContollerRef := metav1 .GetControllerOf (rs )
285
+ if rsContollerRef != nil && rsContollerRef .Kind == "Deployment" {
286
+ // If controlled by a Deployment, return the controllerRef of rs
287
+ return rsContollerRef , nil
288
+ }
289
+ }
290
+ }
291
+
292
+ return podControllerRef , nil
293
+ }
0 commit comments