diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go
index 5b33e2990..1ab01e67d 100644
--- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go
+++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go
@@ -337,7 +337,10 @@ func (p *CPUPressureLoadEviction) collectMetrics(_ context.Context) {
 		for containerName, containerEntry := range entry {
 			if containerEntry == nil || containerEntry.IsPool {
 				continue
-			} else if containerEntry.OwnerPool == state.EmptyOwnerPoolName || p.skipPools.Has(p.configTranslator.Translate(containerEntry.OwnerPool)) {
+			} else if containerEntry.OwnerPool == state.EmptyOwnerPoolName ||
+				p.skipPools.Has(p.configTranslator.Translate(containerEntry.OwnerPool)) ||
+				// skip pod with system pool
+				state.IsSystemPool(containerEntry.OwnerPool) {
 				general.Infof("skip collecting metric for pod: %s, container: %s with owner pool name: %s",
 					podUID, containerName, containerEntry.OwnerPool)
 				continue
@@ -422,7 +425,7 @@ func (p *CPUPressureLoadEviction) checkSharedPressureByPoolSize(pod2Pool PodPool
 // accumulateSharedPoolsLimit calculates the cpu core limit used by shared core pool,
 // and it equals: machine-core - cores-for-dedicated-pods - reserved-cores-reclaim-pods - reserved-cores-system-pods.
 func (p *CPUPressureLoadEviction) accumulateSharedPoolsLimit() int {
-	availableCPUSet := p.state.GetMachineState().GetFilteredAvailableCPUSet(p.systemReservedCPUs, nil, state.CheckNUMABinding)
+	availableCPUSet := p.state.GetMachineState().GetFilteredAvailableCPUSet(p.systemReservedCPUs, nil, state.CheckSharedOrDedicatedNUMABinding)
 
 	coreNumReservedForReclaim := p.dynamicConf.GetDynamicConfiguration().MinReclaimedResourceForAllocate[v1.ResourceCPU]
 	if coreNumReservedForReclaim.Value() > int64(p.metaServer.NumCPUs) {
@@ -431,7 +434,7 @@ func (p *CPUPressureLoadEviction) accumulateSharedPoolsLimit() int {
 	reservedForReclaim := machine.GetCoreNumReservedForReclaim(int(coreNumReservedForReclaim.Value()), p.metaServer.NumNUMANodes)
 
 	reservedForReclaimInSharedNuma := 0
-	sharedCoresNUMAs := p.state.GetMachineState().GetFilteredNUMASet(state.CheckNUMABinding)
+	sharedCoresNUMAs := p.state.GetMachineState().GetFilteredNUMASet(state.CheckSharedOrDedicatedNUMABinding)
 	for _, numaID := range sharedCoresNUMAs.ToSliceInt() {
 		reservedForReclaimInSharedNuma += reservedForReclaim[numaID]
 	}
diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
index 5a7203e6a..cd810ee66 100644
--- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
+++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
@@ -212,6 +212,7 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration
 		consts.PodAnnotationQoSLevelSharedCores:    policyImplement.sharedCoresAllocationHandler,
 		consts.PodAnnotationQoSLevelDedicatedCores: policyImplement.dedicatedCoresAllocationHandler,
 		consts.PodAnnotationQoSLevelReclaimedCores: policyImplement.reclaimedCoresAllocationHandler,
+		consts.PodAnnotationQoSLevelSystemCores:    policyImplement.systemCoresAllocationHandler,
 	}
 
 	// register hint providers for pods with different QoS level
@@ -219,6 +220,7 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration
 		consts.PodAnnotationQoSLevelSharedCores:    policyImplement.sharedCoresHintHandler,
 		consts.PodAnnotationQoSLevelDedicatedCores: policyImplement.dedicatedCoresHintHandler,
consts.PodAnnotationQoSLevelReclaimedCores: policyImplement.reclaimedCoresHintHandler, + consts.PodAnnotationQoSLevelSystemCores: policyImplement.systemCoresHintHandler, } if err := policyImplement.cleanPools(); err != nil { @@ -426,7 +428,7 @@ func (p *DynamicPolicy) GetResourcesAllocation(_ context.Context, // pooledCPUs is the total available cpu cores minus those that are reserved pooledCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, func(ai *state.AllocationInfo) bool { - return state.CheckDedicated(ai) || state.CheckNUMABinding(ai) + return state.CheckDedicated(ai) || state.CheckSharedNUMABinding(ai) }, state.CheckDedicatedNUMABinding) pooledCPUsTopologyAwareAssignments, err := machine.GetNumaAwareAssignments(p.machineInfo.CPUTopology, pooledCPUs) @@ -1067,7 +1069,7 @@ func (p *DynamicPolicy) initReclaimPool() error { machineState := p.state.GetMachineState() availableCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, func(ai *state.AllocationInfo) bool { - return state.CheckDedicated(ai) || state.CheckNUMABinding(ai) + return state.CheckDedicated(ai) || state.CheckSharedNUMABinding(ai) }, state.CheckDedicatedNUMABinding).Difference(noneResidentCPUs) @@ -1182,7 +1184,7 @@ func (p *DynamicPolicy) checkNormalShareCoresCpuResource(req *pluginapi.Resource machineState := p.state.GetMachineState() pooledCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, - state.CheckDedicated, state.CheckNUMABinding) + state.CheckDedicated, state.CheckSharedOrDedicatedNUMABinding) general.Infof("[checkNormalShareCoresCpuResource] node cpu allocated: %d, allocatable: %d", shareCoresAllocatedInt, pooledCPUs.Size()) if shareCoresAllocatedInt > pooledCPUs.Size() { diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go index a27acee3c..328065bce 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go @@ -590,6 +590,19 @@ func (p *DynamicPolicy) applyBlocks(blockCPUSet advisorapi.BlockCPUSet, resp *ad allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName) general.Errorf(errMsg) return fmt.Errorf(errMsg) + case consts.PodAnnotationQoSLevelSystemCores: + poolCPUSet, topologyAwareAssignments, err := p.getSystemPoolCPUSetAndNumaAwareAssignments(newEntries, allocationInfo) + if err != nil { + return fmt.Errorf("pod: %s/%s, container: %s is system_cores, "+ + "getSystemPoolCPUSetAndNumaAwareAssignments failed with error: %v", + allocationInfo.PodNamespace, allocationInfo.PodName, + allocationInfo.ContainerName, err) + } + + newEntries[podUID][containerName].AllocationResult = poolCPUSet + newEntries[podUID][containerName].OriginalAllocationResult = poolCPUSet.Clone() + newEntries[podUID][containerName].TopologyAwareAssignments = topologyAwareAssignments + newEntries[podUID][containerName].OriginalTopologyAwareAssignments = machine.DeepcopyCPUAssignment(topologyAwareAssignments) case consts.PodAnnotationQoSLevelSharedCores, consts.PodAnnotationQoSLevelReclaimedCores: ownerPoolName := allocationInfo.GetOwnerPoolName() if calculationInfo, ok := resp.GetCalculationInfo(podUID, containerName); ok { diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go index de516f317..fc9a968e6 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go +++ 
b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go @@ -21,6 +21,7 @@ import ( "fmt" "math" "sort" + "strings" "time" v1 "k8s.io/api/core/v1" @@ -69,7 +70,7 @@ func (p *DynamicPolicy) sharedCoresWithoutNUMABindingAllocationHandler(_ context machineState := p.state.GetMachineState() pooledCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, - state.CheckDedicated, state.CheckNUMABinding) + state.CheckDedicated, state.CheckSharedOrDedicatedNUMABinding) if pooledCPUs.IsEmpty() { general.Errorf("pod: %s/%s, container: %s get empty pooledCPUs", req.PodNamespace, req.PodName, req.ContainerName) @@ -1021,7 +1022,7 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine // 2. construct entries for all pools if poolsCPUSet[state.PoolNameReclaim].IsEmpty() { - return fmt.Errorf("entry: %s is empty", state.PoolNameShare) + return fmt.Errorf("entry: %s is empty", state.PoolNameReclaim) } for poolName, cset := range poolsCPUSet { @@ -1116,6 +1117,20 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine newPodEntries[podUID][containerName].TopologyAwareAssignments = machine.DeepcopyCPUAssignment(rampUpCPUsTopologyAwareAssignments) newPodEntries[podUID][containerName].OriginalTopologyAwareAssignments = machine.DeepcopyCPUAssignment(rampUpCPUsTopologyAwareAssignments) + case apiconsts.PodAnnotationQoSLevelSystemCores: + poolCPUSet, topologyAwareAssignments, err := p.getSystemPoolCPUSetAndNumaAwareAssignments(newPodEntries, allocationInfo) + if err != nil { + return fmt.Errorf("pod: %s/%s, container: %s is system_cores, "+ + "getSystemPoolCPUSetAndNumaAwareAssignments failed with error: %v", + allocationInfo.PodNamespace, allocationInfo.PodName, + allocationInfo.ContainerName, err) + } + + newPodEntries[podUID][containerName].AllocationResult = poolCPUSet + newPodEntries[podUID][containerName].OriginalAllocationResult = poolCPUSet.Clone() + newPodEntries[podUID][containerName].TopologyAwareAssignments = topologyAwareAssignments + newPodEntries[podUID][containerName].OriginalTopologyAwareAssignments = machine.DeepcopyCPUAssignment(topologyAwareAssignments) + case apiconsts.PodAnnotationQoSLevelSharedCores, apiconsts.PodAnnotationQoSLevelReclaimedCores: var ownerPoolName string if state.CheckSharedNUMABinding(allocationInfo) { @@ -1123,9 +1138,9 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine if ownerPoolName == state.EmptyOwnerPoolName { var err error - // why do we itegrate GetOwnerPoolName + GetSpecifiedNUMABindingPoolName into GetPoolName for SharedNUMABinding containers? + // why do we integrate GetOwnerPoolName + GetSpecifiedNUMABindingPoolName into GetPoolName for SharedNUMABinding containers? // it's because we reply on GetSpecifiedPoolName (in GetPoolName) when calling CheckNUMABindingSharedCoresAntiAffinity, - // At that time, NUMA hint for the candicate container isn't confirmed, so we can't implement NUMA hint aware logic in GetSpecifiedPoolName. + // At that time, NUMA hint for the candidate container isn't confirmed, so we can't implement NUMA hint aware logic in GetSpecifiedPoolName. 
ownerPoolName, err = allocationInfo.GetSpecifiedNUMABindingPoolName() if err != nil { return fmt.Errorf("pod: %s/%s, container: %s is shared_cores with numa_binding, "+ @@ -1744,3 +1759,129 @@ func (p *DynamicPolicy) getReclaimOverlapShareRatio(entries state.PodEntries) (m return reclaimOverlapShareRatio, nil } + +func (p *DynamicPolicy) systemCoresHintHandler(_ context.Context, request *pluginapi.ResourceRequest) (*pluginapi.ResourceHintsResponse, error) { + return util.PackResourceHintsResponse(request, string(v1.ResourceCPU), + map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceCPU): nil, // indicates that there is no numa preference + }) +} + +func (p *DynamicPolicy) systemCoresAllocationHandler(ctx context.Context, req *pluginapi.ResourceRequest) (*pluginapi.ResourceAllocationResponse, error) { + if req.ContainerType == pluginapi.ContainerType_SIDECAR { + return p.allocationSidecarHandler(ctx, req, apiconsts.PodAnnotationQoSLevelSystemCores) + } + + allocationInfo := &state.AllocationInfo{ + PodUid: req.PodUid, + PodNamespace: req.PodNamespace, + PodName: req.PodName, + ContainerName: req.ContainerName, + ContainerType: req.ContainerType.String(), + ContainerIndex: req.ContainerIndex, + OwnerPoolName: state.EmptyOwnerPoolName, + PodRole: req.PodRole, + PodType: req.PodType, + InitTimestamp: time.Now().Format(util.QRMTimeFormat), + Labels: general.DeepCopyMap(req.Labels), + Annotations: general.DeepCopyMap(req.Annotations), + QoSLevel: apiconsts.PodAnnotationQoSLevelSystemCores, + } + + poolCPUSet, topologyAwareAssignments, err := p.getSystemPoolCPUSetAndNumaAwareAssignments(p.state.GetPodEntries(), allocationInfo) + if err != nil { + general.ErrorS(err, "unable to get system pool cpuset and topologyAwareAssignments", + "podNamespace", req.PodNamespace, + "podName", req.PodName, + "containerName", req.ContainerName) + return nil, err + } + + systemPoolName, err := allocationInfo.GetSpecifiedSystemPoolName() + if err != nil { + return nil, err + } + + general.InfoS("allocate system pool cpuset successfully", + "podNamespace", req.PodNamespace, + "podName", req.PodName, + "containerName", req.ContainerName, + "poolName", systemPoolName, + "result", poolCPUSet.String(), + "topologyAwareAssignments", topologyAwareAssignments) + + allocationInfo.OwnerPoolName = systemPoolName + allocationInfo.AllocationResult = poolCPUSet + allocationInfo.OriginalAllocationResult = poolCPUSet.Clone() + allocationInfo.TopologyAwareAssignments = topologyAwareAssignments + allocationInfo.OriginalTopologyAwareAssignments = machine.DeepcopyCPUAssignment(topologyAwareAssignments) + + p.state.SetAllocationInfo(allocationInfo.PodUid, allocationInfo.ContainerName, allocationInfo) + podEntries := p.state.GetPodEntries() + + updatedMachineState, err := generateMachineStateFromPodEntries(p.machineInfo.CPUTopology, podEntries) + if err != nil { + general.Errorf("pod: %s/%s, container: %s generateMachineStateFromPodEntries failed with error: %v", + req.PodNamespace, req.PodName, req.ContainerName, err) + return nil, fmt.Errorf("generateMachineStateFromPodEntries failed with error: %v", err) + } + p.state.SetMachineState(updatedMachineState) + + resp, err := cpuutil.PackAllocationResponse(allocationInfo, string(v1.ResourceCPU), util.OCIPropertyNameCPUSetCPUs, false, true, req) + if err != nil { + general.Errorf("pod: %s/%s, container: %s PackResourceAllocationResponseByAllocationInfo failed with error: %v", + req.PodNamespace, req.PodName, req.ContainerName, err) + return nil, 
fmt.Errorf("PackResourceAllocationResponseByAllocationInfo failed with error: %v", err) + } + return resp, nil +} + +func (p *DynamicPolicy) getSystemPoolCPUSetAndNumaAwareAssignments(podEntries state.PodEntries, + allocationInfo *state.AllocationInfo, +) (machine.CPUSet, map[int]machine.CPUSet, error) { + if allocationInfo == nil { + return machine.CPUSet{}, nil, fmt.Errorf("allocationInfo is nil") + } + + poolCPUSet := machine.NewCPUSet() + specifiedPoolName := allocationInfo.GetSpecifiedPoolName() + if specifiedPoolName != state.EmptyOwnerPoolName { + for pool, entries := range podEntries { + if !entries.IsPoolEntry() { + continue + } + + if pool == specifiedPoolName || strings.HasPrefix(pool, specifiedPoolName) { + poolCPUSet = poolCPUSet.Union(entries.GetPoolEntry().AllocationResult) + general.Infof("pod: %s/%s, container: %s get system pool cpuset from pool: %s, cpuset: %s", allocationInfo.PodNamespace, allocationInfo.PodName, + allocationInfo.ContainerName, pool, entries.GetPoolEntry().AllocationResult.String()) + } + } + } + + // if pool set is empty, try to get default cpuset + if poolCPUSet.IsEmpty() { + // if the pod is numa binding, get the default cpuset from machine state + if state.CheckNUMABinding(allocationInfo) { + poolCPUSet = p.state.GetMachineState().GetAvailableCPUSet(p.reservedCPUs) + } + + // if the default cpuset is empty or no numa binding, use all cpuset as default cpuset + if poolCPUSet.IsEmpty() { + poolCPUSet = p.machineInfo.CPUDetails.CPUs() + } + general.Infof("pod: %s/%s, container: %s get system pool cpuset from default cpuset: %s", allocationInfo.PodNamespace, allocationInfo.PodName, + allocationInfo.ContainerName, poolCPUSet.String()) + } + + if poolCPUSet.IsEmpty() { + return machine.CPUSet{}, nil, fmt.Errorf("no system pool cpuset for pool %s", specifiedPoolName) + } + + topologyAwareAssignments, err := machine.GetNumaAwareAssignments(p.machineInfo.CPUTopology, poolCPUSet) + if err != nil { + return machine.CPUSet{}, nil, fmt.Errorf("unable to get numa aware assignments: %v", err) + } + + return poolCPUSet, topologyAwareAssignments, nil +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go index e7efe2095..cebdff5c1 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go @@ -165,7 +165,7 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingHintHandler(_ context.Conte // if hints exists in extra state-file, prefer to use them if hints == nil { - availableNUMAs := machineState.GetFilteredNUMASet(state.CheckNUMABinding) + availableNUMAs := machineState.GetFilteredNUMASet(state.CheckSharedOrDedicatedNUMABinding) var extraErr error hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, string(v1.ResourceCPU), p.extraStateFileAbsPath, availableNUMAs) @@ -794,8 +794,8 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingSharedCores(reqInt int, podE machineState state.NUMANodeMap, req *pluginapi.ResourceRequest, ) (map[string]*pluginapi.ListOfTopologyHints, error) { - nonBindingNUMAsCPUQuantity := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, nil, state.CheckNUMABinding).Size() - nonBindingNUMAs := machineState.GetFilteredNUMASet(state.CheckNUMABinding) + nonBindingNUMAsCPUQuantity := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, nil, state.CheckSharedOrDedicatedNUMABinding).Size() + nonBindingNUMAs := 
machineState.GetFilteredNUMASet(state.CheckSharedOrDedicatedNUMABinding) nonBindingSharedRequestedQuantity := state.GetNonBindingSharedRequestedQuantityFromPodEntries(podEntries, nil, p.getContainerRequestedCores) reqAnnotations := req.Annotations diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go index b41da5ebf..8d255e2f4 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go @@ -72,6 +72,11 @@ func getTestDynamicPolicyWithInitialization(topology *machine.CPUTopology, state return nil, err } + err = dynamicPolicy.initReservePool() + if err != nil { + return nil, err + } + err = dynamicPolicy.initReclaimPool() if err != nil { return nil, err @@ -117,6 +122,7 @@ func getTestDynamicPolicyWithoutInitialization(topology *machine.CPUTopology, st consts.PodAnnotationQoSLevelSharedCores: policyImplement.sharedCoresAllocationHandler, consts.PodAnnotationQoSLevelDedicatedCores: policyImplement.dedicatedCoresAllocationHandler, consts.PodAnnotationQoSLevelReclaimedCores: policyImplement.reclaimedCoresAllocationHandler, + consts.PodAnnotationQoSLevelSystemCores: policyImplement.systemCoresAllocationHandler, } // register hint providers for pods with different QoS level @@ -124,6 +130,7 @@ func getTestDynamicPolicyWithoutInitialization(topology *machine.CPUTopology, st consts.PodAnnotationQoSLevelSharedCores: policyImplement.sharedCoresHintHandler, consts.PodAnnotationQoSLevelDedicatedCores: policyImplement.dedicatedCoresHintHandler, consts.PodAnnotationQoSLevelReclaimedCores: policyImplement.reclaimedCoresHintHandler, + consts.PodAnnotationQoSLevelSystemCores: policyImplement.systemCoresHintHandler, } policyImplement.metaServer = &metaserver.MetaServer{ @@ -892,6 +899,108 @@ func TestAllocate(t *testing.T) { }, cpuTopology: cpuTopology, }, + { + description: "req for system_cores with specified cpuset pool", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 0, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + consts.PodAnnotationCPUEnhancementKey: `{"cpuset_pool": "reserve"}`, + }, + }, + expectedResp: &pluginapi.ResourceAllocationResponse{ + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + AllocationResult: &pluginapi.ResourceAllocation{ + ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ + string(v1.ResourceCPU): { + OciPropertyName: util.OCIPropertyNameCPUSetCPUs, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 2, // reserve pool + AllocationResult: machine.NewCPUSet(0, 2).String(), + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{nil}, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + 
consts.PodAnnotationCPUEnhancementCPUSet: "reserve", + }, + }, + cpuTopology: cpuTopology, + }, + { + description: "req for system_cores without specified cpuset pool", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 0, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + }, + expectedResp: &pluginapi.ResourceAllocationResponse{ + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + AllocationResult: &pluginapi.ResourceAllocation{ + ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ + string(v1.ResourceCPU): { + OciPropertyName: util.OCIPropertyNameCPUSetCPUs, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: float64(cpuTopology.CPUDetails.CPUs().Size()), // default for all cpuset + AllocationResult: cpuTopology.CPUDetails.CPUs().String(), + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{nil}, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + }, + cpuTopology: cpuTopology, + }, } for _, tc := range testCases { diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go index 6493dde31..28ee7e5fa 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go @@ -193,6 +193,27 @@ func (ai *AllocationInfo) GetSpecifiedNUMABindingPoolName() (string, error) { return GetNUMAPoolName(specifiedPoolName, numaSet.ToSliceNoSortUInt64()[0]), nil } +func (ai *AllocationInfo) GetSpecifiedSystemPoolName() (string, error) { + if !CheckSystem(ai) { + return EmptyOwnerPoolName, fmt.Errorf("GetSpecifiedSystemPoolName only for system_cores") + } + + specifiedPoolName := ai.GetSpecifiedPoolName() + if specifiedPoolName == EmptyOwnerPoolName { + return PoolNamePrefixSystem, nil + } + + return fmt.Sprintf("%s%s%s", PoolNamePrefixSystem, "-", specifiedPoolName), nil +} + +func CheckSystem(ai *AllocationInfo) bool { + if ai == nil { + return false + } + + return ai.QoSLevel == consts.PodAnnotationQoSLevelSystemCores +} + // CheckMainContainer returns true if the AllocationInfo is for main container func (ai *AllocationInfo) CheckMainContainer() bool { if ai == nil { @@ -305,6 +326,16 @@ func CheckSharedNUMABinding(ai *AllocationInfo) bool { return CheckShared(ai) && CheckNUMABinding(ai) } +// CheckSharedOrDedicatedNUMABinding returns true if the AllocationInfo is for pod with +// shared-qos or dedicated-qos and numa-binding enhancement +func CheckSharedOrDedicatedNUMABinding(ai *AllocationInfo) bool { + if ai == nil { + return false + } + + return CheckSharedNUMABinding(ai) || CheckDedicatedNUMABinding(ai) +} + // CheckDedicatedPool returns true if the AllocationInfo is for a container in the dedicated pool func CheckDedicatedPool(ai *AllocationInfo) bool 
{ if ai == nil { diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go index 6859303c1..77fe9a3e5 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go @@ -38,6 +38,7 @@ const ( PoolNameDedicated = "dedicated" PoolNameReserve = "reserve" PoolNamePrefixIsolation = "isolation" + PoolNamePrefixSystem = "system" // PoolNameFallback is not a real pool, and is a union of // all none-reclaimed pools to put pod should have been isolated @@ -195,9 +196,15 @@ func IsIsolationPool(poolName string) bool { return strings.HasPrefix(poolName, PoolNamePrefixIsolation) } +func IsSystemPool(poolName string) bool { + return strings.HasPrefix(poolName, PoolNamePrefixSystem) +} + func GetPoolType(poolName string) string { if IsIsolationPool(poolName) { return PoolNamePrefixIsolation + } else if IsSystemPool(poolName) { + return PoolNamePrefixSystem } switch poolName { case PoolNameReclaim, PoolNameDedicated, PoolNameReserve, PoolNameFallback: @@ -215,6 +222,8 @@ func GetSpecifiedPoolName(qosLevel, cpusetEnhancementValue string) string { return cpusetEnhancementValue } return PoolNameShare + case apiconsts.PodAnnotationQoSLevelSystemCores: + return cpusetEnhancementValue case apiconsts.PodAnnotationQoSLevelReclaimedCores: return PoolNameReclaim case apiconsts.PodAnnotationQoSLevelDedicatedCores: @@ -466,7 +475,7 @@ func GenerateMachineStateFromPodEntriesByPolicy(topology *machine.CPUTopology, p // only modify allocated and default properties in NUMA node state if the policy is dynamic and the entry indicates numa_binding. // shared_cores with numa_binding also contributes to numaNodeState.AllocatedCPUSet, // it's convenient that we can skip NUMA with AllocatedCPUSet > 0 when allocating CPUs for dedicated_cores with numa_binding. 
-					if CheckNUMABinding(allocationInfo) {
+					if CheckSharedOrDedicatedNUMABinding(allocationInfo) {
 						allocatedCPUsInNumaNode = allocatedCPUsInNumaNode.Union(allocationInfo.OriginalTopologyAwareAssignments[int(numaNode)])
 					}
 				case cpuconsts.CPUResourcePluginPolicyNameNative:
diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util_test.go
index 511542f1f..b0b0c90ed 100644
--- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util_test.go
+++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util_test.go
@@ -552,6 +552,14 @@ func TestGetSpecifiedPoolName(t *testing.T) {
 			},
 			want: PoolNameReclaim,
 		},
+		{
+			name: "system_cores with specified cpusetEnhancementValue",
+			args: args{
+				qosLevel:               consts.PodAnnotationQoSLevelSystemCores,
+				cpusetEnhancementValue: "reserve",
+			},
+			want: "reserve",
+		},
 	}
 	for _, tt := range tests {
 		tt := tt
diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/util.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/util.go
index b2cc5354d..546c065d8 100644
--- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/util.go
+++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/util.go
@@ -44,7 +44,7 @@ func generateMachineStateFromPodEntries(topology *machine.CPUTopology, podEntrie
 // because qos level and annotations will change after we support customized updater of enhancements and qos level
 func updateAllocationInfoByReq(req *pluginapi.ResourceRequest, allocationInfo *state.AllocationInfo) error {
 	if req == nil {
-		return fmt.Errorf("updateAllocationInfoByReq got ni l req")
+		return fmt.Errorf("updateAllocationInfoByReq got nil req")
 	} else if allocationInfo == nil {
 		return nil
 	}
diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go
index 4a42d8ed2..650526dad 100644
--- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go
+++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go
@@ -230,12 +230,14 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration
 		apiconsts.PodAnnotationQoSLevelSharedCores:    policyImplement.sharedCoresAllocationHandler,
 		apiconsts.PodAnnotationQoSLevelDedicatedCores: policyImplement.dedicatedCoresAllocationHandler,
 		apiconsts.PodAnnotationQoSLevelReclaimedCores: policyImplement.reclaimedCoresAllocationHandler,
+		apiconsts.PodAnnotationQoSLevelSystemCores:    policyImplement.systemCoresAllocationHandler,
 	}
 
 	policyImplement.hintHandlers = map[string]util.HintHandler{
 		apiconsts.PodAnnotationQoSLevelSharedCores:    policyImplement.sharedCoresHintHandler,
 		apiconsts.PodAnnotationQoSLevelDedicatedCores: policyImplement.dedicatedCoresHintHandler,
 		apiconsts.PodAnnotationQoSLevelReclaimedCores: policyImplement.reclaimedCoresHintHandler,
+		apiconsts.PodAnnotationQoSLevelSystemCores:    policyImplement.systemCoresHintHandler,
 	}
 
 	policyImplement.asyncLimitedWorkersMap = map[string]*asyncworker.AsyncLimitedWorkers{
@@ -1142,7 +1144,7 @@ func (p *DynamicPolicy) checkNormalShareCoresResource(req *pluginapi.ResourceReq
 	machineState := p.state.GetMachineState()
 	resourceState := machineState[v1.ResourceMemory]
 
-	numaWithoutNUMABindingPods := resourceState.GetNUMANodesWithoutNUMABindingPods()
+	numaWithoutNUMABindingPods := resourceState.GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods()
 	numaAllocatableWithoutNUMABindingPods := uint64(0)
 	for _, numaID := range numaWithoutNUMABindingPods.ToSliceInt() {
 		numaAllocatableWithoutNUMABindingPods += resourceState[numaID].Allocatable
diff --git
a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go index 8da9becbc..15dea2666 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go @@ -49,6 +49,24 @@ func (p *DynamicPolicy) sharedCoresAllocationHandler(ctx context.Context, } } +func (p *DynamicPolicy) systemCoresAllocationHandler(_ context.Context, req *pluginapi.ResourceRequest) (*pluginapi.ResourceAllocationResponse, error) { + if req == nil { + return nil, fmt.Errorf("systemCoresAllocationHandler got nil request") + } + + switch req.Annotations[apiconsts.PodAnnotationMemoryEnhancementNumaBinding] { + case apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable: + resourcesMachineState := p.state.GetMachineState() + defaultSystemCoresNUMAs := p.getDefaultSystemCoresNUMAs(resourcesMachineState[v1.ResourceMemory]) + // allocate system_cores pod with NUMA binding + // todo: currently we only set cpuset.mems for system_cores pods with numa binding to NUMAs without dedicated and NUMA binding and NUMA exclusive pod, + // in the future, we set them according to their cpuset_pool annotation. + return p.allocateTargetNUMAs(req, apiconsts.PodAnnotationQoSLevelSystemCores, defaultSystemCoresNUMAs) + default: + return p.allocateTargetNUMAs(req, apiconsts.PodAnnotationQoSLevelSystemCores, p.topology.CPUDetails.NUMANodes()) + } +} + func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context, req *pluginapi.ResourceRequest, ) (*pluginapi.ResourceAllocationResponse, error) { @@ -66,7 +84,7 @@ func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context, // we will support adjusting cpuset.mems for reclaimed_cores dynamically according to memory advisor. // Notice: before supporting dynamic adjustment, not to hybrid reclaimed_cores // with dedicated_cores numa_binding containers. - return p.allocateAllNUMAs(req, apiconsts.PodAnnotationQoSLevelReclaimedCores) + return p.allocateTargetNUMAs(req, apiconsts.PodAnnotationQoSLevelReclaimedCores, p.topology.CPUDetails.NUMANodes()) } func (p *DynamicPolicy) dedicatedCoresAllocationHandler(ctx context.Context, @@ -307,7 +325,7 @@ func (p *DynamicPolicy) allocateNUMAsWithoutNUMABindingPods(_ context.Context, machineState := p.state.GetMachineState() resourceState := machineState[v1.ResourceMemory] - numaWithoutNUMABindingPods := resourceState.GetNUMANodesWithoutNUMABindingPods() + numaWithoutNUMABindingPods := resourceState.GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods() allocationInfo := p.state.GetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName) if allocationInfo != nil { @@ -358,20 +376,19 @@ func (p *DynamicPolicy) allocateNUMAsWithoutNUMABindingPods(_ context.Context, return resp, nil } -// allocateAllNUMAs returns all numa node as allocation results, +// allocateTargetNUMAs returns target numa nodes as allocation results, // and it will store the allocation in states. 
-func (p *DynamicPolicy) allocateAllNUMAs(req *pluginapi.ResourceRequest, - qosLevel string, +func (p *DynamicPolicy) allocateTargetNUMAs(req *pluginapi.ResourceRequest, + qosLevel string, targetNUMAs machine.CPUSet, ) (*pluginapi.ResourceAllocationResponse, error) { if !pluginapi.SupportedKatalystQoSLevels.Has(qosLevel) { return nil, fmt.Errorf("invalid qosLevel: %s", qosLevel) } - allNUMAs := p.topology.CPUDetails.NUMANodes() allocationInfo := p.state.GetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName) - if allocationInfo != nil && !allocationInfo.NumaAllocationResult.Equals(allNUMAs) { + if allocationInfo != nil && !allocationInfo.NumaAllocationResult.Equals(targetNUMAs) { general.Infof("pod: %s/%s, container: %s change cpuset.mems from: %s to %s", - req.PodNamespace, req.PodName, req.ContainerName, allocationInfo.NumaAllocationResult.String(), allNUMAs.String()) + req.PodNamespace, req.PodName, req.ContainerName, allocationInfo.NumaAllocationResult.String(), targetNUMAs.String()) } allocationInfo = &state.AllocationInfo{ @@ -383,7 +400,7 @@ func (p *DynamicPolicy) allocateAllNUMAs(req *pluginapi.ResourceRequest, ContainerIndex: req.ContainerIndex, PodRole: req.PodRole, PodType: req.PodType, - NumaAllocationResult: allNUMAs.Clone(), + NumaAllocationResult: targetNUMAs.Clone(), Labels: general.DeepCopyMap(req.Labels), Annotations: general.DeepCopyMap(req.Annotations), QoSLevel: qosLevel, @@ -419,88 +436,14 @@ func (p *DynamicPolicy) adjustAllocationEntries() error { machineState := resourcesMachineState[v1.ResourceMemory] podEntries := podResourceEntries[v1.ResourceMemory] - numaWithoutNUMABindingPods := machineState.GetNUMANodesWithoutNUMABindingPods() - general.Infof("numaWithoutNUMABindingPods: %s", numaWithoutNUMABindingPods.String()) - // for numaSetChangedContainers, we should reset their allocation info and // trigger necessary Knob actions (like dropping caches or migrate memory // to make sure already-allocated memory cooperate with the new numaset) - numaSetChangedContainers := make(map[string]map[string]bool) - for podUID, containerEntries := range podEntries { - for containerName, allocationInfo := range containerEntries { - if allocationInfo == nil { - general.Errorf("pod: %s, container: %s has nil allocationInfo", podUID, containerName) - continue - } else if containerName == "" { - general.Errorf("pod: %s has empty containerName entry", podUID) - continue - } else if allocationInfo.CheckNumaBinding() { - // not to adjust NUMA binding containers - continue - } else if allocationInfo.QoSLevel == apiconsts.PodAnnotationQoSLevelReclaimedCores { - // todo: consider strategy here after supporting cpuset.mems dynamic adjustment - continue - } - - // todo: currently we only set cpuset.mems to NUMAs without NUMA binding for pods isn't NUMA binding - // when cgroup memory policy becomes ready, we will allocate quantity for each pod meticulously. 
- if !allocationInfo.NumaAllocationResult.IsSubsetOf(numaWithoutNUMABindingPods) { - if numaSetChangedContainers[podUID] == nil { - numaSetChangedContainers[podUID] = make(map[string]bool) - } - numaSetChangedContainers[podUID][containerName] = true - } - - if !allocationInfo.NumaAllocationResult.Equals(numaWithoutNUMABindingPods) { - general.Infof("pod: %s/%s, container: %s change cpuset.mems from: %s to %s", - allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, - allocationInfo.NumaAllocationResult.String(), numaWithoutNUMABindingPods.String()) - } - - allocationInfo.NumaAllocationResult = numaWithoutNUMABindingPods.Clone() - allocationInfo.TopologyAwareAllocations = nil - } - } - - // TODO: optimize this logic someday: - // only for refresh memory request for old inplace update resized pods. - for podUID, containerEntries := range podEntries { - for containerName, allocationInfo := range containerEntries { - if allocationInfo == nil { - general.Errorf("pod: %s, container: %s has nil allocationInfo", podUID, containerName) - continue - } else if containerName == "" { - general.Errorf("pod: %s has empty containerName entry", podUID) - continue - } else if allocationInfo.QoSLevel != apiconsts.PodAnnotationQoSLevelSharedCores { - continue - } - if allocationInfo.QoSLevel == apiconsts.PodAnnotationQoSLevelSharedCores { - if allocationInfo.CheckNumaBinding() { - if allocationInfo.CheckSideCar() { - continue - } - - if len(allocationInfo.TopologyAwareAllocations) != 1 { - general.Errorf("pod: %s/%s, container: %s topologyAwareAllocations length is not 1: %v", - allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, allocationInfo.TopologyAwareAllocations) - continue - } - - // update AggregatedQuantity && TopologyAwareAllocations for snb - allocationInfo.AggregatedQuantity = p.getContainerRequestedMemoryBytes(allocationInfo) - for numaId, quantity := range allocationInfo.TopologyAwareAllocations { - if quantity != allocationInfo.AggregatedQuantity { - allocationInfo.TopologyAwareAllocations[numaId] = allocationInfo.AggregatedQuantity - } - } - } else { - // update AggregatedQuantity for normal share cores - allocationInfo.AggregatedQuantity = p.getContainerRequestedMemoryBytes(allocationInfo) - } - } - } - } + numaSetChangedContainers := make(map[string]map[string]*state.AllocationInfo) + p.adjustAllocationEntriesForSharedCores(numaSetChangedContainers, podEntries, machineState) + p.adjustAllocationEntriesForDedicatedCores(numaSetChangedContainers, podEntries, machineState) + p.adjustAllocationEntriesForSystemCores(numaSetChangedContainers, podEntries, machineState) + // todo: adjust allocation entries for reclaimed cores resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetReservedMemory()) if err != nil { @@ -510,47 +453,9 @@ func (p *DynamicPolicy) adjustAllocationEntries() error { p.state.SetPodResourceEntries(podResourceEntries) p.state.SetMachineState(resourcesMachineState) - movePagesWorkers, ok := p.asyncLimitedWorkersMap[memoryPluginAsyncWorkTopicMovePage] - if !ok { - return fmt.Errorf("asyncLimitedWorkers for %s not found", memoryPluginAsyncWorkTopicMovePage) - } - - // drop cache and migrate pages for containers whose numaset changed - for podUID, containers := range numaSetChangedContainers { - for containerName := range containers { - containerID, err := p.metaServer.GetContainerID(podUID, containerName) - if err != nil { - general.Errorf("get 
container id of pod: %s container: %s failed with error: %v", podUID, containerName, err) - continue - } - - container, err := p.metaServer.GetContainerSpec(podUID, containerName) - if err != nil || container == nil { - general.Errorf("get container spec for pod: %s, container: %s failed with error: %v", podUID, containerName, err) - continue - } - - if !numaWithoutNUMABindingPods.IsEmpty() { - movePagesWorkName := util.GetContainerAsyncWorkName(podUID, containerName, - memoryPluginAsyncWorkTopicMovePage) - // start a asynchronous work to migrate pages for containers whose numaset changed and doesn't require numa_binding - err = movePagesWorkers.AddWork( - &asyncworker.Work{ - Name: movePagesWorkName, - UID: uuid.NewUUID(), - Fn: MovePagesForContainer, - Params: []interface{}{ - podUID, containerID, - p.topology.CPUDetails.NUMANodes(), - numaWithoutNUMABindingPods.Clone(), - }, - DeliveredAt: time.Now(), - }, asyncworker.DuplicateWorkPolicyOverride) - if err != nil { - general.Errorf("add work: %s pod: %s container: %s failed with error: %v", movePagesWorkName, podUID, containerName, err) - } - } - } + err = p.migratePagesForNUMASetChangedContainers(numaSetChangedContainers) + if err != nil { + return fmt.Errorf("migratePagesForNUMASetChangedContainers failed with error: %v", err) } return nil @@ -763,3 +668,192 @@ func packAllocationResponse(allocationInfo *state.AllocationInfo, req *pluginapi Annotations: general.DeepCopyMap(req.Annotations), }, nil } + +func (p *DynamicPolicy) adjustAllocationEntriesForSharedCores(numaSetChangedContainers map[string]map[string]*state.AllocationInfo, + podEntries state.PodEntries, machineState state.NUMANodeMap, +) { + numaWithoutNUMABindingPods := machineState.GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods() + general.Infof("numaWithoutNUMABindingPods: %s", numaWithoutNUMABindingPods.String()) + + for podUID, containerEntries := range podEntries { + for containerName, allocationInfo := range containerEntries { + if allocationInfo == nil { + general.Errorf("pod: %s, container: %s has nil allocationInfo", podUID, containerName) + continue + } else if containerName == "" { + general.Errorf("pod: %s has empty containerName entry", podUID) + continue + } else if allocationInfo.QoSLevel != apiconsts.PodAnnotationQoSLevelSharedCores { + // not to adjust NUMA binding containers + continue + } + + if !allocationInfo.CheckNumaBinding() { + // update container to target numa set for normal share cores + p.updateNUMASetChangedContainers(numaSetChangedContainers, allocationInfo, numaWithoutNUMABindingPods) + + // update AggregatedQuantity for normal share cores + allocationInfo.AggregatedQuantity = p.getContainerRequestedMemoryBytes(allocationInfo) + } else { + // memory of sidecar in snb pod is belonged to main container so we don't need to adjust it + if allocationInfo.CheckSideCar() { + continue + } + + if len(allocationInfo.TopologyAwareAllocations) != 1 { + general.Errorf("pod: %s/%s, container: %s topologyAwareAllocations length is not 1: %v", + allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, allocationInfo.TopologyAwareAllocations) + continue + } + + // only for refresh memory request for old inplace update resized pods. 
+ // update AggregatedQuantity && TopologyAwareAllocations for snb + allocationInfo.AggregatedQuantity = p.getContainerRequestedMemoryBytes(allocationInfo) + for numaId, quantity := range allocationInfo.TopologyAwareAllocations { + if quantity != allocationInfo.AggregatedQuantity { + allocationInfo.TopologyAwareAllocations[numaId] = allocationInfo.AggregatedQuantity + } + } + } + } + } +} + +func (p *DynamicPolicy) adjustAllocationEntriesForDedicatedCores(numaSetChangedContainers map[string]map[string]*state.AllocationInfo, + podEntries state.PodEntries, machineState state.NUMANodeMap, +) { + numaWithoutNUMABindingPods := machineState.GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods() + general.Infof("numaWithoutNUMABindingPods: %s", numaWithoutNUMABindingPods.String()) + + for podUID, containerEntries := range podEntries { + for containerName, allocationInfo := range containerEntries { + if allocationInfo == nil { + general.Errorf("pod: %s, container: %s has nil allocationInfo", podUID, containerName) + continue + } else if containerName == "" { + general.Errorf("pod: %s has empty containerName entry", podUID) + continue + } else if allocationInfo.QoSLevel != apiconsts.PodAnnotationQoSLevelDedicatedCores { + continue + } + + if !allocationInfo.CheckNumaBinding() { + // not to adjust NUMA binding containers + // update container to target numa set for normal share cores + p.updateNUMASetChangedContainers(numaSetChangedContainers, allocationInfo, numaWithoutNUMABindingPods) + } + } + } +} + +// adjustAllocationEntriesForSystemCores adjusts the allocation entries for system cores pods. +func (p *DynamicPolicy) adjustAllocationEntriesForSystemCores(numaSetChangedContainers map[string]map[string]*state.AllocationInfo, + podEntries state.PodEntries, machineState state.NUMANodeMap, +) { + defaultSystemCoresNUMAs := p.getDefaultSystemCoresNUMAs(machineState) + + for podUID, containerEntries := range podEntries { + for containerName, allocationInfo := range containerEntries { + if allocationInfo == nil { + general.Errorf("pod: %s, container: %s has nil allocationInfo", podUID, containerName) + continue + } else if containerName == "" { + general.Errorf("pod: %s has empty containerName entry", podUID) + continue + } else if allocationInfo.QoSLevel != apiconsts.PodAnnotationQoSLevelSystemCores { + continue + } + + if allocationInfo.CheckNumaBinding() { + // update container to target numa set for system_cores pod with NUMA binding + // todo: currently we only update cpuset.mems for system_cores pods to NUMAs without dedicated and NUMA binding and NUMA exclusive pod, + // in the future, we will update cpuset.mems for system_cores according to their cpuset_pool annotation. + p.updateNUMASetChangedContainers(numaSetChangedContainers, allocationInfo, defaultSystemCoresNUMAs) + } + } + } +} + +func (p *DynamicPolicy) updateNUMASetChangedContainers(numaSetChangedContainers map[string]map[string]*state.AllocationInfo, + allocationInfo *state.AllocationInfo, targetNumaSet machine.CPUSet, +) { + if numaSetChangedContainers == nil || allocationInfo == nil { + return + } + + // todo: currently we only set cpuset.mems to NUMAs without NUMA binding for pods isn't NUMA binding + // when cgroup memory policy becomes ready, we will allocate quantity for each pod meticulously. 
+	if !allocationInfo.NumaAllocationResult.IsSubsetOf(targetNumaSet) {
+		if numaSetChangedContainers[allocationInfo.PodUid] == nil {
+			numaSetChangedContainers[allocationInfo.PodUid] = make(map[string]*state.AllocationInfo)
+		}
+		numaSetChangedContainers[allocationInfo.PodUid][allocationInfo.ContainerName] = allocationInfo
+	}
+
+	if !allocationInfo.NumaAllocationResult.Equals(targetNumaSet) {
+		general.Infof("pod: %s/%s, container: %s change cpuset.mems from: %s to %s",
+			allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName,
+			allocationInfo.NumaAllocationResult.String(), targetNumaSet.String())
+	}
+
+	allocationInfo.NumaAllocationResult = targetNumaSet.Clone()
+	allocationInfo.TopologyAwareAllocations = nil
+}
+
+func (p *DynamicPolicy) migratePagesForNUMASetChangedContainers(numaSetChangedContainers map[string]map[string]*state.AllocationInfo) error {
+	movePagesWorkers, ok := p.asyncLimitedWorkersMap[memoryPluginAsyncWorkTopicMovePage]
+	if !ok {
+		return fmt.Errorf("asyncLimitedWorkers for %s not found", memoryPluginAsyncWorkTopicMovePage)
+	}
+
+	// drop cache and migrate pages for containers whose numaset changed
+	for podUID, containers := range numaSetChangedContainers {
+		for containerName, allocationInfo := range containers {
+			containerID, err := p.metaServer.GetContainerID(podUID, containerName)
+			if err != nil {
+				general.Errorf("get container id of pod: %s container: %s failed with error: %v", podUID, containerName, err)
+				continue
+			}
+
+			container, err := p.metaServer.GetContainerSpec(podUID, containerName)
+			if err != nil || container == nil {
+				general.Errorf("get container spec for pod: %s, container: %s failed with error: %v", podUID, containerName, err)
+				continue
+			}
+
+			if !allocationInfo.NumaAllocationResult.IsEmpty() {
+				movePagesWorkName := util.GetContainerAsyncWorkName(podUID, containerName,
+					memoryPluginAsyncWorkTopicMovePage)
+				// start an asynchronous work to migrate pages for containers whose numaset changed and doesn't require numa_binding
+				err = movePagesWorkers.AddWork(
+					&asyncworker.Work{
+						Name: movePagesWorkName,
+						UID:  uuid.NewUUID(),
+						Fn:   MovePagesForContainer,
+						Params: []interface{}{
+							podUID, containerID,
+							p.topology.CPUDetails.NUMANodes(),
+							allocationInfo.NumaAllocationResult.Clone(),
+						},
+						DeliveredAt: time.Now(),
+					}, asyncworker.DuplicateWorkPolicyOverride)
+				if err != nil {
+					general.Errorf("add work: %s pod: %s container: %s failed with error: %v", movePagesWorkName, podUID, containerName, err)
+				}
+			}
+		}
+	}
+
+	return nil
+}
+
+// getDefaultSystemCoresNUMAs returns the default system cores NUMAs.
+func (p *DynamicPolicy) getDefaultSystemCoresNUMAs(machineState state.NUMANodeMap) machine.CPUSet {
+	numaNodesWithoutNUMABindingAndNUMAExclusivePods := machineState.GetNUMANodesWithoutDedicatedNUMABindingAndNUMAExclusivePods()
+	general.Infof("numaNodesWithoutNUMABindingAndNUMAExclusivePods: %s", numaNodesWithoutNUMABindingAndNUMAExclusivePods.String())
+	if numaNodesWithoutNUMABindingAndNUMAExclusivePods.IsEmpty() {
+		// if there are no NUMA nodes without NUMA binding and NUMA exclusive pods, we will use all numa nodes.
+ return p.topology.CPUDetails.NUMANodes() + } + return numaNodesWithoutNUMABindingAndNUMAExclusivePods +} diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_async_handler.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_async_handler.go index 2eb90cd33..5790d7f52 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_async_handler.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_async_handler.go @@ -277,7 +277,7 @@ func (p *DynamicPolicy) checkMemorySet(_ *coreconfig.Configuration, general.Warningf("skip memset checking for pod: %s/%s container: %s with zero memory request", allocationInfo.PodNamespace, allocationInfo.PodName, containerName) continue - } else if allocationInfo.CheckNumaBinding() { + } else if allocationInfo.CheckSharedOrDedicatedNUMABinding() { unionNUMABindingStateMemorySet = unionNUMABindingStateMemorySet.Union(allocationInfo.NumaAllocationResult) } @@ -373,7 +373,7 @@ func (p *DynamicPolicy) checkMemorySet(_ *coreconfig.Configuration, } machineState := p.state.GetMachineState()[v1.ResourceMemory] - notAssignedMemSet := machineState.GetNUMANodesWithoutNUMABindingPods() + notAssignedMemSet := machineState.GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods() if !unionNUMABindingStateMemorySet.Union(notAssignedMemSet).Equals(p.topology.CPUDetails.NUMANodes()) { general.Infof("found node memset invalid. unionNUMABindingStateMemorySet: %s, notAssignedMemSet: %s, topology: %s", unionNUMABindingStateMemorySet.String(), notAssignedMemSet.String(), p.topology.CPUDetails.NUMANodes().String()) @@ -476,7 +476,7 @@ func (p *DynamicPolicy) clearResidualState(_ *coreconfig.Configuration, } // setMemoryMigrate is used to calculate and set memory migrate configuration, notice that -// 1. not to set memory migrate for NUMA binding containers +// 1. not to set memory migrate for shared or dedicated NUMA binding containers // 2. for a certain given pod/container, only one setting action is on the flight // 3. 
the setting action is done asynchronously to avoid hang
 func (p *DynamicPolicy) setMemoryMigrate() {
@@ -496,7 +496,7 @@ func (p *DynamicPolicy) setMemoryMigrate() {
 		} else if containerName == "" {
 			general.Errorf("pod: %s has empty containerName entry", podUID)
 			continue
-		} else if allocationInfo.CheckNumaBinding() {
+		} else if allocationInfo.CheckSharedOrDedicatedNUMABinding() {
 			continue
 		}
diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go
index c441125fd..f8f4839a3 100644
--- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go
+++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go
@@ -71,6 +71,17 @@ func (p *DynamicPolicy) sharedCoresHintHandler(ctx context.Context,
 		})
 }
 
+func (p *DynamicPolicy) systemCoresHintHandler(_ context.Context, req *pluginapi.ResourceRequest) (*pluginapi.ResourceHintsResponse, error) {
+	if req == nil {
+		return nil, fmt.Errorf("got nil request")
+	}
+
+	return util.PackResourceHintsResponse(req, string(v1.ResourceMemory),
+		map[string]*pluginapi.ListOfTopologyHints{
+			string(v1.ResourceMemory): nil, // indicates that there is no numa preference
+		})
+}
+
 func (p *DynamicPolicy) reclaimedCoresHintHandler(ctx context.Context,
 	req *pluginapi.ResourceRequest,
 ) (*pluginapi.ResourceHintsResponse, error) {
@@ -161,7 +172,7 @@ func (p *DynamicPolicy) numaBindingHintHandler(_ context.Context,
 
 	// if hints exists in extra state-file, prefer to use them
 	if hints == nil {
-		availableNUMAs := resourcesMachineState[v1.ResourceMemory].GetNUMANodesWithoutNUMABindingPods()
+		availableNUMAs := resourcesMachineState[v1.ResourceMemory].GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods()
 
 		var extraErr error
 		hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, string(v1.ResourceMemory),
diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go
index 0bf54d069..105ea4073 100644
--- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go
+++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go
@@ -142,12 +142,14 @@ func getTestDynamicPolicyWithInitialization(topology *machine.CPUTopology, machi
 		consts.PodAnnotationQoSLevelSharedCores:    policyImplement.sharedCoresAllocationHandler,
 		consts.PodAnnotationQoSLevelDedicatedCores: policyImplement.dedicatedCoresAllocationHandler,
 		consts.PodAnnotationQoSLevelReclaimedCores: policyImplement.reclaimedCoresAllocationHandler,
+		consts.PodAnnotationQoSLevelSystemCores:    policyImplement.systemCoresAllocationHandler,
 	}
 
 	policyImplement.hintHandlers = map[string]util.HintHandler{
 		consts.PodAnnotationQoSLevelSharedCores:    policyImplement.sharedCoresHintHandler,
 		consts.PodAnnotationQoSLevelDedicatedCores: policyImplement.dedicatedCoresHintHandler,
 		consts.PodAnnotationQoSLevelReclaimedCores: policyImplement.reclaimedCoresHintHandler,
+		consts.PodAnnotationQoSLevelSystemCores:    policyImplement.systemCoresHintHandler,
 	}
 
 	policyImplement.asyncWorkers = asyncworker.NewAsyncWorkers(memoryPluginAsyncWorkersName, policyImplement.emitter)
@@ -1100,6 +1102,44 @@ func TestGetTopologyHints(t *testing.T) {
 				},
 			},
 		},
+		{
+			description: "req for system_cores main container",
+			req: &pluginapi.ResourceRequest{
+				PodUid:         string(uuid.NewUUID()),
+				PodNamespace:   testName,
+				PodName:        testName,
+				ContainerName:  testName,
+				ContainerType:  pluginapi.ContainerType_MAIN,
+				ContainerIndex: 0,
+				ResourceName:   string(v1.ResourceMemory),
+				ResourceRequests: map[string]float64{
+					string(v1.ResourceMemory): 1073741824,
+				},
+				Labels: map[string]string{
+					consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores,
+				},
+				Annotations: map[string]string{
+					consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores,
+				},
+			},
+			expectedResp: &pluginapi.ResourceHintsResponse{
+				PodNamespace:   testName,
+				PodName:        testName,
+				ContainerName:  testName,
+				ContainerType:  pluginapi.ContainerType_MAIN,
+				ContainerIndex: 0,
+				ResourceName:   string(v1.ResourceMemory),
+				ResourceHints: map[string]*pluginapi.ListOfTopologyHints{
+					string(v1.ResourceMemory): nil,
+				},
+				Labels: map[string]string{
+					consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores,
+				},
+				Annotations: map[string]string{
+					consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores,
+				},
+			},
+		},
 		{
 			description: "req for dedicated_cores with numa_binding & numa_exclusive main container",
 			req: &pluginapi.ResourceRequest{
@@ -1732,13 +1772,13 @@ func TestGetResourcesAllocation(t *testing.T) {
 	as.NotNil(resp1.PodResources[req.PodUid])
 	as.NotNil(resp1.PodResources[req.PodUid].ContainerResources[testName])
 	as.NotNil(resp1.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)])
-	as.Equal(resp1.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)], &pluginapi.ResourceAllocationInfo{
+	as.Equal(&pluginapi.ResourceAllocationInfo{
 		OciPropertyName:   util.OCIPropertyNameCPUSetMems,
 		IsNodeResource:    false,
 		IsScalarResource:  true,
 		AllocatedQuantity: 1073741824,
 		AllocationResult:  machine.NewCPUSet(0, 1, 2, 3).String(),
-	})
+	}, resp1.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)])
 
 	// test for reclaimed_cores
 	req = &pluginapi.ResourceRequest{
@@ -1769,13 +1809,13 @@ func TestGetResourcesAllocation(t *testing.T) {
 	as.NotNil(resp2.PodResources[req.PodUid])
 	as.NotNil(resp2.PodResources[req.PodUid].ContainerResources[testName])
 	as.NotNil(resp2.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)])
-	as.Equal(resp2.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)], &pluginapi.ResourceAllocationInfo{
+	as.Equal(&pluginapi.ResourceAllocationInfo{
 		OciPropertyName:   util.OCIPropertyNameCPUSetMems,
 		IsNodeResource:    false,
 		IsScalarResource:  true,
 		AllocatedQuantity: 0,
 		AllocationResult:  machine.NewCPUSet(0, 1, 2, 3).String(),
-	})
+	}, resp2.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)])
 
 	os.RemoveAll(tmpDir)
 	dynamicPolicy, err = getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir)
@@ -1815,13 +1855,98 @@ func TestGetResourcesAllocation(t *testing.T) {
 	as.NotNil(resp3.PodResources[req.PodUid])
 	as.NotNil(resp3.PodResources[req.PodUid].ContainerResources[testName])
 	as.NotNil(resp3.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)])
-	as.Equal(resp3.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)], &pluginapi.ResourceAllocationInfo{
+	as.Equal(&pluginapi.ResourceAllocationInfo{
 		OciPropertyName:   util.OCIPropertyNameCPUSetMems,
 		IsNodeResource:    false,
 		IsScalarResource:  true,
 		AllocatedQuantity: 7516192768,
 		AllocationResult:  machine.NewCPUSet(0).String(),
-	})
+	}, resp3.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)])
+
+	// test for system_cores with cpuset_pool reserve
+	req = &pluginapi.ResourceRequest{
+		PodUid:         string(uuid.NewUUID()),
+		PodNamespace:   testName,
+		PodName:        testName,
+		ContainerName:  testName,
+		ContainerType:  pluginapi.ContainerType_MAIN,
+		ContainerIndex: 0,
+		ResourceName:   string(v1.ResourceMemory),
+		Hint: &pluginapi.TopologyHint{
+			Nodes:     []uint64{0},
+			Preferred: true,
+		},
+		ResourceRequests: map[string]float64{
+			string(v1.ResourceMemory): 2147483648,
+		},
+		Annotations: map[string]string{
+			consts.PodAnnotationQoSLevelKey:       consts.PodAnnotationQoSLevelSystemCores,
+			consts.PodAnnotationCPUEnhancementKey: `{"cpuset_pool": "reserve"}`,
+		},
+		Labels: map[string]string{
+			consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores,
+		},
+	}
+
+	_, err = dynamicPolicy.Allocate(context.Background(), req)
+	as.Nil(err)
+
+	resp4, err := dynamicPolicy.GetResourcesAllocation(context.Background(), &pluginapi.GetResourcesAllocationRequest{})
+	as.Nil(err)
+
+	as.NotNil(resp4.PodResources[req.PodUid])
+	as.NotNil(resp4.PodResources[req.PodUid].ContainerResources[testName])
+	as.NotNil(resp4.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)])
+	as.Equal(&pluginapi.ResourceAllocationInfo{
+		OciPropertyName:   util.OCIPropertyNameCPUSetMems,
+		IsNodeResource:    false,
+		IsScalarResource:  true,
+		AllocatedQuantity: 0,
+		AllocationResult:  machine.NewCPUSet(0, 1, 2, 3).String(),
+	}, resp4.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)])
+
+	// test for system_cores with cpuset_pool reserve and with numa binding
+	req = &pluginapi.ResourceRequest{
+		PodUid:         string(uuid.NewUUID()),
+		PodNamespace:   testName,
+		PodName:        testName,
+		ContainerName:  testName,
+		ContainerType:  pluginapi.ContainerType_MAIN,
+		ContainerIndex: 0,
+		ResourceName:   string(v1.ResourceMemory),
+		Hint: &pluginapi.TopologyHint{
+			Nodes:     []uint64{0},
+			Preferred: true,
+		},
+		ResourceRequests: map[string]float64{
+			string(v1.ResourceMemory): 2147483648,
+		},
+		Annotations: map[string]string{
+			consts.PodAnnotationQoSLevelKey:          consts.PodAnnotationQoSLevelSystemCores,
+			consts.PodAnnotationCPUEnhancementKey:    `{"cpuset_pool": "reserve"}`,
+			consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true"}`,
+		},
+		Labels: map[string]string{
+			consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores,
+		},
+	}
+
+	_, err = dynamicPolicy.Allocate(context.Background(), req)
+	as.Nil(err)
+
+	resp5, err := dynamicPolicy.GetResourcesAllocation(context.Background(), &pluginapi.GetResourcesAllocationRequest{})
+	as.Nil(err)
+
+	as.NotNil(resp5.PodResources[req.PodUid])
+	as.NotNil(resp5.PodResources[req.PodUid].ContainerResources[testName])
+	as.NotNil(resp5.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)])
+	as.Equal(&pluginapi.ResourceAllocationInfo{
+		OciPropertyName:   util.OCIPropertyNameCPUSetMems,
+		IsNodeResource:    false,
+		IsScalarResource:  true,
+		AllocatedQuantity: 0,
+		AllocationResult:  machine.NewCPUSet(1, 2, 3).String(),
+	}, resp5.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)])
 }
 
 func TestGetReadonlyState(t *testing.T) {
diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state.go
index 8806fbe7f..eb86180fd 100644
--- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state.go
+++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state.go
@@ -136,6 +136,23 @@ func (ai *AllocationInfo) CheckNumaBinding() bool {
 		consts.PodAnnotationMemoryEnhancementNumaBindingEnable
 }
 
+// CheckNumaExclusive returns true if the AllocationInfo is for pod with numa-exclusive enhancement
+func (ai *AllocationInfo) CheckNumaExclusive() bool {
+	return ai.Annotations[consts.PodAnnotationMemoryEnhancementNumaExclusive] ==
+		consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable
+}
+
+// CheckSharedOrDedicatedNUMABinding returns true if the AllocationInfo is for pod with
+// shared-qos or dedicated-qos and numa-binding enhancement
+func (ai *AllocationInfo) CheckSharedOrDedicatedNUMABinding() bool {
+	if ai == nil {
+		return false
+	}
+
+	return (ai.QoSLevel == consts.PodAnnotationQoSLevelSharedCores && ai.CheckNumaBinding()) ||
+		(ai.QoSLevel == consts.PodAnnotationQoSLevelDedicatedCores && ai.CheckNumaBinding())
+}
+
 // CheckMainContainer returns true if the AllocationInfo is for main container
 func (ai *AllocationInfo) CheckMainContainer() bool {
 	return ai.ContainerType == pluginapi.ContainerType_MAIN.String()
@@ -267,15 +284,33 @@ func (ns *NUMANodeState) Clone() *NUMANodeState {
 	}
 }
 
-// HasNUMABindingPods returns true if any AllocationInfo in this NUMANodeState is for numa-binding
-func (ns *NUMANodeState) HasNUMABindingPods() bool {
+// HasSharedOrDedicatedNUMABindingPods returns true if any AllocationInfo in this NUMANodeState is for shared or dedicated numa-binding
+func (ns *NUMANodeState) HasSharedOrDedicatedNUMABindingPods() bool {
 	if ns == nil {
 		return false
 	}
 
 	for _, containerEntries := range ns.PodEntries {
 		for _, allocationInfo := range containerEntries {
-			if allocationInfo != nil && allocationInfo.CheckNumaBinding() {
+			if allocationInfo != nil && allocationInfo.CheckSharedOrDedicatedNUMABinding() {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// HasDedicatedNUMABindingAndNUMAExclusivePods returns true if any AllocationInfo in this NUMANodeState is for dedicated with numa-binding and
+// numa-exclusive
+func (ns *NUMANodeState) HasDedicatedNUMABindingAndNUMAExclusivePods() bool {
+	if ns == nil {
+		return false
+	}
+
+	for _, containerEntries := range ns.PodEntries {
+		for _, allocationInfo := range containerEntries {
+			if allocationInfo != nil && allocationInfo.QoSLevel == consts.PodAnnotationQoSLevelDedicatedCores &&
+				allocationInfo.CheckNumaBinding() && allocationInfo.CheckNumaExclusive() {
 				return true
 			}
 		}
 	}
@@ -328,12 +363,24 @@ func (nm NUMANodeMap) BytesPerNUMA() (uint64, error) {
 	return 0, fmt.Errorf("getBytesPerNUMAFromMachineState doesn't get valid numaState")
 }
 
-// GetNUMANodesWithoutNUMABindingPods returns a set of numa nodes; for
-// those numa nodes, they all don't contain numa-binding pods
-func (nm NUMANodeMap) GetNUMANodesWithoutNUMABindingPods() machine.CPUSet {
+// GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods returns a set of numa nodes; for
+// those numa nodes, they all don't contain shared or dedicated numa-binding pods
+func (nm NUMANodeMap) GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods() machine.CPUSet {
+	res := machine.NewCPUSet()
+	for numaId, numaNodeState := range nm {
+		if numaNodeState != nil && !numaNodeState.HasSharedOrDedicatedNUMABindingPods() {
+			res = res.Union(machine.NewCPUSet(numaId))
+		}
+	}
+	return res
+}
+
+// GetNUMANodesWithoutDedicatedNUMABindingAndNUMAExclusivePods returns a set of numa nodes; for
+// those numa nodes, they all don't contain dedicated with numa-binding and numa-exclusive pods
+func (nm NUMANodeMap) GetNUMANodesWithoutDedicatedNUMABindingAndNUMAExclusivePods() machine.CPUSet {
 	res := machine.NewCPUSet()
 	for numaId, numaNodeState := range nm {
-		if numaNodeState != nil && !numaNodeState.HasNUMABindingPods() {
+		if numaNodeState != nil && !numaNodeState.HasDedicatedNUMABindingAndNUMAExclusivePods() {
 			res = res.Union(machine.NewCPUSet(numaId))
 		}
 	}