From 3839a0bf63634aa43d5dbba89fbcc99d2543c1ac Mon Sep 17 00:00:00 2001 From: Francesco Torchia Date: Tue, 9 Jul 2024 17:23:44 +0200 Subject: [PATCH 1/6] Fix vGpu allocatable mechanism; removing validation steps Signed-off-by: Francesco Torchia --- .../machine-config/harvester.vue | 114 ++++++++---------- shell/assets/translations/en-us.yaml | 3 - 2 files changed, 49 insertions(+), 68 deletions(-) diff --git a/pkg/harvester-manager/machine-config/harvester.vue b/pkg/harvester-manager/machine-config/harvester.vue index b312ad4d9cb..6141fea9b08 100644 --- a/pkg/harvester-manager/machine-config/harvester.vue +++ b/pkg/harvester-manager/machine-config/harvester.vue @@ -18,9 +18,10 @@ import YamlEditor from '@shell/components/YamlEditor'; import { Checkbox } from '@components/Form/Checkbox'; import { Banner } from '@components/Banner'; import { clone, get } from '@shell/utils/object'; +import { uniq, removeObject } from '@shell/utils/array'; import { _CREATE } from '@shell/config/query-params'; -import { removeObject } from '@shell/utils/array'; + import { mapGetters } from 'vuex'; import { HCI, @@ -315,7 +316,7 @@ export default { this.networksObj = JSON.parse(this.value.networkInfo); this.networksHistoric = this.value.networkInfo; - this.getEnabledVGpuDevices(); + this.getAvailableVGpuDevices(); this.update(); } catch (e) { @@ -407,7 +408,8 @@ export default { networkDataIsBase64, vmAffinityIsBase64, SOURCE_TYPE, - vGpuEnabledDevices: {}, + vGpuDevices: {}, + vGpusInit: vGpus, vGpus, }; }, @@ -479,34 +481,11 @@ export default { }; }, - vGpusAllocatable() { - const allocatable = this.allNodeObjects.reduce((acc, node) => [ - ...acc, - ...Object.keys(node.status.allocatable || {}).filter((k) => k.startsWith(VGPU_PREFIX.NVIDIA)), - ], []); - - return allocatable.reduce((acc, v) => { - let available = 0; - - this.allNodeObjects.forEach((n) => { - if (n.status.allocatable[v]) { - available += Number(n.status.allocatable[v]); - } - }); - - if (available > 0) { - return { - ...acc, - [v]: available - }; - } - - return acc; - }, {}); - }, - vGpuOptions() { - return Object.keys(this.vGpuEnabledDevices).filter((x) => !this.vGpus.includes(x)); + return uniq([ + ...this.vGpusInit, + ...Object.keys(this.vGpuDevices).filter((k) => this.vGpuDevices[k].enabled && this.vGpuDevices[k].allocatable > 0), + ]); } }, @@ -626,8 +605,6 @@ export default { this.validatorDiskAndNetowrk(errors); - this.validatorVGpus(errors); - podAffinityValidator(this.vmAffinity.affinity, this.$store.getters, errors); return { errors }; @@ -695,21 +672,6 @@ export default { } }, - validatorVGpus(errors) { - const notAllocatable = this.vGpus - .map((id) => this.vGpuEnabledDevices[id]) - .filter((vGpu) => this.vGpusAllocatable[vGpu?.type] < this.machinePools[this.poolIndex]?.pool?.quantity); - - notAllocatable.forEach((vGpu) => { - const message = this.$store.getters['i18n/t']('cluster.credential.harvester.vGpus.errors.notAllocatable', { - vGpus: vGpu?.type, - pool: this.machinePools[this.poolIndex]?.pool?.name || '', - }); - - errors.push(message); - }); - }, - valuesChanged(value, type) { this.value[type] = base64Encode(value); }, @@ -733,22 +695,35 @@ export default { } }, - async getEnabledVGpuDevices() { + async getAvailableVGpuDevices() { const clusterId = get(this.credential, 'decodedData.clusterId'); if (clusterId) { const url = `/k8s/clusters/${ clusterId }/v1`; - const res = await this.$store.dispatch('cluster/request', { url: `${ url }/${ HCI.VGPU_DEVICE }s` }); - - this.vGpuEnabledDevices = (res?.data || []) - .filter((v) => v.spec.enabled) - .reduce((acc, v) => ({ - ...acc, - [v.id]: { - type: VGPU_PREFIX.NVIDIA + v.spec.vGPUTypeName?.replace(' ', '_'), - id: v.id - }, - }), {}); + + const vGpus = await this.$store.dispatch('cluster/request', { url: `${ url }/${ HCI.VGPU_DEVICE }s` }); + const harvesterCluster = await this.$store.dispatch('cluster/request', { url: `${ url }/harvester/cluster/local` }); + + let deviceCapacity = {}; + + if (harvesterCluster?.links?.deviceCapacity) { + deviceCapacity = await this.$store.dispatch('cluster/request', { url: harvesterCluster?.links?.deviceCapacity }); + } + + this.vGpuDevices = (vGpus?.data || []) + .reduce((acc, v) => { + const type = v.spec.vGPUTypeName ? `${ VGPU_PREFIX.NVIDIA }${ v.spec.vGPUTypeName.replace(' ', '_') }` : ''; + + return { + ...acc, + [v.id]: { + id: v.id, + enabled: v.spec.enabled, + allocatable: deviceCapacity[type] ? Number(deviceCapacity[type]) : 0, + type, + }, + }; + }, {}); } }, @@ -909,14 +884,13 @@ export default { }, updateVGpu() { - const vGPURequests = this.vGpus?.filter((name) => name).reduce((acc, name, i) => ([ + const vGPURequests = this.vGpus?.filter((name) => name).reduce((acc, name) => ([ ...acc, { name, - deviceName: this.vGpuEnabledDevices[name]?.type, + deviceName: this.vGpuDevices[name]?.type, } - ]) - , []); + ]), []); this.value.vgpuInfo = vGPURequests.length > 0 ? JSON.stringify({ vGPURequests }) : ''; }, @@ -1117,9 +1091,19 @@ export default { }, vGpuOptionLabel(opt) { - const vGpu = this.vGpuEnabledDevices[opt]; + const vGpu = this.vGpuDevices[opt]; + + if (vGpu) { + let label = `${ vGpu.type?.replace(VGPU_PREFIX.NVIDIA, '') } - ${ vGpu.id }`; + + if (vGpu.allocatable > 0) { + label += ` (allocatable: ${ vGpu.allocatable })`; + } + + return label; + } - return `${ vGpu?.type?.replace(VGPU_PREFIX.NVIDIA, '') } - ${ vGpu?.id } (allocatable: ${ this.vGpusAllocatable[vGpu?.type] })`; + return opt; } } }; diff --git a/shell/assets/translations/en-us.yaml b/shell/assets/translations/en-us.yaml index 26aea5e2b19..f2704ebe9a6 100644 --- a/shell/assets/translations/en-us.yaml +++ b/shell/assets/translations/en-us.yaml @@ -1357,9 +1357,6 @@ cluster: networkName: Network Name macAddress: Mac Address macFormat: 'Invalid MAC address format.' - vGpus: - errors: - notAllocatable: '"VGPUs" not allocatable. There are not enough [{vGpus}] devices to be allocated to each node in machine pool [{pool}]' volume: title: Volumes volume: Volume From 9ea14af2d2bfb40222e5e7c631ba4f1cb4c8013e Mon Sep 17 00:00:00 2001 From: Francesco Torchia Date: Tue, 6 Aug 2024 10:36:10 +0200 Subject: [PATCH 2/6] Code clean up Signed-off-by: Francesco Torchia --- pkg/harvester-manager/machine-config/harvester.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/harvester-manager/machine-config/harvester.vue b/pkg/harvester-manager/machine-config/harvester.vue index 6141fea9b08..bfbce4847ce 100644 --- a/pkg/harvester-manager/machine-config/harvester.vue +++ b/pkg/harvester-manager/machine-config/harvester.vue @@ -701,7 +701,7 @@ export default { if (clusterId) { const url = `/k8s/clusters/${ clusterId }/v1`; - const vGpus = await this.$store.dispatch('cluster/request', { url: `${ url }/${ HCI.VGPU_DEVICE }s` }); + const vGpus = await this.$store.dispatch('cluster/request', { url: `${ url }/${ HCI.VGPU_DEVICE }` }); const harvesterCluster = await this.$store.dispatch('cluster/request', { url: `${ url }/harvester/cluster/local` }); let deviceCapacity = {}; From 965287d0f53bf0a8a0deab773f6b177208170a89 Mon Sep 17 00:00:00 2001 From: Francesco Torchia Date: Thu, 8 Aug 2024 19:19:52 +0200 Subject: [PATCH 3/6] Select vGpus by type; Remove vGpu profile from labels Signed-off-by: Francesco Torchia --- .../machine-config/harvester.vue | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/pkg/harvester-manager/machine-config/harvester.vue b/pkg/harvester-manager/machine-config/harvester.vue index bfbce4847ce..dcdadc2bf4f 100644 --- a/pkg/harvester-manager/machine-config/harvester.vue +++ b/pkg/harvester-manager/machine-config/harvester.vue @@ -20,7 +20,7 @@ import { Banner } from '@components/Banner'; import { clone, get } from '@shell/utils/object'; import { uniq, removeObject } from '@shell/utils/array'; -import { _CREATE } from '@shell/config/query-params'; +import { _CREATE, _VIEW } from '@shell/config/query-params'; import { mapGetters } from 'vuex'; import { @@ -376,7 +376,7 @@ export default { if (this.value.vgpuInfo) { const vGPURequests = JSON.parse(this.value.vgpuInfo)?.vGPURequests; - vGpus = vGPURequests?.map((r) => r?.name).filter((r) => r) || []; + vGpus = vGPURequests?.map((r) => r?.deviceName).filter((f) => f) || []; } return { @@ -482,10 +482,14 @@ export default { }, vGpuOptions() { - return uniq([ + const vGpuTypes = uniq([ ...this.vGpusInit, - ...Object.keys(this.vGpuDevices).filter((k) => this.vGpuDevices[k].enabled && this.vGpuDevices[k].allocatable > 0), + ...Object.values(this.vGpuDevices) + .filter((vGpu) => vGpu.enabled && vGpu.allocatable > 0 && !!vGpu.type) + .map((vGpu) => vGpu.type), ]); + + return vGpuTypes; } }, @@ -884,13 +888,14 @@ export default { }, updateVGpu() { - const vGPURequests = this.vGpus?.filter((name) => name).reduce((acc, name) => ([ - ...acc, - { - name, - deviceName: this.vGpuDevices[name]?.type, - } - ]), []); + /** + * We are assigning the first vGpu profile found for each vGpu type selected by the user. + * This will not work if we will remove the limit of only one vGpu assignable to each cluster. + */ + const vGPURequests = this.vGpus?.filter((f) => f).map((deviceName) => ({ + name: Object.values(this.vGpuDevices).filter((f) => f.type === deviceName)?.[0]?.id || '', + deviceName, + })) || []; this.value.vgpuInfo = vGPURequests.length > 0 ? JSON.stringify({ vGPURequests }) : ''; }, @@ -1091,19 +1096,23 @@ export default { }, vGpuOptionLabel(opt) { - const vGpu = this.vGpuDevices[opt]; + let label = opt.replace(VGPU_PREFIX.NVIDIA, ''); - if (vGpu) { - let label = `${ vGpu.type?.replace(VGPU_PREFIX.NVIDIA, '') } - ${ vGpu.id }`; + if (this.mode === _VIEW) { + return label; + } - if (vGpu.allocatable > 0) { - label += ` (allocatable: ${ vGpu.allocatable })`; - } + /** + * We get the allocatable label from the first vGpu profile found for each vGpu type. + * This is consistent as long as vGpu profiles with the same vGpu type, have the same allocable number. + */ + const vGpu = Object.values(this.vGpuDevices).filter((f) => f.type === opt)?.[0]; - return label; + if (vGpu?.allocatable > 0) { + label += ` (allocatable: ${ vGpu.allocatable })`; } - return opt; + return label; } } }; From 578e44d1cf56a9367022e179502ca8a913410c02 Mon Sep 17 00:00:00 2001 From: Francesco Torchia Date: Fri, 9 Aug 2024 13:15:39 +0200 Subject: [PATCH 4/6] Add support for Harvester <1.3.2 versions - allocatable info is empty Signed-off-by: Francesco Torchia --- pkg/harvester-manager/l10n/en-us.yaml | 2 ++ .../machine-config/harvester.vue | 32 +++++++++++++------ 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/pkg/harvester-manager/l10n/en-us.yaml b/pkg/harvester-manager/l10n/en-us.yaml index 6506b3e03c4..7da28f81e5e 100644 --- a/pkg/harvester-manager/l10n/en-us.yaml +++ b/pkg/harvester-manager/l10n/en-us.yaml @@ -24,3 +24,5 @@ harvesterManager: title: VGPUs label: VGPU type placeholder: 'Please select a VGPU' + allocatable: allocatable + allocatableUnknown: missing allocation info diff --git a/pkg/harvester-manager/machine-config/harvester.vue b/pkg/harvester-manager/machine-config/harvester.vue index dcdadc2bf4f..d1d0b53e3ba 100644 --- a/pkg/harvester-manager/machine-config/harvester.vue +++ b/pkg/harvester-manager/machine-config/harvester.vue @@ -485,7 +485,7 @@ export default { const vGpuTypes = uniq([ ...this.vGpusInit, ...Object.values(this.vGpuDevices) - .filter((vGpu) => vGpu.enabled && vGpu.allocatable > 0 && !!vGpu.type) + .filter((vGpu) => vGpu.enabled && !!vGpu.type && (vGpu.allocatable === null || vGpu.allocatable > 0)) .map((vGpu) => vGpu.type), ]); @@ -706,24 +706,34 @@ export default { const url = `/k8s/clusters/${ clusterId }/v1`; const vGpus = await this.$store.dispatch('cluster/request', { url: `${ url }/${ HCI.VGPU_DEVICE }` }); - const harvesterCluster = await this.$store.dispatch('cluster/request', { url: `${ url }/harvester/cluster/local` }); - let deviceCapacity = {}; + let deviceCapacity = null; - if (harvesterCluster?.links?.deviceCapacity) { - deviceCapacity = await this.$store.dispatch('cluster/request', { url: harvesterCluster?.links?.deviceCapacity }); + try { + const harvesterCluster = await this.$store.dispatch('cluster/request', { url: `${ url }/harvester/cluster/local` }); + + if (harvesterCluster?.links?.deviceCapacity) { + deviceCapacity = await this.$store.dispatch('cluster/request', { url: harvesterCluster?.links?.deviceCapacity }); + } + } catch (e) { } this.vGpuDevices = (vGpus?.data || []) .reduce((acc, v) => { const type = v.spec.vGPUTypeName ? `${ VGPU_PREFIX.NVIDIA }${ v.spec.vGPUTypeName.replace(' ', '_') }` : ''; + let allocatable = null; + + if (deviceCapacity) { + allocatable = deviceCapacity[type] ? Number(deviceCapacity[type]) : 0; + } + return { ...acc, [v.id]: { - id: v.id, - enabled: v.spec.enabled, - allocatable: deviceCapacity[type] ? Number(deviceCapacity[type]) : 0, + id: v.id, + enabled: v.spec.enabled, + allocatable, type, }, }; @@ -1108,8 +1118,10 @@ export default { */ const vGpu = Object.values(this.vGpuDevices).filter((f) => f.type === opt)?.[0]; - if (vGpu?.allocatable > 0) { - label += ` (allocatable: ${ vGpu.allocatable })`; + if (vGpu?.allocatable === null) { + label += ` (${ this.t('harvesterManager.vGpu.allocatableUnknown') })`; + } else if(vGpu?.allocatable > 0) { + label += ` (${ this.t('harvesterManager.vGpu.allocatable') }: ${ vGpu.allocatable })`; } return label; From 1c93eb7d06d4a5321007539279699d686a0bb501 Mon Sep 17 00:00:00 2001 From: Francesco Torchia Date: Fri, 9 Aug 2024 17:42:56 +0200 Subject: [PATCH 5/6] Re-enable machine pools validation Signed-off-by: Francesco Torchia --- .../machine-config/harvester.vue | 37 ++++++++++++++++++- shell/assets/translations/en-us.yaml | 3 ++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/pkg/harvester-manager/machine-config/harvester.vue b/pkg/harvester-manager/machine-config/harvester.vue index d1d0b53e3ba..0f4dcacf870 100644 --- a/pkg/harvester-manager/machine-config/harvester.vue +++ b/pkg/harvester-manager/machine-config/harvester.vue @@ -609,6 +609,8 @@ export default { this.validatorDiskAndNetowrk(errors); + this.validatorVGpus(errors); + podAffinityValidator(this.vmAffinity.affinity, this.$store.getters, errors); return { errors }; @@ -676,6 +678,39 @@ export default { } }, + validatorVGpus(errors) { + const notAllocatable = this.vGpus + .map((type) => { + const allocated = this.machinePools.reduce((acc, machinePool) => { + const vGPURequests = JSON.parse(machinePool?.config?.vgpuInfo || '')?.vGPURequests; + + const vGpuTypes = vGPURequests?.map((r) => r?.deviceName).filter((f) => f) || []; + + if (vGpuTypes.includes(type)) { + return acc + machinePool.pool.quantity; + } + + return acc; + }, 0); + + return { + vGpu: Object.values(this.vGpuDevices).filter((f) => f.type === type)?.[0], + allocated + }; + }) + .filter(({ vGpu, allocated }) => vGpu && vGpu.allocatable > 0 && vGpu.allocatable < allocated); + + notAllocatable.forEach(({ vGpu, allocated }) => { + const message = this.$store.getters['i18n/t']('cluster.credential.harvester.vGpus.errors.notAllocatable', { + vGpu: vGpu?.type, + allocated, + allocatable: vGpu?.allocatable + }); + + errors.push(message); + }); + }, + valuesChanged(value, type) { this.value[type] = base64Encode(value); }, @@ -1120,7 +1155,7 @@ export default { if (vGpu?.allocatable === null) { label += ` (${ this.t('harvesterManager.vGpu.allocatableUnknown') })`; - } else if(vGpu?.allocatable > 0) { + } else if (vGpu?.allocatable > 0) { label += ` (${ this.t('harvesterManager.vGpu.allocatable') }: ${ vGpu.allocatable })`; } diff --git a/shell/assets/translations/en-us.yaml b/shell/assets/translations/en-us.yaml index f2704ebe9a6..332abb8ca90 100644 --- a/shell/assets/translations/en-us.yaml +++ b/shell/assets/translations/en-us.yaml @@ -1357,6 +1357,9 @@ cluster: networkName: Network Name macAddress: Mac Address macFormat: 'Invalid MAC address format.' + vGpus: + errors: + notAllocatable: '[{vGpu}] vGPU device is not allocatable; required: {allocated}, allocatable: {allocatable}' volume: title: Volumes volume: Volume From 86b13779524bfea32694a6634d489a6e4757b2cc Mon Sep 17 00:00:00 2001 From: Francesco Torchia Date: Wed, 18 Sep 2024 00:04:45 +0200 Subject: [PATCH 6/6] Replace vGpu name with a placeholder Signed-off-by: Francesco Torchia --- pkg/harvester-manager/machine-config/harvester.vue | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/harvester-manager/machine-config/harvester.vue b/pkg/harvester-manager/machine-config/harvester.vue index 0f4dcacf870..f24cc1c97fc 100644 --- a/pkg/harvester-manager/machine-config/harvester.vue +++ b/pkg/harvester-manager/machine-config/harvester.vue @@ -933,12 +933,12 @@ export default { }, updateVGpu() { - /** - * We are assigning the first vGpu profile found for each vGpu type selected by the user. - * This will not work if we will remove the limit of only one vGpu assignable to each cluster. - */ const vGPURequests = this.vGpus?.filter((f) => f).map((deviceName) => ({ - name: Object.values(this.vGpuDevices).filter((f) => f.type === deviceName)?.[0]?.id || '', + /** + * 'provisioned' is a placeholder. + * The real vGpu name is assigned to the provisioned VM by the backend and saved in 'harvesterhci.io/deviceAllocationDetails' annotation. + */ + name: 'provisioned', deviceName, })) || [];