From c9a3bf133c490758deb46c6cc70f8077d81d9aeb Mon Sep 17 00:00:00 2001 From: Wojciech Ozga Date: Fri, 7 Jun 2024 04:48:24 -0500 Subject: [PATCH] Support the COVH promote_to_tvm() ABI which causes a TVM to be created in a single-step. Preload VM pages into memory, fill the NACL shared memory with boot vcpu state, and reflect the promote_to_tvm() call to the TSM. Support CoVE implementations that do not support dynamic page conversion. A TSM that does not support dynamic page conversion does not require the donation of pages to store VCPU state in confidential memory. Signed-off-by: Wojciech Ozga --- arch/riscv/include/asm/kvm_cove.h | 13 +++- arch/riscv/include/asm/kvm_cove_sbi.h | 4 + arch/riscv/include/asm/kvm_vcpu_sbi.h | 1 + arch/riscv/include/uapi/asm/kvm.h | 1 + arch/riscv/kvm/Makefile | 2 +- arch/riscv/kvm/cove.c | 102 ++++++++++++++++++++------ arch/riscv/kvm/cove_sbi.c | 20 +++++ arch/riscv/kvm/main.c | 6 +- arch/riscv/kvm/vcpu_sbi.c | 9 +++ arch/riscv/kvm/vcpu_sbi_covh.c | 85 +++++++++++++++++++++ arch/riscv/kvm/vm.c | 4 +- 11 files changed, 218 insertions(+), 29 deletions(-) create mode 100644 arch/riscv/kvm/vcpu_sbi_covh.c diff --git a/arch/riscv/include/asm/kvm_cove.h b/arch/riscv/include/asm/kvm_cove.h index ba4e282c0e81..561da58376ac 100644 --- a/arch/riscv/include/asm/kvm_cove.h +++ b/arch/riscv/include/asm/kvm_cove.h @@ -134,7 +134,10 @@ int kvm_riscv_cove_init(void); /* TVM related functions */ void kvm_riscv_cove_vm_destroy(struct kvm *kvm); -int kvm_riscv_cove_vm_init(struct kvm *kvm); +int kvm_riscv_cove_vm_single_step_init(struct kvm_vcpu *vcpu, + unsigned long fdt_address, + unsigned long tap_addr); +int kvm_riscv_cove_vm_multi_step_init(struct kvm *kvm); /* TVM VCPU related functions */ void kvm_riscv_cove_vcpu_destroy(struct kvm_vcpu *vcpu); @@ -168,7 +171,13 @@ static inline int kvm_riscv_cove_hardware_enable(void) {return 0; } /* TVM related functions */ static inline void kvm_riscv_cove_vm_destroy(struct kvm *kvm) {} -static inline int 
kvm_riscv_cove_vm_init(struct kvm *kvm) {return -1; } +static inline int kvm_riscv_cove_vm_single_step_init(struct kvm_vcpu *vcpu, + unsigned long fdt_address, + unsigned long tap_addr) +{ + return -1; +} +static inline int kvm_riscv_cove_vm_multi_step_init(struct kvm *kvm) {return -1; } /* TVM VCPU related functions */ static inline void kvm_riscv_cove_vcpu_destroy(struct kvm_vcpu *vcpu) {} diff --git a/arch/riscv/include/asm/kvm_cove_sbi.h b/arch/riscv/include/asm/kvm_cove_sbi.h index c9302650adc8..1314227adfad 100644 --- a/arch/riscv/include/asm/kvm_cove_sbi.h +++ b/arch/riscv/include/asm/kvm_cove_sbi.h @@ -78,6 +78,10 @@ int sbi_covh_tvm_demote_page(unsigned long tvmid, int sbi_covh_tvm_remove_pages(unsigned long tvmid, unsigned long tvm_base_page_addr, unsigned long len); +int sbi_covh_tsm_promote_to_tvm(unsigned long fdt_address, + unsigned long tap_addr, + unsigned long sepc, + unsigned long *tvmid); /* Functions related to CoVE Interrupt Management(COVI) Extension */ int sbi_covi_tvm_aia_init(unsigned long tvm_gid, struct sbi_cove_tvm_aia_params *tvm_aia_params); diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index 5b37a12337b1..763a931407f3 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -68,6 +68,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; #ifdef CONFIG_RISCV_COVE_HOST extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_covg; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_covh; #endif #endif /* __RISCV_KVM_VCPU_SBI_H__ */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 2a2434136e39..679a6727a143 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -149,6 +149,7 @@ enum KVM_RISCV_SBI_EXT_ID { KVM_RISCV_SBI_EXT_VENDOR, KVM_RISCV_SBI_EXT_DBCN, KVM_RISCV_SBI_EXT_COVG, + 
KVM_RISCV_SBI_EXT_COVH, KVM_RISCV_SBI_EXT_MAX, }; diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 31f4dbd97b03..fba7ebd0cd72 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -31,4 +31,4 @@ kvm-y += aia.o kvm-y += aia_device.o kvm-y += aia_aplic.o kvm-y += aia_imsic.o -kvm-$(CONFIG_RISCV_COVE_HOST) += cove_sbi.o cove.o vcpu_sbi_covg.o +kvm-$(CONFIG_RISCV_COVE_HOST) += cove_sbi.o cove.o vcpu_sbi_covg.o vcpu_sbi_covh.o diff --git a/arch/riscv/kvm/cove.c b/arch/riscv/kvm/cove.c index c4a3fe3b6bd7..8dca1b951c39 100644 --- a/arch/riscv/kvm/cove.c +++ b/arch/riscv/kvm/cove.c @@ -628,12 +628,12 @@ void kvm_riscv_cove_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_riscv_cove_vcpu_init(struct kvm_vcpu *vcpu) { - int rc; - struct kvm *kvm; struct kvm_cove_tvm_vcpu_context *tvcpuc; struct kvm_cove_tvm_context *tvmc; - struct page *vcpus_page; unsigned long vcpus_phys_addr; + struct page *vcpus_page; + struct kvm *kvm; + int rc; if (!vcpu) return -EINVAL; @@ -654,36 +654,38 @@ int kvm_riscv_cove_vcpu_init(struct kvm_vcpu *vcpu) if (!tvcpuc) return -ENOMEM; - vcpus_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, - get_order_num_pages(tinfo.tvcpu_pages_needed)); - if (!vcpus_page) { - rc = -ENOMEM; - goto alloc_page_failed; - } - tvcpuc->vcpu = vcpu; tvcpuc->vcpu_state.npages = tinfo.tvcpu_pages_needed; - tvcpuc->vcpu_state.page = vcpus_page; - vcpus_phys_addr = page_to_phys(vcpus_page); - rc = cove_convert_pages(vcpus_phys_addr, tvcpuc->vcpu_state.npages, true); - if (rc) - goto convert_failed; + if (tinfo.tvcpu_pages_needed > 0) { + vcpus_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order_num_pages(tinfo.tvcpu_pages_needed)); + if (!vcpus_page) { + rc = -ENOMEM; + goto alloc_page_failed; + } + tvcpuc->vcpu_state.page = vcpus_page; + vcpus_phys_addr = page_to_phys(vcpus_page); - rc = sbi_covh_create_tvm_vcpu(tvmc->tvm_guest_id, vcpu->vcpu_idx, vcpus_phys_addr); - if (rc) - goto vcpu_create_failed; + rc = cove_convert_pages(vcpus_phys_addr, 
tvcpuc->vcpu_state.npages, true); + if (rc) + goto convert_failed; + rc = sbi_covh_create_tvm_vcpu(tvmc->tvm_guest_id, vcpu->vcpu_idx, vcpus_phys_addr); + if (rc) + goto vcpu_create_failed; + } vcpu->arch.tc = tvcpuc; return 0; vcpu_create_failed: /* Reclaim all the pages or return to the confidential page pool */ - sbi_covh_tsm_reclaim_pages(vcpus_phys_addr, tvcpuc->vcpu_state.npages); + if (tinfo.tvcpu_pages_needed > 0) + sbi_covh_tsm_reclaim_pages(vcpus_phys_addr, tvcpuc->vcpu_state.npages); convert_failed: - __free_pages(vcpus_page, get_order_num_pages(tinfo.tvcpu_pages_needed)); + if (tinfo.tvcpu_pages_needed > 0) + __free_pages(vcpus_page, get_order_num_pages(tinfo.tvcpu_pages_needed)); alloc_page_failed: kfree(tvcpuc); @@ -877,7 +879,7 @@ void kvm_riscv_cove_vm_destroy(struct kvm *kvm) kvm_err("Memory reclaim failed with rc %d\n", rc); } -int kvm_riscv_cove_vm_init(struct kvm *kvm) +int kvm_riscv_cove_vm_multi_step_init(struct kvm *kvm) { struct kvm_cove_tvm_context *tvmc; struct page *tvms_page, *pgt_page; @@ -980,6 +982,64 @@ int kvm_riscv_cove_vm_init(struct kvm *kvm) return rc; } +int kvm_riscv_cove_vm_single_step_init(struct kvm_vcpu *vcpu, unsigned long fdt_address, + unsigned long tap_addr) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long tvm_gid, target_vcpuid; + struct kvm_cove_tvm_context *tvmc; + struct kvm_vcpu *target_vcpu; + struct kvm *kvm = vcpu->kvm; + void *nshmem = nacl_shmem(); + int rc = 0, gpr_id, offset; + + tvmc = kzalloc(sizeof(*tvmc), GFP_KERNEL); + if (!tvmc) + return -ENOMEM; + + for (gpr_id = 1; gpr_id < 32; gpr_id++) { + offset = KVM_ARCH_GUEST_ZERO + gpr_id * sizeof(unsigned long); + nacl_shmem_gpr_write_cove(nshmem, offset, + ((unsigned long *)cp)[gpr_id]); + } + kvm_arch_vcpu_load(vcpu, smp_processor_id()); + rc = sbi_covh_tsm_promote_to_tvm(fdt_address, tap_addr, cp->sepc, &tvm_gid); + if (rc) + goto done; + + INIT_LIST_HEAD(&tvmc->measured_pages); + INIT_LIST_HEAD(&tvmc->zero_pages); + 
INIT_LIST_HEAD(&tvmc->shared_pages); + INIT_LIST_HEAD(&tvmc->reclaim_pending_pages); + + tvmc->tvm_guest_id = tvm_gid; + tvmc->kvm = kvm; + kvm->arch.tvmc = tvmc; + + kvm_for_each_vcpu(target_vcpuid, target_vcpu, kvm) { + rc = kvm_riscv_cove_vcpu_init(target_vcpu); + if (rc) + goto vcpus_allocated; + + target_vcpu->requests = 0; + if (target_vcpu->vcpu_idx != 0) + kvm_riscv_vcpu_power_off(target_vcpu); + } + + tvmc->finalized_done = true; + kvm_info("Guest VM creation successful with guest id %lx\n", tvm_gid); + return 0; + +vcpus_allocated: + kvm_for_each_vcpu(target_vcpuid, target_vcpu, kvm) + if (target_vcpu->arch.tc) + kfree(target_vcpu->arch.tc); + +done: + kfree(tvmc); + return rc; +} + int kvm_riscv_cove_init(void) { int rc; diff --git a/arch/riscv/kvm/cove_sbi.c b/arch/riscv/kvm/cove_sbi.c index 4759b4920226..2325ee0f2a15 100644 --- a/arch/riscv/kvm/cove_sbi.c +++ b/arch/riscv/kvm/cove_sbi.c @@ -488,3 +488,23 @@ int sbi_covh_tvm_remove_pages(unsigned long tvmid, return 0; } + +int sbi_covh_tsm_promote_to_tvm(unsigned long fdt_address, + unsigned long tap_addr, + unsigned long sepc, + unsigned long *tvmid) +{ + struct sbiret ret; + int rc = 0; + + ret = sbi_ecall(SBI_EXT_COVH, SBI_EXT_COVH_PROMOTE_TO_TVM, fdt_address, + tap_addr, sepc, 0, 0, 0); + if (ret.error) { + rc = sbi_err_map_linux_errno(ret.error); + goto done; + } + + *tvmid = ret.value; +done: + return rc; +} diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 6d5cfa52e413..bba0d87c7aa8 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -32,10 +32,10 @@ int kvm_arch_hardware_enable(void) /* * We just need to invoke aia enable for CoVE if host is in VS mode and TSM - * supports AIA (COVI extension). However, if the host is running in HS mode, - * we need to initialize other CSRs as well for legacy VMs. + * supports AIA (COVI extension). However, if the host is running in HS + * mode, we need to initialize other CSRs as well for legacy VMs. 
*/ - if (unlikely(kvm_riscv_cove_enabled()) && likely(kvm_riscv_covi_available())) + if (unlikely(kvm_riscv_cove_enabled()) && kvm_riscv_covi_available()) goto enable_aia; hedeleg = 0; diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 8bc7d7398349..9399cf5a3062 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -40,6 +40,11 @@ static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_covg = { .extid_end = -1UL, .handler = NULL, }; +static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_covh = { + .extid_start = -1UL, + .extid_end = -1UL, + .handler = NULL, +}; #endif struct kvm_riscv_sbi_extension_entry { @@ -96,6 +101,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { .dis_idx = KVM_RISCV_SBI_EXT_COVG, .ext_ptr = &vcpu_sbi_ext_covg, }, + { + .dis_idx = KVM_RISCV_SBI_EXT_COVH, + .ext_ptr = &vcpu_sbi_ext_covh, + }, }; void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) diff --git a/arch/riscv/kvm/vcpu_sbi_covh.c b/arch/riscv/kvm/vcpu_sbi_covh.c new file mode 100644 index 000000000000..17e8331bb404 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_covh.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 IBM. 
+ *
+ * Authors:
+ *	Wojciech Ozga
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm_types.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <asm/csr.h>
+#include <asm/page.h>
+#include <asm/sbi.h>
+#include <asm/kvm_nacl.h>
+#include <asm/kvm_cove.h>
+#include <asm/kvm_cove_sbi.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int preload_pages(struct kvm_vcpu *vcpu) {
+	unsigned long hva, fault_addr, page;
+	struct kvm_memory_slot *memslot;
+	bool writable;
+
+	memslot = search_memslots(kvm_memslots(vcpu->kvm),
+				  kernel_map.phys_addr, true);
+	if (memslot) {
+		for (page = 0; page < memslot->npages; page++) {
+			fault_addr = gfn_to_gpa(memslot->base_gfn) +
+				     page * PAGE_SIZE;
+			hva = gfn_to_hva_memslot_prot(memslot,
+						      gpa_to_gfn(fault_addr),
+						      &writable);
+			if (!kvm_is_error_hva(hva))
+				kvm_riscv_gstage_map(vcpu, memslot, fault_addr,
+						     hva, NULL);
+		}
+	}
+
+	return 0;
+}
+
+static int kvm_riscv_cove_promote_to_tvm(struct kvm_vcpu *vcpu,
+					 unsigned long fdt_address,
+					 unsigned long tap_addr) {
+	int rc;
+
+	preload_pages(vcpu);
+	rc = kvm_riscv_cove_vm_single_step_init(vcpu, fdt_address, tap_addr);
+	if (rc)
+		goto done;
+
+	vcpu->kvm->arch.vm_type = KVM_VM_TYPE_RISCV_COVE;
+done:
+	return rc;
+}
+
+static int kvm_sbi_ext_covh_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				    struct kvm_vcpu_sbi_return *retdata)
+{
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	unsigned long funcid = cp->a6;
+	int ret;
+
+	switch (funcid) {
+	case SBI_EXT_COVH_PROMOTE_TO_TVM:
+		ret = kvm_riscv_cove_promote_to_tvm(vcpu, cp->a0, cp->a1);
+		return 0;
+
+	default:
+		kvm_err("%s: Unsupported guest SBI %ld.\n", __func__, funcid);
+		retdata->err_val = SBI_ERR_NOT_SUPPORTED;
+		return -EOPNOTSUPP;
+	}
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_covh = {
+	.extid_start = SBI_EXT_COVH,
+	.extid_end = SBI_EXT_COVH,
+	.handler = kvm_sbi_ext_covh_handler,
+};
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 8a1460dba76c..c9d8d2b86609 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -48,11 +48,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 			return -EPERM;
 		}
 
-		r = kvm_riscv_cove_vm_init(kvm);
+		r = kvm_riscv_cove_vm_multi_step_init(kvm);
 		if (r)
 			return r;
 
 		kvm->arch.vm_type = type;
-		kvm_info("Trusted VM instance init successful\n");
+		kvm_info("CoVE VM instance init successful\n");
 	}
 	kvm_riscv_aia_init_vm(kvm);