diff --git a/0008-arm64-RME-Introduce-kvm_rec_pre_enter-called-before-.patch b/0008-arm64-RME-Introduce-kvm_rec_pre_enter-called-before-.patch
new file mode 100644
index 0000000000000000000000000000000000000000..e1598ea7116c6ed74b0a59a15c906aa54392b8f2
--- /dev/null
+++ b/0008-arm64-RME-Introduce-kvm_rec_pre_enter-called-before-.patch
@@ -0,0 +1,201 @@
+From 192295a637e9f4d8f76664a508b299595724f204 Mon Sep 17 00:00:00 2001
+From: Steven Price
+Date: Tue, 9 Sep 2025 20:40:23 +0800
+Subject: [PATCH 1/2] arm64: RME: Introduce kvm_rec_pre_enter() called before
+ entering an atomic section
+
+community inclusion
+category: bugfix
+bugzilla: https://gitee.com/openeuler/kernel/issues/ICX7FX?from=project-issue
+
+Reference: https://patchew.org/linux/20250820145606.180644-1-steven.price@arm.com/20250820145606.180644-17-steven.price@arm.com
+
+--------------------------------
+
+Entering a realm is done using an SMC call to the RMM. On exit the
+exit codes need to be handled slightly differently to the normal KVM
+path, so define our own functions for realm enter/exit and hook them
+in if the guest is a realm guest.
+
+Fixes: a9b2e8a67446 ("[v8-16-43]arm64: RME: Handle realm enter/exit")
+Signed-off-by: Steven Price
+Reviewed-by: Gavin Shan
+
+---
+Changes since v8 patch:
+ * Introduce kvm_rec_pre_enter() called before entering an atomic
+   section to handle operations that might require memory allocation
+   (specifically completing a RIPAS change introduced in a later patch).
+ * Updates to align with upstream changes to hpfar_el2 which now (ab)uses
+   HPFAR_EL2_NS as a valid flag.
+ * Fix exit reason when racing with PSCI shutdown to return
+   KVM_EXIT_SHUTDOWN rather than KVM_EXIT_UNKNOWN.
+
+Backport cca-v10 changes from v8 for bugfix
+
+Signed-off-by: houmingyong
+---
+ arch/arm64/include/asm/cca_base.h |  2 ++
+ arch/arm64/include/asm/kvm_rme.h  |  1 +
+ arch/arm64/kvm/arm.c              |  3 +++
+ arch/arm64/kvm/cca_base.c         |  7 +++++++
+ arch/arm64/kvm/rme-exit.c         | 10 ++++++++--
+ arch/arm64/kvm/rme.c              | 27 ++++++++++++++++++++++-----
+ 6 files changed, 43 insertions(+), 7 deletions(-)
+
+diff --git a/arch/arm64/include/asm/cca_base.h b/arch/arm64/include/asm/cca_base.h
+index 22dbf2e68738..dda255aeb760 100644
+--- a/arch/arm64/include/asm/cca_base.h
++++ b/arch/arm64/include/asm/cca_base.h
+@@ -19,6 +19,7 @@ struct cca_operations {
+ 	int (*enable_cap)(struct kvm *kvm, struct kvm_enable_cap *cap);
+ 	int (*init_realm_vm)(struct kvm *kvm);
+ 	int (*realm_vm_enter)(struct kvm_vcpu *vcpu);
++	int (*realm_vm_pre_enter)(struct kvm_vcpu *vcpu);
+ 	int (*realm_vm_exit)(struct kvm_vcpu *vcpu, int ret);
+ 	void (*init_sel2_hypervisor)(void);
+ 	int (*psci_complete)(struct kvm_vcpu *calling, struct kvm_vcpu *target,
+@@ -45,6 +46,7 @@ int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
+ void kvm_init_rme(void);
+ 
+ int kvm_rec_enter(struct kvm_vcpu *vcpu);
++int kvm_rec_pre_enter(struct kvm_vcpu *vcpu);
+ int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret);
+ 
+ int kvm_init_realm_vm(struct kvm *kvm);
+diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
+index 568537f96da5..55c4e7b2dcf8 100644
+--- a/arch/arm64/include/asm/kvm_rme.h
++++ b/arch/arm64/include/asm/kvm_rme.h
+@@ -108,6 +108,7 @@ int _kvm_create_rec(struct kvm_vcpu *vcpu);
+ void _kvm_destroy_rec(struct kvm_vcpu *vcpu);
+ 
+ int _kvm_rec_enter(struct kvm_vcpu *vcpu);
++int _kvm_rec_pre_enter(struct kvm_vcpu *vcpu);
+ int _handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_status);
+ 
+ void kvm_realm_unmap_range(struct kvm *kvm,
+diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
+index ed0c3c022842..d7511196e607 100644
+--- a/arch/arm64/kvm/arm.c
++++ b/arch/arm64/kvm/arm.c
+@@ -1262,6 +1262,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ 		if (ret > 0)
+ 			ret = check_vcpu_requests(vcpu);
+ 
++		if (ret > 0 && vcpu_is_rec(vcpu))
++			ret = kvm_rec_pre_enter(vcpu);
++
+ 		/*
+ 		 * Preparing the interrupts to be injected also
+ 		 * involves poking the GIC, which must be done in a
+diff --git a/arch/arm64/kvm/cca_base.c b/arch/arm64/kvm/cca_base.c
+index 19999d4a8a06..ad80b0ea3cf7 100644
+--- a/arch/arm64/kvm/cca_base.c
++++ b/arch/arm64/kvm/cca_base.c
+@@ -76,6 +76,13 @@ int kvm_rec_enter(struct kvm_vcpu *vcpu)
+ 	return 0;
+ }
+ 
++int kvm_rec_pre_enter(struct kvm_vcpu *vcpu)
++{
++	if (g_cca_operations[cca_cvm_type]->realm_vm_pre_enter)
++		return g_cca_operations[cca_cvm_type]->realm_vm_pre_enter(vcpu);
++	return 0;
++}
++
+ int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret)
+ {
+ 	if (g_cca_operations[cca_cvm_type]->realm_vm_exit)
+diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c
+index 83ca2c271f1a..abe13350d118 100644
+--- a/arch/arm64/kvm/rme-exit.c
++++ b/arch/arm64/kvm/rme-exit.c
+@@ -154,7 +154,7 @@ int _handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret)
+ 	 * the VCPU as a result of KVM's PSCI handling.
+ 	 */
+ 	if (status == RMI_ERROR_REALM && index == 1) {
+-		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
++		vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+ 		return 0;
+ 	}
+ 
+@@ -163,7 +163,8 @@ int _handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret)
+ 
+ 	vcpu->arch.fault.esr_el2 = rec->run->exit.esr;
+ 	vcpu->arch.fault.far_el2 = rec->run->exit.far;
+-	vcpu->arch.fault.hpfar_el2 = rec->run->exit.hpfar;
++	/* HPFAR_EL2 is only valid for RMI_EXIT_SYNC */
++	vcpu->arch.fault.hpfar_el2 = 0;
+ 
+ 	update_arch_timer_irq_lines(vcpu);
+ 
+@@ -172,6 +173,11 @@ int _handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret)
+ 
+ 	switch (rec->run->exit.exit_reason) {
+ 	case RMI_EXIT_SYNC:
++		/*
++		 * HPFAR_EL2_NS is hijacked to indicate a valid HPFAR value,
++		 * see __get_fault_info()
++		 */
++		vcpu->arch.fault.hpfar_el2 = rec->run->exit.hpfar;
+ 		return rec_exit_handlers[esr_ec](vcpu);
+ 	case RMI_EXIT_IRQ:
+ 	case RMI_EXIT_FIQ:
+diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
+index 9095a1573ecb..69bb3c74f89b 100644
+--- a/arch/arm64/kvm/rme.c
++++ b/arch/arm64/kvm/rme.c
+@@ -1469,11 +1469,23 @@ static void kvm_complete_ripas_change(struct kvm_vcpu *vcpu)
+ 		base = top_ipa;
+ 	} while (top_ipa < top);
+ }
+-
+-int _kvm_rec_enter(struct kvm_vcpu *vcpu)
++/*
++ * _kvm_rec_pre_enter - Complete operations before entering a REC
++ *
++ * Some operations require work to be completed before entering a realm. That
++ * work may require memory allocation so cannot be done in the kvm_rec_enter()
++ * call.
++ *
++ * Return: 1 if we should enter the guest
++ *	   0 if we should exit to userspace
++ *	   < 0 if we should exit to userspace, where the return value indicates
++ *	   an error
++ */
++int _kvm_rec_pre_enter(struct kvm_vcpu *vcpu)
+ {
+ 	struct realm_rec *rec = vcpu->arch.rec;
+-
++	if (kvm_realm_state(vcpu->kvm) != REALM_STATE_ACTIVE)
++		return -EINVAL;
+ 	switch (rec->run->exit.exit_reason) {
+ 	case RMI_EXIT_HOST_CALL:
+ 	case RMI_EXIT_PSCI:
+@@ -1485,8 +1497,12 @@ int _kvm_rec_enter(struct kvm_vcpu *vcpu)
+ 		break;
+ 	}
+ 
+-	if (kvm_realm_state(vcpu->kvm) != REALM_STATE_ACTIVE)
+-		return -EINVAL;
++	return 1;
++}
++
++int _kvm_rec_enter(struct kvm_vcpu *vcpu)
++{
++	struct realm_rec *rec = vcpu->arch.rec;
+ 
+ 	return rmi_rec_enter(virt_to_phys(rec->rec_page),
+ 			     virt_to_phys(rec->run));
+@@ -1707,6 +1723,7 @@ static struct cca_operations armcca_operations = {
+ 	.enable_cap = _kvm_realm_enable_cap,
+ 	.init_realm_vm = _kvm_init_realm_vm,
+ 	.realm_vm_enter = _kvm_rec_enter,
++	.realm_vm_pre_enter = _kvm_rec_pre_enter,
+ 	.realm_vm_exit = _handle_rec_exit,
+ 	.init_sel2_hypervisor = _kvm_init_rme,
+ 	.psci_complete = _realm_psci_complete,
+-- 
+2.43.0
+
diff --git a/0009-arm64-RME-handle-RIPAS-changes-before-kvm_rec_enter.patch b/0009-arm64-RME-handle-RIPAS-changes-before-kvm_rec_enter.patch
new file mode 100644
index 0000000000000000000000000000000000000000..81be6dbd9f834babd05e310509f7cc83eab50eb0
--- /dev/null
+++ b/0009-arm64-RME-handle-RIPAS-changes-before-kvm_rec_enter.patch
@@ -0,0 +1,242 @@
+From 6d843720999deba794135a07d761280dc7632bf6 Mon Sep 17 00:00:00 2001
+From: Steven Price
+Date: Tue, 9 Sep 2025 21:12:18 +0800
+Subject: [PATCH 2/2] arm64: RME: handle RIPAS changes before kvm_rec_enter
+
+community inclusion
+category: bugfix
+bugzilla: https://gitee.com/openeuler/kernel/issues/ICX7FX?from=project-issue
+
+Reference: https://patchew.org/linux/20250820145606.180644-1-steven.price@arm.com/20250820145606.180644-16-steven.price@arm.com
+
+------------------------
+
+Each page within the protected region of the realm guest can be marked
+as either RAM or EMPTY. Allow the VMM to control this before the guest
+has started and provide the equivalent functions to change this (with
+the guest's approval) at runtime.
+
+When transitioning from RIPAS RAM (1) to RIPAS EMPTY (0) the memory is
+unmapped from the guest and undelegated, allowing the memory to be reused
+by the host. When transitioning to RIPAS RAM the actual population of
+the leaf RTTs is done later on stage 2 fault; however, it may be
+necessary to allocate additional RTTs to allow the RMM to track the RIPAS
+for the requested range.
+
+When freeing a block mapping it is necessary to temporarily unfold the
+RTT, which requires delegating an extra page to the RMM; this page can
+then be recovered once the contents of the block mapping have been
+freed.
+
+Fixes: 4afc64441759 ("[v8-15-43]arm64: RME: Allow VMM to set RIPAS")
+Signed-off-by: Steven Price
+---
+Changes from v9:
+ * Minor coding style fixes.
+Changes from v8:
+ * Propagate the 'may_block' flag to allow conditional calls to
+   cond_resched_rwlock_write().
+ * Introduce alloc_rtt() to wrap alloc_delegated_granule() and
+   kvm_account_pgtable_pages() and use when allocating RTTs.
+ * Code reorganisation to allow init_ipa_state and set_ipa_state to
+   share a common ripas_change() function.
+ * Other minor changes following review.
+ +Backport cca-v10 changes from v8 for bugfix + +Signed-off-by: Hou Mingyong +--- + arch/arm64/kvm/rme.c | 132 ++++++++++++++++++++++--------------------- + 1 file changed, 67 insertions(+), 65 deletions(-) + +diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c +index 69bb3c74f89b..d60f99899bb8 100644 +--- a/arch/arm64/kvm/rme.c ++++ b/arch/arm64/kvm/rme.c +@@ -214,6 +214,7 @@ static int realm_rtt_fold(struct realm *realm, + unsigned long out_rtt; + int ret; + ++ addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); + ret = rmi_rtt_fold(virt_to_phys(realm->rd), addr, level, &out_rtt); + + if (RMI_RETURN_STATUS(ret) == RMI_SUCCESS && rtt_granule) +@@ -284,6 +285,61 @@ static int realm_unmap_private_page(struct realm *realm, + return 0; + } + ++/* ++ * Returns 0 on successful fold, a negative value on error, a positive value if ++ * we were not able to fold all tables at this level. ++ */ ++static int realm_fold_rtt_level(struct realm *realm, int level, ++ unsigned long start, unsigned long end) ++{ ++ int not_folded = 0; ++ ssize_t map_size; ++ unsigned long addr, next_addr; ++ ++ if (WARN_ON(level > RMM_RTT_MAX_LEVEL)) ++ return -EINVAL; ++ ++ map_size = rme_rtt_level_mapsize(level - 1); ++ ++ for (addr = start; addr < end; addr = next_addr) { ++ phys_addr_t rtt_granule; ++ int ret; ++ unsigned long align_addr = ALIGN(addr, map_size); ++ ++ next_addr = ALIGN(addr + 1, map_size); ++ ++ ret = realm_rtt_fold(realm, align_addr, level, &rtt_granule); ++ ++ switch (RMI_RETURN_STATUS(ret)) { ++ case RMI_SUCCESS: ++ free_delegated_granule(rtt_granule); ++ break; ++ case RMI_ERROR_RTT: ++ if (level == RMM_RTT_MAX_LEVEL || ++ RMI_RETURN_INDEX(ret) < level) { ++ not_folded++; ++ break; ++ } ++ /* Recurse a level deeper */ ++ ret = realm_fold_rtt_level(realm, ++ level + 1, ++ addr, ++ next_addr); ++ if (ret < 0) ++ return ret; ++ else if (ret == 0) ++ /* Try again at this level */ ++ next_addr = addr; ++ break; ++ default: ++ WARN_ON(1); ++ return -ENXIO; ++ } ++ } ++ ++ return not_folded; ++} ++ + static void realm_unmap_shared_range(struct kvm *kvm, + int level, + unsigned long start, +@@ -340,6 +396,7 @@ static void realm_unmap_shared_range(struct kvm *kvm, + + cond_resched_rwlock_write(&kvm->mmu_lock); + } ++ realm_fold_rtt_level(realm, get_start_level(realm) + 1, start, end); + } + + static int realm_init_sve_param(struct kvm *kvm, struct realm_params *params) +@@ -523,6 +580,7 @@ static int realm_create_rtt_levels(struct realm *realm, + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT && + RMI_RETURN_INDEX(ret) == level - 1) { + /* The RTT already exists, continue */ ++ free_delegated_granule(rtt); + continue; + } + if (ret) { +@@ -626,61 +684,6 @@ static int realm_tear_down_rtt_range(struct realm *realm, + start, end); + } + +-/* +- * Returns 0 on successful fold, a negative value on error, a positive value if +- * we were not able to fold all tables at this level. 
+- */ +-static int realm_fold_rtt_level(struct realm *realm, int level, +- unsigned long start, unsigned long end) +-{ +- int not_folded = 0; +- ssize_t map_size; +- unsigned long addr, next_addr; +- +- if (WARN_ON(level > RMM_RTT_MAX_LEVEL)) +- return -EINVAL; +- +- map_size = rme_rtt_level_mapsize(level - 1); +- +- for (addr = start; addr < end; addr = next_addr) { +- phys_addr_t rtt_granule; +- int ret; +- unsigned long align_addr = ALIGN(addr, map_size); +- +- next_addr = ALIGN(addr + 1, map_size); +- +- ret = realm_rtt_fold(realm, align_addr, level, &rtt_granule); +- +- switch (RMI_RETURN_STATUS(ret)) { +- case RMI_SUCCESS: +- free_delegated_granule(rtt_granule); +- break; +- case RMI_ERROR_RTT: +- if (level == RMM_RTT_MAX_LEVEL || +- RMI_RETURN_INDEX(ret) < level) { +- not_folded++; +- break; +- } +- /* Recurse a level deeper */ +- ret = realm_fold_rtt_level(realm, +- level + 1, +- addr, +- next_addr); +- if (ret < 0) +- return ret; +- else if (ret == 0) +- /* Try again at this level */ +- next_addr = addr; +- break; +- default: +- WARN_ON(1); +- return -ENXIO; +- } +- } +- +- return not_folded; +-} +- + void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits) + { + struct realm *realm = &kvm->arch.realm; +@@ -1147,18 +1150,16 @@ static int realm_set_ipa_state(struct kvm_vcpu *vcpu, + * If the RMM walk ended early then more tables are + * needed to reach the required depth to set the RIPAS. + */ +- if (walk_level < level) { +- ret = realm_create_rtt_levels(realm, ipa, ++ if (walk_level >= level) ++ return -EINVAL; ++ ++ ret = realm_create_rtt_levels(realm, ipa, + walk_level, + level, + memcache); +- /* Retry with RTTs created */ +- if (!ret) +- continue; +- } else { +- ret = -EINVAL; +- } +- ++ if (ret) ++ return ret; ++ /* Retry with the RTT levels in place */ + break; + } else { + WARN(1, "Unexpected error in %s: %#x\n", __func__, +@@ -1467,7 +1468,8 @@ static void kvm_complete_ripas_change(struct kvm_vcpu *vcpu) + break; + + base = top_ipa; +- } while (top_ipa < top); ++ } while (base < top); ++ rec->run->exit.ripas_base = base; + } + /* + * _kvm_rec_pre_enter - Complete operations before entering a REC +-- +2.43.0 + diff --git a/kernel.spec b/kernel.spec index f3ed0fbdac741dd56e1f580b4ef415fbaa23c921..02257817aadace5995d0d84bb5663d5234730633 100644 --- a/kernel.spec +++ b/kernel.spec @@ -42,7 +42,7 @@ rm -f test_openEuler_sign.ko test_openEuler_sign.ko.sig %global upstream_sublevel 0 %global devel_release 102 %global maintenance_release .0.0 -%global pkg_release .6 +%global pkg_release .7 %global openeuler_lts 0 %global openeuler_major 2509 @@ -125,6 +125,8 @@ Source9998: patches.tar.bz2 Patch0001: 0001-Support-RME-feature-for-CCA-host.patch Patch0002: 0001-riscv-kernel.patch Patch0003: 0007-backport-KVM-arm64-Select-default-PMU-in-KVM_ARM_VCP.patch +Patch0004: 0008-arm64-RME-Introduce-kvm_rec_pre_enter-called-before-.patch +Patch0005: 0009-arm64-RME-handle-RIPAS-changes-before-kvm_rec_enter.patch #BuildRequires: BuildRequires: module-init-tools, patch >= 2.5.4, bash >= 2.03, tar @@ -367,6 +369,8 @@ Applypatches series.conf %{_builddir}/kernel-%{version}/linux-%{KernelVer} # Arm CCA patch %patch0001 -p1 %patch0003 -p1 +%patch0004 -p1 +%patch0005 -p1 # riscv-kernel patch %ifarch riscv64 @@ -1126,6 +1130,9 @@ fi %endif %changelog +* Thu Sep 11 2025 Hou Mingyong - 6.6.0-102.0.0.7 +- fix realm exit error + * Mon Sep 08 2025 Hou Mingyong - 6.6.0-102.0.0.6 - Backport Select default PMU in KVM_ARM_VCPU_INIT to fix set pmu counters failed