diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index e80fc0985a5021d39e008e3473ec1ae60440f967..d031d7ab07af9495c8925459c6bded3482877692 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -789,6 +789,7 @@ CONFIG_HAVE_KVM_IRQ_BYPASS=y
 CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE=y
 CONFIG_KVM_XFER_TO_GUEST_WORK=y
 CONFIG_KVM_GENERIC_HARDWARE_ENABLING=y
+CONFIG_HAVE_KVM_PINNED_VMID=y
 CONFIG_KVM_HISI_VIRT=y
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM=y
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2ebcd14b16123222d44761e2b4125ba107f64ea5..69a08a4f3d85038181b14b7b2f966004a15d427c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -223,7 +223,7 @@ struct kvm_arch {
 
 	/* Mandated version of PSCI */
 	u32 psci_version;
-
+	KABI_FILL_HOLE(refcount_t pinned)
 	/* Protects VM-scoped configuration data */
 	struct mutex config_lock;
 
@@ -1146,6 +1146,8 @@ int __init kvm_arm_vmid_alloc_init(void);
 void __init kvm_arm_vmid_alloc_free(void);
 void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
 void kvm_arm_vmid_clear_active(void);
+unsigned long kvm_arm_pinned_vmid_get(struct kvm_vmid *kvm_vmid);
+void kvm_arm_pinned_vmid_put(struct kvm_vmid *kvm_vmid);
 
 static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
 {
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 52edbd7f6340cdb39d257a4834c4cf7997247231..d4740f693fdf2c56a23699f694add538d2551581 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -44,6 +44,7 @@ menuconfig KVM
 	select GUEST_PERF_EVENTS if PERF_EVENTS
 	select INTERVAL_TREE
 	select XARRAY_MULTI
+	select HAVE_KVM_PINNED_VMID
 	help
 	  Support hosting virtualized guest machines.
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index d3831c27384348a06ef2a028f30a9731442feef8..16fdd57798dd732c5633935b671b9fd11d0388b1 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -960,6 +960,20 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+int kvm_arch_pinned_vmid_get(struct kvm *kvm)
+{
+	int vmid;
+
+	vmid = kvm_arm_pinned_vmid_get(&kvm->arch.mmu.vmid);
+
+	return (vmid == 0) ? -EINVAL : vmid;
+}
+
+void kvm_arch_pinned_vmid_put(struct kvm *kvm)
+{
+	kvm_arm_pinned_vmid_put(&kvm->arch.mmu.vmid);
+}
+
 bool kvm_arch_intc_initialized(struct kvm *kvm)
 {
 	return vgic_initialized(kvm);
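The kvm_arch_pinned_vmid_get() hook above maps a failed pin (kvm_arm_pinned_vmid_get() returning 0) to -EINVAL; the generic kvm_pinned_vmid_get() wrapper added at the end of this patch additionally returns -ENOENT when the VM is already going away. A minimal caller sketch, assuming only the API introduced by this patch (the example_* names are hypothetical, not part of the patch):

    /*
     * Pin the VM's VMID for the lifetime of some hardware context, e.g.
     * an SMMU stream table entry that shares the stage-2 VMID with KVM.
     */
    static int example_bind_hw_ctx(struct kvm *kvm, u32 *hw_vmid)
    {
            int vmid = kvm_pinned_vmid_get(kvm);    /* takes a KVM reference */

            if (vmid < 0)
                    return vmid;    /* -ENOENT or -EINVAL, see above */

            *hw_vmid = vmid;        /* stable across VMID rollovers from here on */
            return 0;
    }

    static void example_unbind_hw_ctx(struct kvm *kvm)
    {
            kvm_pinned_vmid_put(kvm);       /* drops the pin and the KVM reference */
    }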
diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c
index 7fe8ba1a2851c5b71acbf17075987b96436f1a4a..e1d95864442528686105e9cf17187dbd998f1873 100644
--- a/arch/arm64/kvm/vmid.c
+++ b/arch/arm64/kvm/vmid.c
@@ -25,6 +25,10 @@ static unsigned long *vmid_map;
 static DEFINE_PER_CPU(atomic64_t, active_vmids);
 static DEFINE_PER_CPU(u64, reserved_vmids);
 
+static unsigned long max_pinned_vmids;
+static unsigned long nr_pinned_vmids;
+static unsigned long *pinned_vmid_map;
+
 #define VMID_MASK (~GENMASK(kvm_arm_vmid_bits - 1, 0))
 #define VMID_FIRST_VERSION (1UL << kvm_arm_vmid_bits)
 
@@ -47,7 +51,10 @@ static void flush_context(void)
 	int cpu;
 	u64 vmid;
 
-	bitmap_zero(vmid_map, NUM_USER_VMIDS);
+	if (pinned_vmid_map)
+		bitmap_copy(vmid_map, pinned_vmid_map, NUM_USER_VMIDS);
+	else
+		bitmap_zero(vmid_map, NUM_USER_VMIDS);
 
 	for_each_possible_cpu(cpu) {
 		vmid = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0);
@@ -97,12 +104,22 @@ static u64 new_vmid(struct kvm_vmid *kvm_vmid)
 
 	if (vmid != 0) {
 		u64 newvmid = generation | (vmid & ~VMID_MASK);
+		struct kvm_s2_mmu *kvm_s2_mmu =
+			container_of(kvm_vmid, struct kvm_s2_mmu, vmid);
 
 		if (check_update_reserved_vmid(vmid, newvmid)) {
 			atomic64_set(&kvm_vmid->id, newvmid);
 			return newvmid;
 		}
 
+		/*
+		 * If it is pinned, we can keep using it. Note that reserved
+		 * takes priority, because even if it is also pinned, we need
+		 * to update the generation in reserved_vmids.
+		 */
+		if (refcount_read(&kvm_s2_mmu->arch->pinned))
+			return newvmid;
+
 		if (!__test_and_set_bit(vmid2idx(vmid), vmid_map)) {
 			atomic64_set(&kvm_vmid->id, newvmid);
 			return newvmid;
@@ -169,6 +186,67 @@ void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
 	raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
 }
 
+unsigned long kvm_arm_pinned_vmid_get(struct kvm_vmid *kvm_vmid)
+{
+	unsigned long flags;
+	u64 vmid;
+	struct kvm_s2_mmu *kvm_s2_mmu;
+
+	if (!pinned_vmid_map)
+		return 0;
+
+	raw_spin_lock_irqsave(&cpu_vmid_lock, flags);
+
+	vmid = atomic64_read(&kvm_vmid->id);
+
+	kvm_s2_mmu = container_of(kvm_vmid, struct kvm_s2_mmu, vmid);
+	if (refcount_inc_not_zero(&kvm_s2_mmu->arch->pinned))
+		goto out_unlock;
+
+	if (nr_pinned_vmids >= max_pinned_vmids) {
+		vmid = 0;
+		goto out_unlock;
+	}
+
+	/*
+	 * If we went through one or more rollovers since that VMID was
+	 * used, make sure it is still valid, or generate a new one.
+	 */
+	if (!vmid_gen_match(vmid))
+		vmid = new_vmid(kvm_vmid);
+
+	nr_pinned_vmids++;
+	__set_bit(vmid2idx(vmid), pinned_vmid_map);
+	refcount_set(&kvm_s2_mmu->arch->pinned, 1);
+
+out_unlock:
+	raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
+
+	vmid &= ~VMID_MASK;
+
+	return vmid;
+}
+
+void kvm_arm_pinned_vmid_put(struct kvm_vmid *kvm_vmid)
+{
+	unsigned long flags;
+	struct kvm_s2_mmu *kvm_s2_mmu;
+	u64 vmid = atomic64_read(&kvm_vmid->id);
+
+	if (!pinned_vmid_map)
+		return;
+
+	raw_spin_lock_irqsave(&cpu_vmid_lock, flags);
+
+	kvm_s2_mmu = container_of(kvm_vmid, struct kvm_s2_mmu, vmid);
+	if (refcount_dec_and_test(&kvm_s2_mmu->arch->pinned)) {
+		__clear_bit(vmid2idx(vmid), pinned_vmid_map);
+		nr_pinned_vmids--;
+	}
+
+	raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
+}
+
 /*
  * Initialize the VMID allocator
  */
@@ -186,10 +264,20 @@ int __init kvm_arm_vmid_alloc_init(void)
 	if (!vmid_map)
 		return -ENOMEM;
 
+	pinned_vmid_map = bitmap_zalloc(NUM_USER_VMIDS, GFP_KERNEL);
+	nr_pinned_vmids = 0;
+
+	/*
+	 * Ensure we have at least one empty slot available after rollover,
+	 * even when the maximum number of VMIDs is pinned. VMID#0 is reserved.
+	 */
+	max_pinned_vmids = NUM_USER_VMIDS - num_possible_cpus() - 2;
+
 	return 0;
 }
 
 void __init kvm_arm_vmid_alloc_free(void)
 {
+	bitmap_free(pinned_vmid_map);
 	bitmap_free(vmid_map);
 }
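A sizing note on max_pinned_vmids above: the formula keeps room for each possible CPU's reserved VMID after a rollover, plus the reserved VMID#0 and one guaranteed-free slot. As a worked example (numbers assumed, not from this patch): with 16-bit VMIDs, NUM_USER_VMIDS = 65536, so a machine with 96 possible CPUs can pin at most 65536 - 96 - 2 = 65438 VMIDs.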
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index f85492a26e8fe836f57587367769bf9d10e0eb5d..6b87f0116657a14287c8f824f9ab868afee3f176 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3386,6 +3386,7 @@ static struct iommu_domain arm_smmu_blocked_domain = {
 static struct iommu_domain *
 arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
 			   struct iommu_domain *parent,
+			   struct kvm *kvm,
 			   const struct iommu_user_data *user_data)
 {
 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
@@ -4129,7 +4130,7 @@ static struct iommu_ops arm_smmu_ops = {
 	.hw_info		= arm_smmu_hw_info,
 	.domain_alloc_paging	= arm_smmu_domain_alloc_paging,
 	.domain_alloc_sva	= arm_smmu_sva_domain_alloc,
-	.domain_alloc_user	= arm_smmu_domain_alloc_user,
+	.domain_alloc_user_v2	= arm_smmu_domain_alloc_user,
 	.probe_device		= arm_smmu_probe_device,
 	.release_device		= arm_smmu_release_device,
 	.device_group		= arm_smmu_device_group,
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index c92e575cf01e96f38c6355767c7656371e058d63..0a5a379a19ac7f241bbfaf29719bd2f8e36d852e 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -118,7 +118,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 
 	lockdep_assert_held(&ioas->mutex);
 
-	if ((flags || user_data) && !ops->domain_alloc_user)
+	if ((flags || user_data) &&
+	    (!ops->domain_alloc_user && !ops->domain_alloc_user_v2))
 		return ERR_PTR(-EOPNOTSUPP);
 	if (flags & ~valid_flags)
 		return ERR_PTR(-EOPNOTSUPP);
@@ -147,6 +148,15 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 			goto out_abort;
 		}
 		hwpt->domain->owner = ops;
+	} else if (ops->domain_alloc_user_v2) {
+		hwpt->domain = ops->domain_alloc_user_v2(idev->dev, flags, NULL,
+							 ictx->kvm, user_data);
+		if (IS_ERR(hwpt->domain)) {
+			rc = PTR_ERR(hwpt->domain);
+			hwpt->domain = NULL;
+			goto out_abort;
+		}
+		hwpt->domain->owner = ops;
 	} else {
 		hwpt->domain = iommu_domain_alloc(idev->dev->bus);
 		if (!hwpt->domain) {
@@ -224,8 +234,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
 	struct iommufd_hw_pagetable *hwpt;
 	int rc;
 
-	if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) ||
-	    !user_data->len || !ops->domain_alloc_user)
+	if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) || !user_data->len ||
+	    (!ops->domain_alloc_user && !ops->domain_alloc_user_v2))
 		return ERR_PTR(-EOPNOTSUPP);
 	if (parent->auto_domain || !parent->nest_parent)
 		return ERR_PTR(-EINVAL);
@@ -239,9 +249,17 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
 	refcount_inc(&parent->common.obj.users);
 	hwpt_nested->parent = parent;
 
-	hwpt->domain = ops->domain_alloc_user(idev->dev,
-					      flags & ~IOMMU_HWPT_FAULT_ID_VALID,
-					      parent->common.domain, user_data);
+	if (ops->domain_alloc_user) {
+		hwpt->domain = ops->domain_alloc_user(idev->dev,
+				flags & ~IOMMU_HWPT_FAULT_ID_VALID,
+				parent->common.domain, user_data);
+	} else {
+		hwpt->domain = ops->domain_alloc_user_v2(idev->dev,
+				flags & ~IOMMU_HWPT_FAULT_ID_VALID,
+				parent->common.domain,
+				ictx->kvm, user_data);
+	}
+
 	if (IS_ERR(hwpt->domain)) {
 		rc = PTR_ERR(hwpt->domain);
 		hwpt->domain = NULL;
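Note that the arm-smmu-v3 hunks above only thread the new kvm argument through to the existing allocation path; this diff does not show the driver consuming it. A sketch of how a driver's domain_alloc_user_v2() might use the pointer, assuming hypothetical example_* types and helpers plus the kvm_pinned_vmid_get() API from this patch:

    /* Hypothetical driver internals, for illustration only */
    struct example_domain {
            struct iommu_domain domain;
            u32 s2_vmid;
    };

    static struct example_domain *
    example_domain_alloc(struct device *dev, u32 flags,
                         struct iommu_domain *parent,
                         const struct iommu_user_data *user_data);
    static void example_domain_free(struct example_domain *ed);

    static struct iommu_domain *
    example_domain_alloc_user_v2(struct device *dev, u32 flags,
                                 struct iommu_domain *parent, struct kvm *kvm,
                                 const struct iommu_user_data *user_data)
    {
            struct example_domain *ed;
            int vmid;

            /* Normal user domain allocation, as with domain_alloc_user() */
            ed = example_domain_alloc(dev, flags, parent, user_data);
            if (IS_ERR(ed))
                    return ERR_CAST(ed);

            /* kvm is NULL unless userspace associated a VM before binding */
            if (kvm) {
                    vmid = kvm_pinned_vmid_get(kvm);
                    if (vmid < 0) {
                            example_domain_free(ed);
                            return ERR_PTR(vmid);
                    }
                    /* Program the now-stable VMID into hardware contexts */
                    ed->s2_vmid = vmid;
            }

            return &ed->domain;
    }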
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 85a2d4c1cd8384c2e49ebe39944163bd93620d67..53189dfd187904f26649be99d396613c839853c5 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -18,6 +18,7 @@ struct iommu_domain;
 struct iommu_group;
 struct iommu_option;
 struct iommufd_device;
+struct kvm;
 
 struct iommufd_ctx {
 	struct file *file;
@@ -29,6 +30,8 @@ struct iommufd_ctx {
 	/* Compatibility with VFIO no iommu */
 	u8 no_iommu_mode;
 	struct iommufd_ioas *vfio_ioas;
+	/* Associated KVM pointer */
+	struct kvm *kvm;
 };
 
 /*
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 3872abbd87293f61a264cf1e2fef0216e72b57fc..0be8d7279bf2ef527b4733f88ce9fa5bd0026366 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -478,6 +478,20 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx)
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD);
 
+/**
+ * iommufd_ctx_set_kvm - Set the KVM pointer on an iommufd context
+ * @ictx: Context to operate on
+ * @kvm: KVM pointer with a reference taken using kvm_get_kvm_safe()
+ */
+void iommufd_ctx_set_kvm(struct iommufd_ctx *ictx, struct kvm *kvm)
+{
+	xa_lock(&ictx->objects);
+	if (!ictx->kvm)
+		ictx->kvm = kvm;
+	xa_unlock(&ictx->objects);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_ctx_set_kvm, IOMMUFD);
+
 static const struct iommufd_object_ops iommufd_object_ops[] = {
 	[IOMMUFD_OBJ_ACCESS] = {
 		.destroy = iommufd_access_destroy_object,
diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
index e75da0a70d1f838a1d611e89fa6102876f2c5fba..e75e96fb57cbc852e9354f91496289a626bf3a28 100644
--- a/drivers/vfio/device_cdev.c
+++ b/drivers/vfio/device_cdev.c
@@ -101,6 +101,9 @@ long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
 	 */
 	vfio_df_get_kvm_safe(df);
 
+	if (df->kvm)
+		iommufd_ctx_set_kvm(df->iommufd, df->kvm);
+
 	ret = vfio_df_open(df);
 	if (ret)
 		goto out_put_kvm;
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index bde84ad344e50181685f5fbc2620c20b7b33f5a0..488b815db970e60e084dcefaeefdca6249f794db 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -85,6 +85,8 @@ struct vfio_group {
 	struct list_head	vfio_next;
 #if IS_ENABLED(CONFIG_VFIO_CONTAINER)
 	struct list_head	container_next;
+#else
+	KABI_DEPRECATE(struct list_head, container_next)
 #endif
 	enum vfio_group_type	type;
 	struct mutex		group_lock;
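vfio_df_get_kvm_safe() only finds a KVM pointer if userspace tied the VM to the device file before the bind. A userspace sketch of that ordering, assuming a kernel where KVM_DEV_VFIO_FILE_ADD accepts cdev fds, and with the fds (kvm_vfio_dev_fd from KVM_CREATE_DEVICE with KVM_DEV_TYPE_VFIO, vfio_dev_fd from the VFIO cdev, iommufd_fd from /dev/iommu) opened beforehand:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>
    #include <linux/vfio.h>

    static int example_bind(int kvm_vfio_dev_fd, int vfio_dev_fd, int iommufd_fd)
    {
            /* 1. Associate the VM with the device file (populates df->kvm) */
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_VFIO_FILE,
                    .attr = KVM_DEV_VFIO_FILE_ADD,
                    .addr = (uint64_t)(uintptr_t)&vfio_dev_fd,
            };
            /*
             * 2. Bind to iommufd; vfio_df_ioctl_bind_iommufd() then publishes
             * df->kvm into the iommufd context via iommufd_ctx_set_kvm().
             */
            struct vfio_device_bind_iommufd bind = {
                    .argsz = sizeof(bind),
                    .iommufd = iommufd_fd,
            };

            if (ioctl(kvm_vfio_dev_fd, KVM_SET_DEVICE_ATTR, &attr))
                    return -1;
            return ioctl(vfio_dev_fd, VFIO_DEVICE_BIND_IOMMUFD, &bind);
    }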
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0dea4c637fd28eee90e95fb0bb70f5c39dcbe2ac..ed3e024b1e3991975919091c0a44fbfff8a933ec 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -685,6 +685,7 @@ struct iommu_ops {
 	struct iommu_domain *(*domain_alloc_user)(
 		struct device *dev, u32 flags, struct iommu_domain *parent,
 		const struct iommu_user_data *user_data);
+
 	struct iommu_domain *(*domain_alloc_paging)(struct device *dev);
 
 	struct iommu_device *(*probe_device)(struct device *dev);
@@ -734,7 +735,9 @@ struct iommu_ops {
 	KABI_USE(6, struct iommu_domain *release_domain)
 	KABI_RESERVE(7)
 	KABI_RESERVE(8)
-	KABI_RESERVE(9)
+	KABI_USE(9, struct iommu_domain *(*domain_alloc_user_v2)(
+		struct device *dev, u32 flags, struct iommu_domain *parent,
+		struct kvm *kvm, const struct iommu_user_data *user_data))
 };
 
 /**
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 11110c74920090a2a586b97a0140ac76e203bc0a..f3d921b8222f5968508c82f4a91b3c8c244f6bcb 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -22,6 +22,7 @@ struct iommufd_ctx;
 struct iommufd_device;
 struct iommufd_viommu_ops;
 struct page;
+struct kvm;
 
 enum iommufd_object_type {
 	IOMMUFD_OBJ_NONE,
@@ -128,6 +129,7 @@ struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
 struct iommufd_ctx *iommufd_ctx_from_fd(int fd);
 void iommufd_ctx_put(struct iommufd_ctx *ictx);
 bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group);
+void iommufd_ctx_set_kvm(struct iommufd_ctx *ictx, struct kvm *kvm);
 
 int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 			     unsigned long length, struct page **out_pages,
@@ -149,6 +151,11 @@ static inline void iommufd_ctx_put(struct iommufd_ctx *ictx)
 {
 }
 
+static inline void iommufd_ctx_set_kvm(struct iommufd_ctx *ictx,
+				       struct kvm *kvm)
+{
+}
+
 static inline int iommufd_access_pin_pages(struct iommufd_access *access,
 					   unsigned long iova,
 					   unsigned long length,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a5cebf76aaa5de0257370280400089814d2ed437..d3be6620d1be7a827e23c10dfb1b93e45badeacf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2447,6 +2447,22 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
 }
 #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */
 
+#ifdef CONFIG_HAVE_KVM_PINNED_VMID
+int kvm_pinned_vmid_get(struct kvm *kvm);
+void kvm_pinned_vmid_put(struct kvm *kvm);
+int kvm_arch_pinned_vmid_get(struct kvm *kvm);
+void kvm_arch_pinned_vmid_put(struct kvm *kvm);
+#else
+static inline int kvm_pinned_vmid_get(struct kvm *kvm)
+{
+	return -EINVAL;
+}
+
+static inline void kvm_pinned_vmid_put(struct kvm *kvm)
+{
+}
+#endif
+
 /*
  * If more than one page is being (un)accounted, @virt must be the address of
  * the first page of a block of pages what were allocated together (i.e
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 484d0873061ca5041c546f1c44520a1848ec1d16..7e19e8ada12150983f2f8bc9be3811c92981f4fa 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -92,3 +92,6 @@ config HAVE_KVM_PM_NOTIFIER
 
 config KVM_GENERIC_HARDWARE_ENABLING
 	bool
+
+config HAVE_KVM_PINNED_VMID
+	bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9e351bce483ede3c0da2336a2304a015834fc663..512e8acfb23bae155de81fce5b639c3b7eaed6bb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3684,6 +3684,29 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
 
+#ifdef CONFIG_HAVE_KVM_PINNED_VMID
+int kvm_pinned_vmid_get(struct kvm *kvm)
+{
+	int ret;
+
+	if (!kvm_get_kvm_safe(kvm))
+		return -ENOENT;
+	ret = kvm_arch_pinned_vmid_get(kvm);
+	if (ret < 0)
+		kvm_put_kvm(kvm);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_pinned_vmid_get);
+
+void kvm_pinned_vmid_put(struct kvm *kvm)
+{
+	kvm_arch_pinned_vmid_put(kvm);
+	kvm_put_kvm(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_pinned_vmid_put);
+#endif
+
 #ifndef CONFIG_S390
 /*
  * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
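One property of the allocator worth noting: pins nest. A second kvm_pinned_vmid_get() on the same VM takes the refcount_inc_not_zero() path in kvm_arm_pinned_vmid_get() and returns the already-pinned VMID, so concurrent users all observe one stable value. A sketch assuming both calls succeed (example_* name hypothetical):

    static void example_nested_pin(struct kvm *kvm)
    {
            int a = kvm_pinned_vmid_get(kvm);   /* first pin: sets pinned_vmid_map bit */
            int b = kvm_pinned_vmid_get(kvm);   /* nested pin: refcount bump only */

            WARN_ON(a != b);        /* same VMID, survives rollovers until last put */

            kvm_pinned_vmid_put(kvm);       /* drops the nested pin */
            kvm_pinned_vmid_put(kvm);       /* last put: clears the bitmap bit */
    }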