diff --git a/Documentation/ABI/testing/sysfs-class-intel_pmt b/Documentation/ABI/testing/sysfs-class-intel_pmt
new file mode 100644
index 0000000000000000000000000000000000000000..ed4c886a21b1ee1640d21f11e5347fa06a5b95b6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-intel_pmt
@@ -0,0 +1,119 @@
+What:		/sys/class/intel_pmt/
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box
+Description:
+		The intel_pmt/ class directory contains information for
+		devices that expose hardware telemetry using Intel Platform
+		Monitoring Technology (PMT).
+
+What:		/sys/class/intel_pmt/telem
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box
+Description:
+		The telem directory contains files describing an instance of
+		a PMT telemetry device that exposes hardware telemetry. Each
+		telem directory has an associated telem file. This file
+		may be opened and mapped or read to access the telemetry space
+		of the device. The register layout of the telemetry space is
+		determined from an XML file that matches the PCI device id and
+		GUID for the device.
+
+What:		/sys/class/intel_pmt/telem/telem
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box
+Description:
+		(RO) The telemetry data for this telemetry device. This file
+		may be mapped or read to obtain the data.
+
+What:		/sys/class/intel_pmt/telem/guid
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box
+Description:
+		(RO) The GUID for this telemetry device. The GUID identifies
+		the version of the XML file for the parent device that is to
+		be used to get the register layout.
+
+What:		/sys/class/intel_pmt/telem/size
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box
+Description:
+		(RO) The size of the telemetry region in bytes that
+		corresponds to the mapping size for the telem file.
+
+What:		/sys/class/intel_pmt/telem/offset
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box
+Description:
+		(RO) The offset of the telemetry region in bytes that
+		corresponds to the mapping for the telem file.
+
+What:		/sys/class/intel_pmt/crashlog
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck
+Description:
+		The crashlog directory contains files for configuring an
+		instance of a PMT crashlog device that can perform crash data
+		recording. Each crashlog device has an associated crashlog
+		file. This file can be opened and mapped or read to access the
+		resulting crashlog buffer. The register layout of the buffer
+		can be determined from an XML file that matches the GUID for
+		the parent device.
+
+What:		/sys/class/intel_pmt/crashlog/crashlog
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box
+Description:
+		(RO) The crashlog buffer for this crashlog device. This file
+		may be mapped or read to obtain the data.
+
+What:		/sys/class/intel_pmt/crashlog/guid
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck
+Description:
+		(RO) The GUID for this crashlog device. The GUID identifies the
+		version of the XML file for the parent device that should be
+		used to determine the register layout.
+
+What:		/sys/class/intel_pmt/crashlog/size
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck
+Description:
+		(RO) The length of the crashlog buffer in bytes that
+		corresponds to the mapping size for the crashlog file.
+
+What:		/sys/class/intel_pmt/crashlog/offset
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck
+Description:
+		(RO) The offset of the crashlog buffer in bytes that
+		corresponds to the mapping for the crashlog file.
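For illustration, a minimal user-space sketch that maps a telemetry region
follows. The literal instance path and the fixed 4 KiB length are assumptions
for the example only; real code should take the mapping length from the size
attribute described above.

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		/* Assumed instance path for illustration. */
		int fd = open("/sys/class/intel_pmt/telem/telem", O_RDONLY);
		size_t len = 4096;	/* assumed; read the 'size' attribute instead */
		uint64_t *regs;

		if (fd < 0)
			return 1;

		regs = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
		if (regs == MAP_FAILED) {
			close(fd);
			return 1;
		}

		/* Dump the first telemetry qword. */
		printf("0x%016llx\n", (unsigned long long)regs[0]);

		munmap(regs, len);
		close(fd);
		return 0;
	}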
+
+What:		/sys/class/intel_pmt/crashlog/enable
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck
+Description:
+		(RW) Boolean value controlling whether the crashlog
+		functionality is enabled for the crashlog device.
+
+What:		/sys/class/intel_pmt/crashlog/trigger
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck
+Description:
+		(RW) Boolean value controlling the triggering of the crashlog
+		device node. When read it indicates whether the crashlog has
+		been triggered. Writing false clears the current trigger;
+		writing true triggers a new event if the trigger is not
+		currently set.
diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst
index 076efd51ef1fe9d6f27c7b962e92d7a87415c2cc..2e9b8b0f9a0f491c6773db0604f389e6741397c9 100644
--- a/Documentation/x86/sva.rst
+++ b/Documentation/x86/sva.rst
@@ -104,18 +104,47 @@
 The MSR must be configured on each logical CPU before any application
 thread can interact with a device. Threads that belong to the same
 process share the same page tables, thus the same MSR value.
-PASID is cleared when a process is created. The PASID allocation and MSR
-programming may occur long after a process and its threads have been created.
-One thread must call iommu_sva_bind_device() to allocate the PASID for the
-process. If a thread uses ENQCMD without the MSR first being populated, a #GP
-will be raised. The kernel will update the PASID MSR with the PASID for all
-threads in the process. A single process PASID can be used simultaneously
-with multiple devices since they all share the same address space.
-
-One thread can call iommu_sva_unbind_device() to free the allocated PASID.
-The kernel will clear the PASID MSR for all threads belonging to the process.
-
-New threads inherit the MSR value from the parent.
+PASID Life Cycle Management
+===========================
+
+PASID is initialized as INVALID_IOASID (-1) when a process is created.
+
+Only processes that access SVA-capable devices need to have a PASID
+allocated. This allocation happens when a process opens/binds an SVA-capable
+device and no PASID has yet been allocated for this process. Subsequent binds
+of the same, or other, devices will share the same PASID.
+
+Although the PASID is allocated to the process by opening a device,
+it is not active in any of the threads of that process. It is loaded into
+the IA32_PASID MSR lazily when a thread tries to submit a work descriptor
+to a device using the ENQCMD instruction.
+
+That first access will trigger a #GP fault because the IA32_PASID MSR
+has not been initialized with the PASID value assigned to the process
+when the device was opened. The Linux #GP handler notes that a PASID has
+been allocated for the process, and so initializes the IA32_PASID MSR
+and returns so that the ENQCMD instruction is re-executed.
+
+On fork(2) or exec(2) the PASID is removed from the process as it no
+longer has the same address space that it had when the device was opened.
+
+On clone(2) the new task shares the same address space, so it will be
+able to use the PASID allocated to the process. The IA32_PASID MSR is not
+preemptively initialized: the PASID value might not be allocated yet, the
+kernel does not know whether this thread is going to access the device,
+and leaving the MSR cleared reduces context switch overhead through the
+xstate init optimization.
Since #GP faults have to be handled on any threads that +were created before the PASID was assigned to the mm of the process, newly +created threads might as well be treated in a consistent way. + +Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in +all threads in unbind, free the PASID lazily only on mm exit. + +If a process does a close(2) of the device file descriptor and munmap(2) +of the device MMIO portal, then the driver will unbind the device. The +PASID is still marked VALID in the PASID_MSR for any threads in the +process that accessed the device. But this is harmless as without the +MMIO portal they cannot submit new work to the device. Relationships ============= diff --git a/MAINTAINERS b/MAINTAINERS index 23a23bd94c0033d95460f96faf799dc6ff9209d4..466b1c599848abe2282e9780fb34059c86f89414 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9038,6 +9038,12 @@ F: drivers/mfd/intel_soc_pmic* F: include/linux/mfd/intel_msic.h F: include/linux/mfd/intel_soc_pmic* +INTEL PMT DRIVER +M: "David E. Box" +S: Maintained +F: drivers/mfd/intel_pmt.c +F: drivers/platform/x86/intel_pmt_* + INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT M: Stanislav Yakovlev L: linux-wireless@vger.kernel.org diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 018654e65c6038be4002bdb262ffe67aa260295c..73aaf1e0cbb63bdc652beea6fcbe63eaff400747 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,8 +56,11 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -/* Force disable because it's broken beyond repair */ -#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#ifdef CONFIG_INTEL_IOMMU_SVM +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 62df2aa1ac32ae0f6a82964e906dacf6f52431c3..bee79d4fb2cb50eb7f9096712d4a3ab60deff528 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -728,10 +728,7 @@ void __init setup_arch(char **cmdline_p) if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = _brk_end; + setup_initial_init_mm(_text, _etext, _edata, (void *)_brk_end); code_resource.start = __pa_symbol(_text); code_resource.end = __pa_symbol(_etext)-1; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5da01819fb479858989a0f5a6c18b4a58fafde0a..0778d5b6f265573ee1e4f22f6f694e96b3fbc3de 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -558,6 +559,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs) return true; } +/* + * The unprivileged ENQCMD instruction generates #GPs if the + * IA32_PASID MSR has not been populated. If possible, populate + * the MSR from a PASID previously allocated to the mm. + */ +static bool try_fixup_enqcmd_gp(void) +{ +#ifdef CONFIG_IOMMU_SVA + u32 pasid; + + /* + * MSR_IA32_PASID is managed using XSAVE. Directly + * writing to the MSR is only possible when fpregs + * are valid and the fpstate is not. This is + * guaranteed when handling a userspace exception + * in *before* interrupts are re-enabled. 
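+	 *
+	 * (If the in-memory fpstate were the authoritative copy instead,
+	 * a direct MSR write here could be silently overwritten the next
+	 * time the task's PASID state is restored with XRSTORS.)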
+ */ + lockdep_assert_irqs_disabled(); + + /* + * Hardware without ENQCMD will not generate + * #GPs that can be fixed up here. + */ + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) + return false; + + pasid = current->mm->pasid; + + /* + * If the mm has not been allocated a + * PASID, the #GP can not be fixed up. + */ + if (!pasid_valid(pasid)) + return false; + + /* + * Did this thread already have its PASID activated? + * If so, the #GP must be from something else. + */ + if (current->pasid_activated) + return false; + + wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); + current->pasid_activated = 1; + + return true; +#else + return false; +#endif +} + DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; @@ -566,6 +618,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) unsigned long gp_addr; int ret; + if (user_mode(regs) && try_fixup_enqcmd_gp()) + return; + cond_local_irq_enable(regs); if (static_cpu_has(X86_FEATURE_UMIP)) { diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index d79335506ecd3c3f5aa3beabde8e0dd74cef10a3..47551ab73ca8a03ab60448adb8fe0d8cee9aed5d 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -37,7 +37,7 @@ */ /* un-comment DEBUG to enable pr_debug() statements */ -#define DEBUG +/* #define DEBUG */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -64,11 +64,17 @@ static struct cpuidle_driver intel_idle_driver = { /* intel_idle.max_cstate=0 disables driver */ static int max_cstate = CPUIDLE_STATE_MAX - 1; static unsigned int disabled_states_mask; +static unsigned int preferred_states_mask; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static unsigned long auto_demotion_disable_flags; -static bool disable_promotion_to_c1e; + +static enum { + C1E_PROMOTION_PRESERVE, + C1E_PROMOTION_ENABLE, + C1E_PROMOTION_DISABLE +} c1e_promotion = C1E_PROMOTION_PRESERVE; struct idle_cpu { struct cpuidle_state *state_table; @@ -88,6 +94,12 @@ static struct cpuidle_state *cpuidle_state_table __initdata; static unsigned int mwait_substates __initdata; +/* + * Enable interrupts before entering the C-state. On some platforms and for + * some C-states, this may measurably decrease interrupt latency. + */ +#define CPUIDLE_FLAG_IRQ_ENABLE BIT(14) + /* * Enable this state by default even if the ACPI _CST does not list it. */ @@ -115,9 +127,6 @@ static unsigned int mwait_substates __initdata; * If the local APIC timer is not known to be reliable in the target idle state, * enable one-shot tick broadcasting for the target CPU before executing MWAIT. * - * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to - * flushing user TLBs. - * * Must be called under local_irq_disable(). 
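+ *
+ * If the target state has CPUIDLE_FLAG_IRQ_ENABLE set, interrupts are
+ * re-enabled before entering MWAIT (see the flag definition above), which
+ * can measurably reduce interrupt latency for shallow states such as C1.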
*/ static __cpuidle int intel_idle(struct cpuidle_device *dev, @@ -127,6 +136,9 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, unsigned long eax = flg2MWAIT(state->flags); unsigned long ecx = 1; /* break on interrupt flag */ + if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) + local_irq_enable(); + mwait_idle_with_hints(eax, ecx); return index; @@ -698,7 +710,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", - .flags = MWAIT2flg(0x00), + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, .exit_latency = 2, .target_residency = 2, .enter = &intel_idle, @@ -727,7 +739,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", - .flags = MWAIT2flg(0x00), + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, .exit_latency = 1, .target_residency = 1, .enter = &intel_idle, @@ -744,8 +756,48 @@ static struct cpuidle_state icx_cstates[] __initdata = { .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 128, - .target_residency = 384, + .exit_latency = 170, + .target_residency = 600, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + +/* + * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice + * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in + * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 + * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then + * both C1 and C1E requests end up with C1, so there is effectively no C1E. + * + * By default we enable C1 and disable C1E by marking it with + * 'CPUIDLE_FLAG_UNUSABLE'. + */ +static struct cpuidle_state spr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | + CPUIDLE_FLAG_UNUSABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 290, + .target_residency = 800, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -963,6 +1015,39 @@ static struct cpuidle_state dnv_cstates[] __initdata = { .enter = NULL } }; +/* + * Note, depending on HW and FW revision, SnowRidge SoC may or may not support + * C6, and this is indicated in the CPUID mwait leaf. 
+ */ +static struct cpuidle_state snr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 15, + .target_residency = 25, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 130, + .target_residency = 500, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static const struct idle_cpu idle_cpu_nehalem __initconst = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, @@ -1062,6 +1147,12 @@ static const struct idle_cpu idle_cpu_icx __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_spr __initconst = { + .state_table = spr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_avn __initconst = { .state_table = avn_cstates, .disable_promotion_to_c1e = true, @@ -1084,6 +1175,12 @@ static const struct idle_cpu idle_cpu_dnv __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_snr __initconst = { + .state_table = snr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), @@ -1117,12 +1214,14 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), {} }; @@ -1444,6 +1543,68 @@ static void __init sklh_idle_state_table_update(void) skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ } +/** + * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake + * idle states table. + */ +static void __init skx_idle_state_table_update(void) +{ + unsigned long long msr; + + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + + /* + * 000b: C0/C1 (no package C-state support) + * 001b: C2 + * 010b: C6 (non-retention) + * 011b: C6 (retention) + * 111b: No Package C state limits. + */ + if ((msr & 0x7) < 2) { + /* + * Uses the CC6 + PC0 latency and 3 times of + * latency for target_residency if the PC6 + * is disabled in BIOS. This is consistent + * with how intel_idle driver uses _CST + * to set the target_residency. + */ + skx_cstates[2].exit_latency = 92; + skx_cstates[2].target_residency = 276; + } +} + +/** + * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. + */ +static void __init spr_idle_state_table_update(void) +{ + unsigned long long msr; + + /* Check if user prefers C1E over C1. 
*/ + if ((preferred_states_mask & BIT(2)) && + !(preferred_states_mask & BIT(1))) { + /* Disable C1 and enable C1E. */ + spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; + spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; + + /* Enable C1E using the "C1E promotion" bit. */ + c1e_promotion = C1E_PROMOTION_ENABLE; + } + + /* + * By default, the C6 state assumes the worst-case scenario of package + * C6. However, if PC6 is disabled, we update the numbers to match + * core C6. + */ + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + + /* Limit value 2 and above allow for PC6. */ + if ((msr & 0x7) < 2) { + spr_cstates[2].exit_latency = 190; + spr_cstates[2].target_residency = 600; + } +} + static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) { unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; @@ -1475,6 +1636,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) case INTEL_FAM6_SKYLAKE: sklh_idle_state_table_update(); break; + case INTEL_FAM6_SKYLAKE_X: + skx_idle_state_table_update(); + break; + case INTEL_FAM6_SAPPHIRERAPIDS_X: + spr_idle_state_table_update(); + break; } for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { @@ -1547,6 +1714,15 @@ static void auto_demotion_disable(void) wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); } +static void c1e_promotion_enable(void) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + msr_bits |= 0x2; + wrmsrl(MSR_IA32_POWER_CTL, msr_bits); +} + static void c1e_promotion_disable(void) { unsigned long long msr_bits; @@ -1578,7 +1754,9 @@ static int intel_idle_cpu_init(unsigned int cpu) if (auto_demotion_disable_flags) auto_demotion_disable(); - if (disable_promotion_to_c1e) + if (c1e_promotion == C1E_PROMOTION_ENABLE) + c1e_promotion_enable(); + else if (c1e_promotion == C1E_PROMOTION_DISABLE) c1e_promotion_disable(); return 0; @@ -1657,7 +1835,8 @@ static int __init intel_idle_init(void) if (icpu) { cpuidle_state_table = icpu->state_table; auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; - disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; + if (icpu->disable_promotion_to_c1e) + c1e_promotion = C1E_PROMOTION_DISABLE; if (icpu->use_acpi || force_use_acpi) intel_idle_acpi_cst_extract(); } else if (!intel_idle_acpi_cst_extract()) { @@ -1716,3 +1895,14 @@ module_param(max_cstate, int, 0444); */ module_param_named(states_off, disabled_states_mask, uint, 0444); MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); +/* + * Some platforms come with mutually exclusive C-states, so that if one is + * enabled, the other C-states must not be used. Example: C1 and C1E on + * Sapphire Rapids platform. This parameter allows for selecting the + * preferred C-states among the groups of mutually exclusive C-states - the + * selected C-states will be registered, the other C-states from the mutually + * exclusive group won't be registered. If the platform has no mutually + * exclusive C-states, this parameter has no effect. 
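+ *
+ * For example, booting with "intel_idle.preferred_cstates=4" (a mask with
+ * only BIT(2) set) selects C1E instead of the default C1 on Sapphire
+ * Rapids, as handled by spr_idle_state_table_update().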
+ */ +module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); +MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d97e38bfbe4b5edec7f2a6f8768f6b5ffddc90c7..d6cb789944614d167e42057584888e289ec800d4 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -103,8 +103,8 @@ config IOMMU_DMA select IRQ_MSI_IOMMU select NEED_SG_DMA_LENGTH -# Shared Virtual Addressing library -config IOMMU_SVA_LIB +# Shared Virtual Addressing +config IOMMU_SVA bool select IOASID @@ -318,7 +318,7 @@ config ARM_SMMU_V3 config ARM_SMMU_V3_SVA bool "Shared Virtual Addressing support for the ARM SMMUv3" depends on ARM_SMMU_V3 - select IOMMU_SVA_LIB + select IOMMU_SVA select MMU_NOTIFIER help Support for sharing process address spaces with devices using the @@ -422,4 +422,16 @@ config SMMU_BYPASS_DEV This feature will be replaced by ACPI IORT RMR node, which will be upstreamed in mainline. +config SPRD_IOMMU + tristate "Unisoc IOMMU Support" + depends on ARCH_SPRD || COMPILE_TEST + select IOMMU_API + help + Support for IOMMU on Unisoc's SoCs, this IOMMU can be used by + Unisoc's multimedia devices, such as display, Image codec(jpeg) + and a few signal processors, including VSP(video), GSP(graphic), + ISP(image), and CPP(camera pixel processor), etc. + + Say Y here if you want to use the multimedia devices listed above. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 1e7519f56afbcb18e795b3223fadc68113440c93..88337bfc0017c22c73cbaa0d316a16e7219efd3f 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -27,5 +27,5 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o -obj-$(CONFIG_IOMMU_SVA_LIB) += io-pgfault.o +obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o +obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index d4e1238347a6cc44d484637cd9dab21e152684a1..a8e8fd9d7c8b6cea5c3cbabbc7275a4efbce5343 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -351,15 +351,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); if (IS_ERR(bond->smmu_mn)) { ret = PTR_ERR(bond->smmu_mn); - goto err_free_pasid; + goto err_free_bond; } list_add(&bond->list, &master->bonds); trace_smmu_bind_alloc(dev, mm->pasid); return &bond->sva; -err_free_pasid: - iommu_sva_free_pasid(mm); err_free_bond: kfree(bond); return ERR_PTR(ret); @@ -403,7 +401,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle) trace_smmu_unbind_free(handle->dev, bond->mm->pasid); list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); - iommu_sva_free_pasid(bond->mm); kfree(bond); } else { trace_smmu_unbind_put(handle->dev, bond->mm->pasid); diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 5337ee1584b0cc2e1f377e742cbddd5b266e2691..cecdad7f2aba6aca5648fe604c71b4a5a67a7a5b 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -40,6 +40,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID + select IOMMU_SVA help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through 
process address space by diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8b6d10c2add5d8b1bc8dc6aa10c0c5610f2ee722..6622489e8fa4793d6a6630ee3a936ffba926cb5a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5450,7 +5450,7 @@ static int aux_domain_add_dev(struct dmar_domain *domain, link_failed: spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); return ret; } @@ -5480,7 +5480,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); } static int prepare_domain_attach_device(struct iommu_domain *domain, @@ -6197,6 +6197,12 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (!info) return -EINVAL; + if (intel_iommu_enable_pasid(info->iommu, dev)) + return -ENODEV; + + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + return -EINVAL; + if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) return 0; } diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index aaf9f9589bdc38ce1e3d46acd804aea28975bb36..d886b494dddb07a1d8ad18128cb35bd8a326b89e 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -17,17 +17,37 @@ #include #include #include +#include #include #include #include #include "pasid.h" +#include "../iommu-sva-lib.h" static irqreturn_t prq_event_thread(int irq, void *d); static void intel_svm_drain_prq(struct device *dev, u32 pasid); +#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) #define PRQ_ORDER 0 +static DEFINE_XARRAY_ALLOC(pasid_private_array); +static int pasid_private_add(ioasid_t pasid, void *priv) +{ + return xa_alloc(&pasid_private_array, &pasid, priv, + XA_LIMIT(pasid, pasid), GFP_ATOMIC); +} + +static void pasid_private_remove(ioasid_t pasid) +{ + xa_erase(&pasid_private_array, pasid); +} + +static void *pasid_private_find(ioasid_t pasid) +{ + return xa_load(&pasid_private_array, pasid); +} + int intel_svm_enable_prq(struct intel_iommu *iommu) { struct page *pages; @@ -204,7 +224,6 @@ static const struct mmu_notifier_ops intel_mmuops = { }; static DEFINE_MUTEX(pasid_mutex); -static LIST_HEAD(global_svm_list); #define for_each_svm_dev(sdev, svm, d) \ list_for_each_entry((sdev), &(svm)->devs, list) \ @@ -224,7 +243,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, if (pasid == INVALID_IOASID || pasid >= PASID_MAX) return -EINVAL; - svm = ioasid_find(NULL, pasid, NULL); + svm = pasid_private_find(pasid); if (IS_ERR(svm)) return PTR_ERR(svm); @@ -334,7 +353,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, svm->gpasid = data->gpasid; svm->flags |= SVM_FLAG_GUEST_PASID; } - ioasid_set_data(data->hpasid, svm); + pasid_private_add(data->hpasid, svm); INIT_LIST_HEAD_RCU(&svm->devs); mmput(svm->mm); } @@ -388,7 +407,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, list_add_rcu(&sdev->list, &svm->devs); out: if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { - ioasid_set_data(data->hpasid, NULL); + pasid_private_remove(data->hpasid); kfree(svm); } @@ -431,7 +450,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) * the unbind, IOMMU driver will get notified * and perform cleanup. 
*/ - ioasid_set_data(pasid, NULL); + pasid_private_remove(pasid); kfree(svm); } } @@ -459,88 +478,75 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) mutex_unlock(&mm->context.lock); } -/* Caller must hold pasid_mutex, mm reference */ -static int -intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct svm_dev_ops *ops, - struct mm_struct *mm, struct intel_svm_dev **sd) +static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, + unsigned int flags) { - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct device_domain_info *info; - struct intel_svm_dev *sdev; - struct intel_svm *svm = NULL; - unsigned long iflags; - int pasid_max; - int ret; - - if (!iommu || dmar_disabled) - return -EINVAL; - - if (!intel_svm_capable(iommu)) - return -ENOTSUPP; - - if (dev_is_pci(dev)) { - pasid_max = pci_max_pasids(to_pci_dev(dev)); - if (pasid_max < 0) - return -EINVAL; - } else - pasid_max = 1 << 20; + ioasid_t max_pasid = dev_is_pci(dev) ? + pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; - /* Bind supervisor PASID shuld have mm = NULL */ - if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap) || mm) { - pr_err("Supervisor PASID with user provided mm.\n"); - return -EINVAL; - } - } + return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); +} - if (!(flags & SVM_FLAG_PRIVATE_PASID)) { - struct intel_svm *t; +static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, + struct device *dev, + struct mm_struct *mm, + unsigned int flags) +{ + struct device_domain_info *info = get_domain_info(dev); + unsigned long iflags, sflags; + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = 0; - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID)) - continue; + svm = pasid_private_find(mm->pasid); + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) + return ERR_PTR(-ENOMEM); - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. 
Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; - } + svm->pasid = mm->pasid; + svm->mm = mm; + svm->flags = flags; + INIT_LIST_HEAD_RCU(&svm->devs); - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - if (sdev->ops != ops) { - ret = -EBUSY; - goto out; - } - sdev->users++; - goto success; + if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { + svm->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + kfree(svm); + return ERR_PTR(ret); } + } - break; + ret = pasid_private_add(svm->pasid, svm); + if (ret) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + kfree(svm); + return ERR_PTR(ret); } } + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + sdev->users++; + goto success; + } + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) { ret = -ENOMEM; - goto out; + goto free_svm; } + sdev->dev = dev; sdev->iommu = iommu; - - ret = intel_iommu_enable_pasid(iommu, dev); - if (ret) { - kfree(sdev); - goto out; - } - - info = get_domain_info(dev); sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->users = 1; + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + init_rcu_head(&sdev->rcu); if (info->ats_enabled) { sdev->dev_iotlb = 1; sdev->qdep = info->ats_qdep; @@ -548,96 +554,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, sdev->qdep = 0; } - /* Finish the setup now we know we're keeping it */ - sdev->users = 1; - sdev->ops = ops; - init_rcu_head(&sdev->rcu); - - if (!svm) { - svm = kzalloc(sizeof(*svm), GFP_KERNEL); - if (!svm) { - ret = -ENOMEM; - kfree(sdev); - goto out; - } - - if (pasid_max > intel_pasid_max_id) - pasid_max = intel_pasid_max_id; + /* Setup the pasid table: */ + sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? + PASID_FLAG_SUPERVISOR_MODE : 0; + sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; + spin_lock_irqsave(&iommu->lock, iflags); + ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, + FLPT_DEFAULT_DID, sflags); + spin_unlock_irqrestore(&iommu->lock, iflags); - /* Do not use PASID 0, reserved for RID to PASID */ - svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, svm); - if (svm->pasid == INVALID_IOASID) { - kfree(svm); - kfree(sdev); - ret = -ENOSPC; - goto out; - } - svm->notifier.ops = &intel_mmuops; - svm->mm = mm; - svm->flags = flags; - INIT_LIST_HEAD_RCU(&svm->devs); - INIT_LIST_HEAD(&svm->list); - ret = -ENOMEM; - if (mm) { - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) { - ioasid_put(svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } - } + if (ret) + goto free_sdev; - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - if (mm) - mmu_notifier_unregister(&svm->notifier, mm); - ioasid_put(svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } + /* The newly allocated pasid is loaded to the mm. */ + if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs)) + load_pasid(mm, svm->pasid); - list_add_tail(&svm->list, &global_svm_list); - if (mm) { - /* The newly allocated pasid is loaded to the mm. 
*/ - load_pasid(mm, svm->pasid); - } - } else { - /* - * Binding a new device with existing PASID, need to setup - * the PASID entry. - */ - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - kfree(sdev); - goto out; - } - } list_add_rcu(&sdev->list, &svm->devs); success: - sdev->pasid = svm->pasid; - sdev->sva.dev = dev; - if (sd) - *sd = sdev; - ret = 0; -out: - return ret; + return &sdev->sva; + +free_sdev: + kfree(sdev); +free_svm: + if (list_empty(&svm->devs)) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + pasid_private_remove(mm->pasid); + kfree(svm); + } + + return ERR_PTR(ret); } /* Caller must hold pasid_mutex */ @@ -646,6 +593,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; + struct mm_struct *mm; int ret = -EINVAL; iommu = device_to_iommu(dev, NULL, NULL); @@ -655,6 +603,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); if (ret) goto out; + mm = svm->mm; if (sdev) { sdev->users--; @@ -673,13 +622,12 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - ioasid_put(svm->pasid); - if (svm->mm) { - mmu_notifier_unregister(&svm->notifier, svm->mm); + if (svm->notifier.ops) { + mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ - load_pasid(svm->mm, PASID_DISABLED); + load_pasid(mm, PASID_DISABLED); } - list_del(&svm->list); + pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. @@ -935,7 +883,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) } if (!svm || svm->pasid != req->pasid) { rcu_read_lock(); - svm = ioasid_find(NULL, req->pasid, NULL); + svm = pasid_private_find(req->pasid); /* It *can't* go away, because the driver is not permitted * to unbind the mm while any page faults are outstanding. * So we only need RCU to protect the internal idr code. */ @@ -1006,13 +954,6 @@ static irqreturn_t prq_event_thread(int irq, void *d) mmap_read_unlock(svm->mm); mmput(svm->mm); bad_req: - WARN_ON(!sdev); - if (sdev && sdev->ops && sdev->ops->fault_cb) { - int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | - (req->exe_req << 1) | (req->pm_req); - sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, - req->priv_data, rwxp, result); - } /* We get here in the error case where the PASID lookup failed, and these can be NULL. Do not use them below this point! 
*/ sdev = NULL; @@ -1070,31 +1011,40 @@ static irqreturn_t prq_event_thread(int irq, void *d) return IRQ_RETVAL(handled); } -#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) -struct iommu_sva * -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) { - struct iommu_sva *sva = ERR_PTR(-EINVAL); - struct intel_svm_dev *sdev = NULL; + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); unsigned int flags = 0; + struct iommu_sva *sva; int ret; - /* - * TODO: Consolidate with generic iommu-sva bind after it is merged. - * It will require shared SVM data structures, i.e. combine io_mm - * and intel_svm etc. - */ if (drvdata) flags = *(unsigned int *)drvdata; + + if (flags & SVM_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap)) { + dev_err(dev, "%s: Supervisor PASID not supported\n", + iommu->name); + return ERR_PTR(-EOPNOTSUPP); + } + + if (mm) { + dev_err(dev, "%s: Supervisor PASID with user provided mm\n", + iommu->name); + return ERR_PTR(-EINVAL); + } + + mm = &init_mm; + } + mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev); - if (ret) - sva = ERR_PTR(ret); - else if (sdev) - sva = &sdev->sva; - else - WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + ret = intel_svm_alloc_pasid(dev, mm, flags); + if (ret) { + mutex_unlock(&pasid_mutex); + return ERR_PTR(ret); + } + sva = intel_svm_bind_mm(iommu, dev, mm, flags); mutex_unlock(&pasid_mutex); return sva; @@ -1102,10 +1052,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) void intel_svm_unbind(struct iommu_sva *sva) { - struct intel_svm_dev *sdev; + struct intel_svm_dev *sdev = to_intel_svm_dev(sva); mutex_lock(&pasid_mutex); - sdev = to_intel_svm_dev(sva); intel_svm_unbind_mm(sdev->dev, sdev->pasid); mutex_unlock(&pasid_mutex); } diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c index 50ee27bbd04ec288341df8d8401a02336df9c21c..a786c034907c43ea3fd468dc39e8b4d094260252 100644 --- a/drivers/iommu/ioasid.c +++ b/drivers/iommu/ioasid.c @@ -2,7 +2,7 @@ /* * I/O Address Space ID allocator. There is one global IOASID space, split into * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and - * free IOASIDs with ioasid_alloc and ioasid_put. + * free IOASIDs with ioasid_alloc() and ioasid_free(). */ #include #include @@ -15,7 +15,6 @@ struct ioasid_data { struct ioasid_set *set; void *private; struct rcu_head rcu; - refcount_t refs; }; /* @@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, data->set = set; data->private = private; - refcount_set(&data->refs, 1); /* * Custom allocator needs allocator data to perform platform specific @@ -348,34 +346,11 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, EXPORT_SYMBOL_GPL(ioasid_alloc); /** - * ioasid_get - obtain a reference to the IOASID - */ -void ioasid_get(ioasid_t ioasid) -{ - struct ioasid_data *ioasid_data; - - spin_lock(&ioasid_allocator_lock); - ioasid_data = xa_load(&active_allocator->xa, ioasid); - if (ioasid_data) - refcount_inc(&ioasid_data->refs); - else - WARN_ON(1); - spin_unlock(&ioasid_allocator_lock); -} -EXPORT_SYMBOL_GPL(ioasid_get); - -/** - * ioasid_put - Release a reference to an ioasid + * ioasid_free - Free an ioasid * @ioasid: the ID to remove - * - * Put a reference to the IOASID, free it when the number of references drops to - * zero. 
- * - * Return: %true if the IOASID was freed, %false otherwise. */ -bool ioasid_put(ioasid_t ioasid) +void ioasid_free(ioasid_t ioasid) { - bool free = false; struct ioasid_data *ioasid_data; spin_lock(&ioasid_allocator_lock); @@ -385,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid) goto exit_unlock; } - free = refcount_dec_and_test(&ioasid_data->refs); - if (!free) - goto exit_unlock; - active_allocator->ops->free(ioasid, active_allocator->ops->pdata); /* Custom allocator needs additional steps to free the xa element */ if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) { @@ -398,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid) exit_unlock: spin_unlock(&ioasid_allocator_lock); - return free; } -EXPORT_SYMBOL_GPL(ioasid_put); +EXPORT_SYMBOL_GPL(ioasid_free); /** * ioasid_find - Find IOASID data diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c index bd41405d34e9506304e0a175355bc884d89df748..10650614389605b6442b471af36546fe8d29903f 100644 --- a/drivers/iommu/iommu-sva-lib.c +++ b/drivers/iommu/iommu-sva-lib.c @@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid); * * Try to allocate a PASID for this mm, or take a reference to the existing one * provided it fits within the [@min, @max] range. On success the PASID is - * available in mm->pasid, and must be released with iommu_sva_free_pasid(). - * @min must be greater than 0, because 0 indicates an unused mm->pasid. + * available in mm->pasid and will be available for the lifetime of the mm. * * Returns 0 on success and < 0 on error. */ @@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) return -EINVAL; mutex_lock(&iommu_sva_lock); - if (mm->pasid) { - if (mm->pasid >= min && mm->pasid <= max) - ioasid_get(mm->pasid); - else + /* Is a PASID already associated with this mm? 
*/ + if (pasid_valid(mm->pasid)) { + if (mm->pasid < min || mm->pasid >= max) ret = -EOVERFLOW; - } else { - pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); - if (pasid == INVALID_IOASID) - ret = -ENOMEM; - else - mm->pasid = pasid; + goto out; } + + pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); + if (!pasid_valid(pasid)) + ret = -ENOMEM; + else + mm_pasid_set(mm, pasid); +out: mutex_unlock(&iommu_sva_lock); return ret; } EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); -/** - * iommu_sva_free_pasid - Release the mm's PASID - * @mm: the mm - * - * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid() - */ -void iommu_sva_free_pasid(struct mm_struct *mm) -{ - mutex_lock(&iommu_sva_lock); - if (ioasid_put(mm->pasid)) - mm->pasid = 0; - mutex_unlock(&iommu_sva_lock); -} -EXPORT_SYMBOL_GPL(iommu_sva_free_pasid); - /* ioasid_find getter() requires a void * argument */ static bool __mmget_not_zero(void *mm) { diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 031155010ca855d1d1bbf478fbbf76d37c4f759a..8909ea1094e3a842a92e289a8573cae7859c47fa 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -9,7 +9,6 @@ #include int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max); -void iommu_sva_free_pasid(struct mm_struct *mm); struct mm_struct *iommu_sva_find(ioasid_t pasid); /* I/O Page fault */ @@ -17,7 +16,7 @@ struct device; struct iommu_fault; struct iopf_queue; -#ifdef CONFIG_IOMMU_SVA_LIB +#ifdef CONFIG_IOMMU_SVA int iommu_queue_iopf(struct iommu_fault *fault, void *cookie); int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); @@ -28,7 +27,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); -#else /* CONFIG_IOMMU_SVA_LIB */ +#else /* CONFIG_IOMMU_SVA */ static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) { return -ENODEV; @@ -64,5 +63,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue) { return -ENODEV; } -#endif /* CONFIG_IOMMU_SVA_LIB */ +#endif /* CONFIG_IOMMU_SVA */ #endif /* _IOMMU_SVA_LIB_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e61d16f0ede2fe6b90765db1ebedf3ffb6446121..b3d2555a794b96a4ef49b4b32f5a1bc494d584df 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -968,17 +968,6 @@ static int iommu_group_device_count(struct iommu_group *group) return ret; } -/** - * iommu_group_for_each_dev - iterate over each device in the group - * @group: the group - * @data: caller opaque data to be passed to callback function - * @fn: caller supplied callback function - * - * This function is called by group users to iterate over group devices. - * Callers should hold a reference count to the group during callback. - * The group->mutex is held across callbacks, which will block calls to - * iommu_group_add/remove_device. - */ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, int (*fn)(struct device *, void *)) { @@ -993,7 +982,17 @@ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, return ret; } - +/** + * iommu_group_for_each_dev - iterate over each device in the group + * @group: the group + * @data: caller opaque data to be passed to callback function + * @fn: caller supplied callback function + * + * This function is called by group users to iterate over group devices. 
+ * Callers should hold a reference count to the group during callback. + * The group->mutex is held across callbacks, which will block calls to + * iommu_group_add/remove_device. + */ int iommu_group_for_each_dev(struct iommu_group *group, void *data, int (*fn)(struct device *, void *)) { @@ -3486,6 +3485,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_group); * iommu_sva_bind_device() - Bind a process address space to a device * @dev: the device * @mm: the mm to bind, caller must hold a reference to it + * @drvdata: opaque data pointer to pass to bind callback * * Create a bond between device and address space, allowing the device to access * the mm using the returned PASID. If a bond already exists between @device and diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 71f29c0927fc710aa5ff65318fbc3b2702d2700e..cf5e9cb2cb264ba9c3b7054b717fafe66e9b5c2a 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1086,7 +1086,7 @@ static __maybe_unused int omap_iommu_runtime_resume(struct device *dev) } /** - * omap_iommu_suspend_prepare - prepare() dev_pm_ops implementation + * omap_iommu_prepare - prepare() dev_pm_ops implementation * @dev: iommu device * * This function performs the necessary checks to determine if the IOMMU diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c new file mode 100644 index 0000000000000000000000000000000000000000..7100ed17dcce52828f94d5b1b69a039f40b98647 --- /dev/null +++ b/drivers/iommu/sprd-iommu.c @@ -0,0 +1,577 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Unisoc IOMMU driver + * + * Copyright (C) 2020 Unisoc, Inc. + * Author: Chunyan Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SPRD_IOMMU_PAGE_SHIFT 12 +#define SPRD_IOMMU_PAGE_SIZE SZ_4K + +#define SPRD_EX_CFG 0x0 +#define SPRD_IOMMU_VAOR_BYPASS BIT(4) +#define SPRD_IOMMU_GATE_EN BIT(1) +#define SPRD_IOMMU_EN BIT(0) +#define SPRD_EX_UPDATE 0x4 +#define SPRD_EX_FIRST_VPN 0x8 +#define SPRD_EX_VPN_RANGE 0xc +#define SPRD_EX_FIRST_PPN 0x10 +#define SPRD_EX_DEFAULT_PPN 0x14 + +#define SPRD_IOMMU_VERSION 0x0 +#define SPRD_VERSION_MASK GENMASK(15, 8) +#define SPRD_VERSION_SHIFT 0x8 +#define SPRD_VAU_CFG 0x4 +#define SPRD_VAU_UPDATE 0x8 +#define SPRD_VAU_AUTH_CFG 0xc +#define SPRD_VAU_FIRST_PPN 0x10 +#define SPRD_VAU_DEFAULT_PPN_RD 0x14 +#define SPRD_VAU_DEFAULT_PPN_WR 0x18 +#define SPRD_VAU_FIRST_VPN 0x1c +#define SPRD_VAU_VPN_RANGE 0x20 + +enum sprd_iommu_version { + SPRD_IOMMU_EX, + SPRD_IOMMU_VAU, +}; + +/* + * struct sprd_iommu_device - high-level sprd IOMMU device representation, + * including hardware information and configuration, also driver data, etc + * + * @ver: sprd IOMMU IP version + * @prot_page_va: protect page base virtual address + * @prot_page_pa: protect page base physical address, data would be + * written to here while translation fault + * @base: mapped base address for accessing registers + * @dev: pointer to basic device structure + * @iommu: IOMMU core representation + * @group: IOMMU group + * @eb: gate clock which controls IOMMU access + */ +struct sprd_iommu_device { + enum sprd_iommu_version ver; + u32 *prot_page_va; + dma_addr_t prot_page_pa; + void __iomem *base; + struct device *dev; + struct iommu_device iommu; + struct iommu_group *group; + struct clk *eb; +}; + +struct sprd_iommu_domain { + spinlock_t pgtlock; /* lock for page table */ + struct iommu_domain domain; + u32 *pgt_va; /* page table virtual address base */ + dma_addr_t 
pgt_pa; /* page table physical address base */ + struct sprd_iommu_device *sdev; +}; + +static const struct iommu_ops sprd_iommu_ops; + +static struct sprd_iommu_domain *to_sprd_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct sprd_iommu_domain, domain); +} + +static inline void +sprd_iommu_write(struct sprd_iommu_device *sdev, unsigned int reg, u32 val) +{ + writel_relaxed(val, sdev->base + reg); +} + +static inline u32 +sprd_iommu_read(struct sprd_iommu_device *sdev, unsigned int reg) +{ + return readl_relaxed(sdev->base + reg); +} + +static inline void +sprd_iommu_update_bits(struct sprd_iommu_device *sdev, unsigned int reg, + u32 mask, u32 shift, u32 val) +{ + u32 t = sprd_iommu_read(sdev, reg); + + t = (t & (~(mask << shift))) | ((val & mask) << shift); + sprd_iommu_write(sdev, reg, t); +} + +static inline int +sprd_iommu_get_version(struct sprd_iommu_device *sdev) +{ + int ver = (sprd_iommu_read(sdev, SPRD_IOMMU_VERSION) & + SPRD_VERSION_MASK) >> SPRD_VERSION_SHIFT; + + switch (ver) { + case SPRD_IOMMU_EX: + case SPRD_IOMMU_VAU: + return ver; + default: + return -EINVAL; + } +} + +static size_t +sprd_iommu_pgt_size(struct iommu_domain *domain) +{ + return ((domain->geometry.aperture_end - + domain->geometry.aperture_start + 1) >> + SPRD_IOMMU_PAGE_SHIFT) * sizeof(u32); +} + +static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) +{ + struct sprd_iommu_domain *dom; + + if (domain_type != IOMMU_DOMAIN_DMA && domain_type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + dom = kzalloc(sizeof(*dom), GFP_KERNEL); + if (!dom) + return NULL; + + if (iommu_get_dma_cookie(&dom->domain)) { + kfree(dom); + return NULL; + } + + spin_lock_init(&dom->pgtlock); + + dom->domain.geometry.aperture_start = 0; + dom->domain.geometry.aperture_end = SZ_256M - 1; + + return &dom->domain; +} + +static void sprd_iommu_domain_free(struct iommu_domain *domain) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + + iommu_put_dma_cookie(domain); + kfree(dom); +} + +static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom) +{ + struct sprd_iommu_device *sdev = dom->sdev; + u32 val; + unsigned int reg; + + if (sdev->ver == SPRD_IOMMU_EX) + reg = SPRD_EX_FIRST_VPN; + else + reg = SPRD_VAU_FIRST_VPN; + + val = dom->domain.geometry.aperture_start >> SPRD_IOMMU_PAGE_SHIFT; + sprd_iommu_write(sdev, reg, val); +} + +static void sprd_iommu_vpn_range(struct sprd_iommu_domain *dom) +{ + struct sprd_iommu_device *sdev = dom->sdev; + u32 val; + unsigned int reg; + + if (sdev->ver == SPRD_IOMMU_EX) + reg = SPRD_EX_VPN_RANGE; + else + reg = SPRD_VAU_VPN_RANGE; + + val = (dom->domain.geometry.aperture_end - + dom->domain.geometry.aperture_start) >> SPRD_IOMMU_PAGE_SHIFT; + sprd_iommu_write(sdev, reg, val); +} + +static void sprd_iommu_first_ppn(struct sprd_iommu_domain *dom) +{ + u32 val = dom->pgt_pa >> SPRD_IOMMU_PAGE_SHIFT; + struct sprd_iommu_device *sdev = dom->sdev; + unsigned int reg; + + if (sdev->ver == SPRD_IOMMU_EX) + reg = SPRD_EX_FIRST_PPN; + else + reg = SPRD_VAU_FIRST_PPN; + + sprd_iommu_write(sdev, reg, val); +} + +static void sprd_iommu_default_ppn(struct sprd_iommu_device *sdev) +{ + u32 val = sdev->prot_page_pa >> SPRD_IOMMU_PAGE_SHIFT; + + if (sdev->ver == SPRD_IOMMU_EX) { + sprd_iommu_write(sdev, SPRD_EX_DEFAULT_PPN, val); + } else if (sdev->ver == SPRD_IOMMU_VAU) { + sprd_iommu_write(sdev, SPRD_VAU_DEFAULT_PPN_RD, val); + sprd_iommu_write(sdev, SPRD_VAU_DEFAULT_PPN_WR, val); + } +} + +static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool 
en) +{ + unsigned int reg_cfg; + u32 mask, val; + + if (sdev->ver == SPRD_IOMMU_EX) + reg_cfg = SPRD_EX_CFG; + else + reg_cfg = SPRD_VAU_CFG; + + mask = SPRD_IOMMU_EN | SPRD_IOMMU_GATE_EN; + val = en ? mask : 0; + sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val); +} + +static int sprd_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev); + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + size_t pgt_size = sprd_iommu_pgt_size(domain); + + if (dom->sdev) { + pr_err("There's already a device attached to this domain.\n"); + return -EINVAL; + } + + dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL); + if (!dom->pgt_va) + return -ENOMEM; + + dom->sdev = sdev; + + sprd_iommu_first_ppn(dom); + sprd_iommu_first_vpn(dom); + sprd_iommu_vpn_range(dom); + sprd_iommu_default_ppn(sdev); + sprd_iommu_hw_en(sdev, true); + + return 0; +} + +static void sprd_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + struct sprd_iommu_device *sdev = dom->sdev; + size_t pgt_size = sprd_iommu_pgt_size(domain); + + if (!sdev) + return; + + dma_free_coherent(sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); + sprd_iommu_hw_en(sdev, false); + dom->sdev = NULL; +} + +static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT; + unsigned long flags; + unsigned int i; + u32 *pgt_base_iova; + u32 pabase = (u32)paddr; + unsigned long start = domain->geometry.aperture_start; + unsigned long end = domain->geometry.aperture_end; + + if (!dom->sdev) { + pr_err("No sprd_iommu_device attached to the domain\n"); + return -EINVAL; + } + + if (iova < start || (iova + size) > (end + 1)) { + dev_err(dom->sdev->dev, "(iova(0x%lx) + sixe(%zx)) are not in the range!\n", + iova, size); + return -EINVAL; + } + + pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT); + + spin_lock_irqsave(&dom->pgtlock, flags); + for (i = 0; i < page_num; i++) { + pgt_base_iova[i] = pabase >> SPRD_IOMMU_PAGE_SHIFT; + pabase += SPRD_IOMMU_PAGE_SIZE; + } + spin_unlock_irqrestore(&dom->pgtlock, flags); + + return 0; +} + +static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *iotlb_gather) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + unsigned long flags; + u32 *pgt_base_iova; + unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT; + unsigned long start = domain->geometry.aperture_start; + unsigned long end = domain->geometry.aperture_end; + + if (iova < start || (iova + size) > (end + 1)) + return -EINVAL; + + pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT); + + spin_lock_irqsave(&dom->pgtlock, flags); + memset(pgt_base_iova, 0, page_num * sizeof(u32)); + spin_unlock_irqrestore(&dom->pgtlock, flags); + + return 0; +} + +static void sprd_iommu_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + unsigned int reg; + + if (dom->sdev->ver == SPRD_IOMMU_EX) + reg = SPRD_EX_UPDATE; + else + reg = SPRD_VAU_UPDATE; + + /* clear IOMMU TLB buffer after page table updated */ + sprd_iommu_write(dom->sdev, reg, 0xffffffff); +} + +static void sprd_iommu_sync(struct iommu_domain *domain, 
+ struct iommu_iotlb_gather *iotlb_gather)
+{
+ sprd_iommu_sync_map(domain, 0, 0);
+}
+
+static phys_addr_t sprd_iommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova)
+{
+ struct sprd_iommu_domain *dom = to_sprd_domain(domain);
+ unsigned long flags;
+ phys_addr_t pa;
+ unsigned long start = domain->geometry.aperture_start;
+ unsigned long end = domain->geometry.aperture_end;
+
+ if (WARN_ON(iova < start || iova > end))
+ return 0;
+
+ spin_lock_irqsave(&dom->pgtlock, flags);
+ pa = *(dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT));
+ pa = (pa << SPRD_IOMMU_PAGE_SHIFT) + ((iova - start) & (SPRD_IOMMU_PAGE_SIZE - 1));
+ spin_unlock_irqrestore(&dom->pgtlock, flags);
+
+ return pa;
+}
+
+static struct iommu_device *sprd_iommu_probe_device(struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct sprd_iommu_device *sdev;
+
+ if (!fwspec || fwspec->ops != &sprd_iommu_ops)
+ return ERR_PTR(-ENODEV);
+
+ sdev = dev_iommu_priv_get(dev);
+
+ return &sdev->iommu;
+}
+
+static void sprd_iommu_release_device(struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+ if (!fwspec || fwspec->ops != &sprd_iommu_ops)
+ return;
+
+ iommu_fwspec_free(dev);
+}
+
+static struct iommu_group *sprd_iommu_device_group(struct device *dev)
+{
+ struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev);
+
+ return iommu_group_ref_get(sdev->group);
+}
+
+static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+ struct platform_device *pdev;
+
+ if (!dev_iommu_priv_get(dev)) {
+ pdev = of_find_device_by_node(args->np);
+ dev_iommu_priv_set(dev, platform_get_drvdata(pdev));
+ platform_device_put(pdev);
+ }
+
+ return 0;
+}
+
+
+static const struct iommu_ops sprd_iommu_ops = {
+ .domain_alloc = sprd_iommu_domain_alloc,
+ .domain_free = sprd_iommu_domain_free,
+ .attach_dev = sprd_iommu_attach_device,
+ .detach_dev = sprd_iommu_detach_device,
+ .map = sprd_iommu_map,
+ .unmap = sprd_iommu_unmap,
+ .iotlb_sync_map = sprd_iommu_sync_map,
+ .iotlb_sync = sprd_iommu_sync,
+ .iova_to_phys = sprd_iommu_iova_to_phys,
+ .probe_device = sprd_iommu_probe_device,
+ .release_device = sprd_iommu_release_device,
+ .device_group = sprd_iommu_device_group,
+ .of_xlate = sprd_iommu_of_xlate,
+ .pgsize_bitmap = ~0UL << SPRD_IOMMU_PAGE_SHIFT,
+};
+
+static const struct of_device_id sprd_iommu_of_match[] = {
+ { .compatible = "sprd,iommu-v1" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, sprd_iommu_of_match);
+
+/*
+ * A clock is not always required. Access to some IOMMUs is controlled by a
+ * gate clock, so enable that clock before accessing such IOMMUs.
+ * Return 0 on success, or when no clock is found.
+ */ +static int sprd_iommu_clk_enable(struct sprd_iommu_device *sdev) +{ + struct clk *eb; + + eb = clk_get_optional(sdev->dev, 0); + if (!eb) + return 0; + + if (IS_ERR(eb)) + return PTR_ERR(eb); + + sdev->eb = eb; + return clk_prepare_enable(eb); +} + +static void sprd_iommu_clk_disable(struct sprd_iommu_device *sdev) +{ + if (sdev->eb) + clk_disable_unprepare(sdev->eb); +} + +static int sprd_iommu_probe(struct platform_device *pdev) +{ + struct sprd_iommu_device *sdev; + struct device *dev = &pdev->dev; + void __iomem *base; + int ret; + + sdev = devm_kzalloc(dev, sizeof(*sdev), GFP_KERNEL); + if (!sdev) + return -ENOMEM; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) { + dev_err(dev, "Failed to get ioremap resource.\n"); + return PTR_ERR(base); + } + sdev->base = base; + + sdev->prot_page_va = dma_alloc_coherent(dev, SPRD_IOMMU_PAGE_SIZE, + &sdev->prot_page_pa, GFP_KERNEL); + if (!sdev->prot_page_va) + return -ENOMEM; + + platform_set_drvdata(pdev, sdev); + sdev->dev = dev; + + /* All the client devices are in the same iommu-group */ + sdev->group = iommu_group_alloc(); + if (IS_ERR(sdev->group)) { + ret = PTR_ERR(sdev->group); + goto free_page; + } + + ret = iommu_device_sysfs_add(&sdev->iommu, dev, NULL, dev_name(dev)); + if (ret) + goto put_group; + + iommu_device_set_ops(&sdev->iommu, &sprd_iommu_ops); + iommu_device_set_fwnode(&sdev->iommu, &dev->of_node->fwnode); + + ret = iommu_device_register(&sdev->iommu); + if (ret) + goto remove_sysfs; + + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &sprd_iommu_ops); + + ret = sprd_iommu_clk_enable(sdev); + if (ret) + goto unregister_iommu; + + ret = sprd_iommu_get_version(sdev); + if (ret < 0) { + dev_err(dev, "IOMMU version(%d) is invalid.\n", ret); + goto disable_clk; + } + sdev->ver = ret; + + return 0; + +disable_clk: + sprd_iommu_clk_disable(sdev); +unregister_iommu: + iommu_device_unregister(&sdev->iommu); +remove_sysfs: + iommu_device_sysfs_remove(&sdev->iommu); +put_group: + iommu_group_put(sdev->group); +free_page: + dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa); + return ret; +} + +static int sprd_iommu_remove(struct platform_device *pdev) +{ + struct sprd_iommu_device *sdev = platform_get_drvdata(pdev); + + dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa); + + iommu_group_put(sdev->group); + sdev->group = NULL; + + bus_set_iommu(&platform_bus_type, NULL); + + platform_set_drvdata(pdev, NULL); + iommu_device_sysfs_remove(&sdev->iommu); + iommu_device_unregister(&sdev->iommu); + + return 0; +} + +static struct platform_driver sprd_iommu_driver = { + .driver = { + .name = "sprd-iommu", + .of_match_table = sprd_iommu_of_match, + .suppress_bind_attrs = true, + }, + .probe = sprd_iommu_probe, + .remove = sprd_iommu_remove, +}; +module_platform_driver(sprd_iommu_driver); + +MODULE_DESCRIPTION("IOMMU driver for Unisoc SoCs"); +MODULE_ALIAS("platform:sprd-iommu"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 15680c3c92792aaca3b3b8e83b8b4e87bf82d1fe..3f9f84f9f2880c68c8cf0e0cac511585d7aa401e 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -699,6 +699,16 @@ config MFD_INTEL_PMC_BXT Register and P-unit access. In addition this creates devices for iTCO watchdog and telemetry that are part of the PMC. 
+config MFD_INTEL_PMT + tristate "Intel Platform Monitoring Technology (PMT) support" + depends on PCI + select MFD_CORE + help + The Intel Platform Monitoring Technology (PMT) is an interface that + provides access to hardware monitor registers. This driver supports + Telemetry, Watcher, and Crashlog PMT capabilities/devices for + platforms starting from Tiger Lake. + config MFD_IPAQ_MICRO bool "Atmel Micro ASIC (iPAQ h3100/h3600/h3700) Support" depends on SA1100_H3100 || SA1100_H3600 diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index fb1df45a301e5d0d1ed21eaeb15419f79ede0558..ce8f1c0583d5ccf741f663bcb7d3563d09767b42 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -216,6 +216,7 @@ obj-$(CONFIG_MFD_INTEL_LPSS_PCI) += intel-lpss-pci.o obj-$(CONFIG_MFD_INTEL_LPSS_ACPI) += intel-lpss-acpi.o obj-$(CONFIG_MFD_INTEL_MSIC) += intel_msic.o obj-$(CONFIG_MFD_INTEL_PMC_BXT) += intel_pmc_bxt.o +obj-$(CONFIG_MFD_INTEL_PMT) += intel_pmt.o obj-$(CONFIG_MFD_PALMAS) += palmas.o obj-$(CONFIG_MFD_VIPERBOARD) += viperboard.o obj-$(CONFIG_MFD_RC5T583) += rc5t583.o rc5t583-irq.o diff --git a/drivers/mfd/intel_pmt.c b/drivers/mfd/intel_pmt.c new file mode 100644 index 0000000000000000000000000000000000000000..dd7eb614c28e47d56fd88fb80cf501e0e67fd12b --- /dev/null +++ b/drivers/mfd/intel_pmt.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitoring Technology PMT driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. + * + * Author: David E. Box + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Intel DVSEC capability vendor space offsets */ +#define INTEL_DVSEC_ENTRIES 0xA +#define INTEL_DVSEC_SIZE 0xB +#define INTEL_DVSEC_TABLE 0xC +#define INTEL_DVSEC_TABLE_BAR(x) ((x) & GENMASK(2, 0)) +#define INTEL_DVSEC_TABLE_OFFSET(x) ((x) & GENMASK(31, 3)) +#define INTEL_DVSEC_ENTRY_SIZE 4 + +/* PMT capabilities */ +#define DVSEC_INTEL_ID_TELEMETRY 2 +#define DVSEC_INTEL_ID_WATCHER 3 +#define DVSEC_INTEL_ID_CRASHLOG 4 + +struct intel_dvsec_header { + u16 length; + u16 id; + u8 num_entries; + u8 entry_size; + u8 tbir; + u32 offset; +}; + +enum pmt_quirks { + /* Watcher capability not supported */ + PMT_QUIRK_NO_WATCHER = BIT(0), + + /* Crashlog capability not supported */ + PMT_QUIRK_NO_CRASHLOG = BIT(1), + + /* Use shift instead of mask to read discovery table offset */ + PMT_QUIRK_TABLE_SHIFT = BIT(2), + + /* DVSEC not present (provided in driver data) */ + PMT_QUIRK_NO_DVSEC = BIT(3), +}; + +struct pmt_platform_info { + unsigned long quirks; + struct intel_dvsec_header **capabilities; +}; + +static const struct pmt_platform_info tgl_info = { + .quirks = PMT_QUIRK_NO_WATCHER | PMT_QUIRK_NO_CRASHLOG | + PMT_QUIRK_TABLE_SHIFT, +}; + +/* DG1 Platform with DVSEC quirk*/ +static struct intel_dvsec_header dg1_telemetry = { + .length = 0x10, + .id = 2, + .num_entries = 1, + .entry_size = 3, + .tbir = 0, + .offset = 0x466000, +}; + +static struct intel_dvsec_header *dg1_capabilities[] = { + &dg1_telemetry, + NULL +}; + +static const struct pmt_platform_info dg1_info = { + .quirks = PMT_QUIRK_NO_DVSEC, + .capabilities = dg1_capabilities, +}; + +static int pmt_add_dev(struct pci_dev *pdev, struct intel_dvsec_header *header, + unsigned long quirks) +{ + struct device *dev = &pdev->dev; + struct resource *res, *tmp; + struct mfd_cell *cell; + const char *name; + int count = header->num_entries; + int size = header->entry_size; + int id = header->id; + int i; + + switch (id) { + case 
DVSEC_INTEL_ID_TELEMETRY: + name = "pmt_telemetry"; + break; + case DVSEC_INTEL_ID_WATCHER: + if (quirks & PMT_QUIRK_NO_WATCHER) { + dev_info(dev, "Watcher not supported\n"); + return -EINVAL; + } + name = "pmt_watcher"; + break; + case DVSEC_INTEL_ID_CRASHLOG: + if (quirks & PMT_QUIRK_NO_CRASHLOG) { + dev_info(dev, "Crashlog not supported\n"); + return -EINVAL; + } + name = "pmt_crashlog"; + break; + default: + return -EINVAL; + } + + if (!header->num_entries || !header->entry_size) { + dev_err(dev, "Invalid count or size for %s header\n", name); + return -EINVAL; + } + + cell = devm_kzalloc(dev, sizeof(*cell), GFP_KERNEL); + if (!cell) + return -ENOMEM; + + res = devm_kcalloc(dev, count, sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + if (quirks & PMT_QUIRK_TABLE_SHIFT) + header->offset >>= 3; + + /* + * The PMT DVSEC contains the starting offset and count for a block of + * discovery tables, each providing access to monitoring facilities for + * a section of the device. Create a resource list of these tables to + * provide to the driver. + */ + for (i = 0, tmp = res; i < count; i++, tmp++) { + tmp->start = pdev->resource[header->tbir].start + + header->offset + i * (size << 2); + tmp->end = tmp->start + (size << 2) - 1; + tmp->flags = IORESOURCE_MEM; + } + + cell->resources = res; + cell->num_resources = count; + cell->name = name; + + return devm_mfd_add_devices(dev, PLATFORM_DEVID_AUTO, cell, 1, NULL, 0, + NULL); +} + +static int pmt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct pmt_platform_info *info; + unsigned long quirks = 0; + bool found_devices = false; + int ret, pos = 0; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + info = (struct pmt_platform_info *)id->driver_data; + + if (info) + quirks = info->quirks; + + if (info && (info->quirks & PMT_QUIRK_NO_DVSEC)) { + struct intel_dvsec_header **header; + + header = info->capabilities; + while (*header) { + ret = pmt_add_dev(pdev, *header, quirks); + if (ret) + dev_warn(&pdev->dev, + "Failed to add device for DVSEC id %d\n", + (*header)->id); + else + found_devices = true; + + ++header; + } + } else { + do { + struct intel_dvsec_header header; + u32 table; + u16 vid; + + pos = pci_find_next_ext_capability(pdev, pos, PCI_EXT_CAP_ID_DVSEC); + if (!pos) + break; + + pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vid); + if (vid != PCI_VENDOR_ID_INTEL) + continue; + + pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, + &header.id); + pci_read_config_byte(pdev, pos + INTEL_DVSEC_ENTRIES, + &header.num_entries); + pci_read_config_byte(pdev, pos + INTEL_DVSEC_SIZE, + &header.entry_size); + pci_read_config_dword(pdev, pos + INTEL_DVSEC_TABLE, + &table); + + header.tbir = INTEL_DVSEC_TABLE_BAR(table); + header.offset = INTEL_DVSEC_TABLE_OFFSET(table); + + ret = pmt_add_dev(pdev, &header, quirks); + if (ret) + continue; + + found_devices = true; + } while (true); + } + + if (!found_devices) + return -ENODEV; + + pm_runtime_put(&pdev->dev); + pm_runtime_allow(&pdev->dev); + + return 0; +} + +static void pmt_pci_remove(struct pci_dev *pdev) +{ + pm_runtime_forbid(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); +} + +#define PCI_DEVICE_ID_INTEL_PMT_ADL 0x467d +#define PCI_DEVICE_ID_INTEL_PMT_DG1 0x490e +#define PCI_DEVICE_ID_INTEL_PMT_OOBMSM 0x09a7 +#define PCI_DEVICE_ID_INTEL_PMT_TGL 0x9a0d +static const struct pci_device_id pmt_pci_ids[] = { + { PCI_DEVICE_DATA(INTEL, PMT_ADL, &tgl_info) }, + { PCI_DEVICE_DATA(INTEL, PMT_DG1, &dg1_info) }, + { PCI_DEVICE_DATA(INTEL, 
PMT_OOBMSM, NULL) },
+ { PCI_DEVICE_DATA(INTEL, PMT_TGL, &tgl_info) },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, pmt_pci_ids);
+
+static struct pci_driver pmt_pci_driver = {
+ .name = "intel-pmt",
+ .id_table = pmt_pci_ids,
+ .probe = pmt_pci_probe,
+ .remove = pmt_pci_remove,
+};
+module_pci_driver(pmt_pci_driver);
+
+MODULE_AUTHOR("David E. Box ");
+MODULE_DESCRIPTION("Intel Platform Monitoring Technology PMT driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index a1858689d6e10b499ddbae04cc4f92b49a9999f9..a24783aa52eae475a78ffc768b93c95eca457c9e 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -1362,6 +1362,42 @@ config INTEL_PMC_CORE
 - LTR Ignore
 - MPHY/PLL gating status (Sunrisepoint PCH only)

+config INTEL_PMT_CLASS
+ tristate
+ help
+ The Intel Platform Monitoring Technology (PMT) class driver provides
+ the basic sysfs interface and file hierarchy used by PMT devices.
+
+ For more information, see:
+
+
+ To compile this driver as a module, choose M here: the module
+ will be called intel_pmt_class.
+
+config INTEL_PMT_TELEMETRY
+ tristate "Intel Platform Monitoring Technology (PMT) Telemetry driver"
+ depends on MFD_INTEL_PMT
+ select INTEL_PMT_CLASS
+ help
+ The Intel Platform Monitoring Technology (PMT) Telemetry driver provides
+ access to hardware telemetry metrics on devices that support the
+ feature.
+
+ To compile this driver as a module, choose M here: the module
+ will be called intel_pmt_telemetry.
+
+config INTEL_PMT_CRASHLOG
+ tristate "Intel Platform Monitoring Technology (PMT) Crashlog driver"
+ depends on MFD_INTEL_PMT
+ select INTEL_PMT_CLASS
+ help
+ The Intel Platform Monitoring Technology (PMT) crashlog driver provides
+ access to hardware crashlog capabilities on devices that support the
+ feature.
+
+ To compile this driver as a module, choose M here: the module
+ will be called intel_pmt_crashlog.
+
 config INTEL_PUNIT_IPC
 tristate "Intel P-Unit IPC Driver"
 help
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 5f823f7eff45280248f01197cd086fc81d3de66b..ca82c1344977caf10a56f6602315a5c54c3d8716 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -140,6 +140,9 @@ obj-$(CONFIG_INTEL_MFLD_THERMAL) += intel_mid_thermal.o
 obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o
 obj-$(CONFIG_INTEL_MRFLD_PWRBTN) += intel_mrfld_pwrbtn.o
 obj-$(CONFIG_INTEL_PMC_CORE) += intel_pmc_core.o intel_pmc_core_pltdrv.o
+obj-$(CONFIG_INTEL_PMT_CLASS) += intel_pmt_class.o
+obj-$(CONFIG_INTEL_PMT_TELEMETRY) += intel_pmt_telemetry.o
+obj-$(CONFIG_INTEL_PMT_CRASHLOG) += intel_pmt_crashlog.o
 obj-$(CONFIG_INTEL_PUNIT_IPC) += intel_punit_ipc.o
 obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o
 obj-$(CONFIG_INTEL_SCU_PCI) += intel_scu_pcidrv.o
diff --git a/drivers/platform/x86/intel_pmt_class.c b/drivers/platform/x86/intel_pmt_class.c
new file mode 100644
index 0000000000000000000000000000000000000000..c86ff15b1ed522be06f2c670a2f62c3bef70aa7d
--- /dev/null
+++ b/drivers/platform/x86/intel_pmt_class.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Platform Monitoring Technology (PMT) class driver
+ *
+ * Copyright (c) 2020, Intel Corporation.
+ * All Rights Reserved.
+ * + * Author: "Alexander Duyck" + */ + +#include +#include +#include +#include + +#include "intel_pmt_class.h" + +#define PMT_XA_START 0 +#define PMT_XA_MAX INT_MAX +#define PMT_XA_LIMIT XA_LIMIT(PMT_XA_START, PMT_XA_MAX) + +/* + * Early implementations of PMT on client platforms have some + * differences from the server platforms (which use the Out Of Band + * Management Services Module OOBMSM). This list tracks those + * platforms as needed to handle those differences. Newer client + * platforms are expected to be fully compatible with server. + */ +static const struct pci_device_id pmt_telem_early_client_pci_ids[] = { + { PCI_VDEVICE(INTEL, 0x467d) }, /* ADL */ + { PCI_VDEVICE(INTEL, 0x490e) }, /* DG1 */ + { PCI_VDEVICE(INTEL, 0x9a0d) }, /* TGL */ + { } +}; + +bool intel_pmt_is_early_client_hw(struct device *dev) +{ + struct pci_dev *parent = to_pci_dev(dev->parent); + + return !!pci_match_id(pmt_telem_early_client_pci_ids, parent); +} +EXPORT_SYMBOL_GPL(intel_pmt_is_early_client_hw); + +/* + * sysfs + */ +static ssize_t +intel_pmt_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + struct intel_pmt_entry *entry = container_of(attr, + struct intel_pmt_entry, + pmt_bin_attr); + + if (off < 0) + return -EINVAL; + + if (off >= entry->size) + return 0; + + if (count > entry->size - off) + count = entry->size - off; + + memcpy_fromio(buf, entry->base + off, count); + + return count; +} + +static int +intel_pmt_mmap(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, struct vm_area_struct *vma) +{ + struct intel_pmt_entry *entry = container_of(attr, + struct intel_pmt_entry, + pmt_bin_attr); + unsigned long vsize = vma->vm_end - vma->vm_start; + struct device *dev = kobj_to_dev(kobj); + unsigned long phys = entry->base_addr; + unsigned long pfn = PFN_DOWN(phys); + unsigned long psize; + + if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) + return -EROFS; + + psize = (PFN_UP(entry->base_addr + entry->size) - pfn) * PAGE_SIZE; + if (vsize > psize) { + dev_err(dev, "Requested mmap size is too large\n"); + return -EINVAL; + } + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, pfn, + vsize, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static ssize_t +guid_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + + return sprintf(buf, "0x%x\n", entry->guid); +} +static DEVICE_ATTR_RO(guid); + +static ssize_t size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + + return sprintf(buf, "%zu\n", entry->size); +} +static DEVICE_ATTR_RO(size); + +static ssize_t +offset_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + + return sprintf(buf, "%lu\n", offset_in_page(entry->base_addr)); +} +static DEVICE_ATTR_RO(offset); + +static struct attribute *intel_pmt_attrs[] = { + &dev_attr_guid.attr, + &dev_attr_size.attr, + &dev_attr_offset.attr, + NULL +}; +ATTRIBUTE_GROUPS(intel_pmt); + +static struct class intel_pmt_class = { + .name = "intel_pmt", + .owner = THIS_MODULE, + .dev_groups = intel_pmt_groups, +}; + +static int intel_pmt_populate_entry(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev, + struct resource *disc_res) +{ + struct pci_dev *pci_dev = to_pci_dev(dev->parent); + u8 bir; + + /* + * 
The base offset should always be 8 byte aligned.
+ *
+ * For non-local access types the lower 3 bits of base offset
+ * contain the index of the base address register where the
+ * telemetry can be found.
+ */
+ bir = GET_BIR(header->base_offset);
+
+ /* Local access and BARID only for now */
+ switch (header->access_type) {
+ case ACCESS_LOCAL:
+ if (bir) {
+ dev_err(dev,
+ "Unsupported BAR index %d for access type %d\n",
+ bir, header->access_type);
+ return -EINVAL;
+ }
+ /*
+ * For access_type LOCAL, the base address is as follows:
+ * base address = end of discovery region + base offset
+ */
+ entry->base_addr = disc_res->end + 1 + header->base_offset;
+
+ /*
+ * Some hardware uses a different calculation for the base address
+ * when access_type == ACCESS_LOCAL. On these systems
+ * ACCESS_LOCAL refers to an address in the same BAR as the
+ * header but at a fixed offset. But as the header address was
+ * supplied to the driver, we don't know which BAR it was in.
+ * So search for the BAR whose range includes the header address.
+ */
+ if (intel_pmt_is_early_client_hw(dev)) {
+ int i;
+
+ entry->base_addr = 0;
+ for (i = 0; i < 6; i++)
+ if (disc_res->start >= pci_resource_start(pci_dev, i) &&
+ (disc_res->start <= pci_resource_end(pci_dev, i))) {
+ entry->base_addr = pci_resource_start(pci_dev, i) +
+ header->base_offset;
+ break;
+ }
+ if (!entry->base_addr)
+ return -EINVAL;
+ }
+
+ break;
+ case ACCESS_BARID:
+ /*
+ * If another BAR was specified then the base offset
+ * represents the offset within that BAR. So retrieve the
+ * address from the parent PCI device and add offset.
+ */
+ entry->base_addr = pci_resource_start(pci_dev, bir) +
+ GET_ADDRESS(header->base_offset);
+ break;
+ default:
+ dev_err(dev, "Unsupported access type %d\n",
+ header->access_type);
+ return -EINVAL;
+ }
+
+ entry->guid = header->guid;
+ entry->size = header->size;
+
+ return 0;
+}
+
+static int intel_pmt_dev_register(struct intel_pmt_entry *entry,
+ struct intel_pmt_namespace *ns,
+ struct device *parent)
+{
+ struct resource res = {0};
+ struct device *dev;
+ int ret;
+
+ ret = xa_alloc(ns->xa, &entry->devid, entry, PMT_XA_LIMIT, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ dev = device_create(&intel_pmt_class, parent, MKDEV(0, 0), entry,
+ "%s%d", ns->name, entry->devid);
+
+ if (IS_ERR(dev)) {
+ dev_err(parent, "Could not create %s%d device node\n",
+ ns->name, entry->devid);
+ ret = PTR_ERR(dev);
+ goto fail_dev_create;
+ }
+
+ entry->kobj = &dev->kobj;
+
+ if (ns->attr_grp) {
+ ret = sysfs_create_group(entry->kobj, ns->attr_grp);
+ if (ret)
+ goto fail_sysfs;
+ }
+
+ /* if size is 0 assume no data buffer, so no file needed */
+ if (!entry->size)
+ return 0;
+
+ res.start = entry->base_addr;
+ res.end = res.start + entry->size - 1;
+ res.flags = IORESOURCE_MEM;
+
+ entry->base = devm_ioremap_resource(dev, &res);
+ if (IS_ERR(entry->base)) {
+ ret = PTR_ERR(entry->base);
+ goto fail_ioremap;
+ }
+
+ sysfs_bin_attr_init(&entry->pmt_bin_attr);
+ entry->pmt_bin_attr.attr.name = ns->name;
+ entry->pmt_bin_attr.attr.mode = 0440;
+ entry->pmt_bin_attr.mmap = intel_pmt_mmap;
+ entry->pmt_bin_attr.read = intel_pmt_read;
+ entry->pmt_bin_attr.size = entry->size;
+
+ ret = sysfs_create_bin_file(&dev->kobj, &entry->pmt_bin_attr);
+ if (!ret)
+ return 0;
+
+fail_ioremap:
+ if (ns->attr_grp)
+ sysfs_remove_group(entry->kobj, ns->attr_grp);
+fail_sysfs:
+ device_unregister(dev);
+fail_dev_create:
+ xa_erase(ns->xa, entry->devid);
+
+ return ret;
+}
+
+int intel_pmt_dev_create(struct intel_pmt_entry *entry,
+ struct intel_pmt_namespace *ns, + struct platform_device *pdev, int idx) +{ + struct intel_pmt_header header; + struct resource *disc_res; + int ret = -ENODEV; + + disc_res = platform_get_resource(pdev, IORESOURCE_MEM, idx); + if (!disc_res) + return ret; + + entry->disc_table = devm_platform_ioremap_resource(pdev, idx); + if (IS_ERR(entry->disc_table)) + return PTR_ERR(entry->disc_table); + + ret = ns->pmt_header_decode(entry, &header, &pdev->dev); + if (ret) + return ret; + + ret = intel_pmt_populate_entry(entry, &header, &pdev->dev, disc_res); + if (ret) + return ret; + + return intel_pmt_dev_register(entry, ns, &pdev->dev); + +} +EXPORT_SYMBOL_GPL(intel_pmt_dev_create); + +void intel_pmt_dev_destroy(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns) +{ + struct device *dev = kobj_to_dev(entry->kobj); + + if (entry->size) + sysfs_remove_bin_file(entry->kobj, &entry->pmt_bin_attr); + + if (ns->attr_grp) + sysfs_remove_group(entry->kobj, ns->attr_grp); + + device_unregister(dev); + xa_erase(ns->xa, entry->devid); +} +EXPORT_SYMBOL_GPL(intel_pmt_dev_destroy); + +static int __init pmt_class_init(void) +{ + return class_register(&intel_pmt_class); +} + +static void __exit pmt_class_exit(void) +{ + class_unregister(&intel_pmt_class); +} + +module_init(pmt_class_init); +module_exit(pmt_class_exit); + +MODULE_AUTHOR("Alexander Duyck "); +MODULE_DESCRIPTION("Intel PMT Class driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel_pmt_class.h b/drivers/platform/x86/intel_pmt_class.h new file mode 100644 index 0000000000000000000000000000000000000000..1337019c2873eb37b3abda073700228437bb0b36 --- /dev/null +++ b/drivers/platform/x86/intel_pmt_class.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_PMT_CLASS_H +#define _INTEL_PMT_CLASS_H + +#include +#include +#include +#include +#include +#include + +/* PMT access types */ +#define ACCESS_BARID 2 +#define ACCESS_LOCAL 3 + +/* PMT discovery base address/offset register layout */ +#define GET_BIR(v) ((v) & GENMASK(2, 0)) +#define GET_ADDRESS(v) ((v) & GENMASK(31, 3)) + +struct intel_pmt_entry { + struct bin_attribute pmt_bin_attr; + struct kobject *kobj; + void __iomem *disc_table; + void __iomem *base; + unsigned long base_addr; + size_t size; + u32 guid; + int devid; +}; + +struct intel_pmt_header { + u32 base_offset; + u32 size; + u32 guid; + u8 access_type; +}; + +struct intel_pmt_namespace { + const char *name; + struct xarray *xa; + const struct attribute_group *attr_grp; + int (*pmt_header_decode)(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev); +}; + +bool intel_pmt_is_early_client_hw(struct device *dev); +int intel_pmt_dev_create(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns, + struct platform_device *pdev, int idx); +void intel_pmt_dev_destroy(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns); +#endif diff --git a/drivers/platform/x86/intel_pmt_crashlog.c b/drivers/platform/x86/intel_pmt_crashlog.c new file mode 100644 index 0000000000000000000000000000000000000000..56963ceb6345f88408be9d686ac7f1c3fa057251 --- /dev/null +++ b/drivers/platform/x86/intel_pmt_crashlog.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitoring Technology Crashlog driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. 
+ * + * Author: "Alexander Duyck" + */ + +#include +#include +#include +#include +#include +#include + +#include "intel_pmt_class.h" + +#define DRV_NAME "pmt_crashlog" + +/* Crashlog discovery header types */ +#define CRASH_TYPE_OOBMSM 1 + +/* Control Flags */ +#define CRASHLOG_FLAG_DISABLE BIT(28) + +/* + * Bits 29 and 30 control the state of bit 31. + * + * Bit 29 will clear bit 31, if set, allowing a new crashlog to be captured. + * Bit 30 will immediately trigger a crashlog to be generated, setting bit 31. + * Bit 31 is the read-only status with a 1 indicating log is complete. + */ +#define CRASHLOG_FLAG_TRIGGER_CLEAR BIT(29) +#define CRASHLOG_FLAG_TRIGGER_EXECUTE BIT(30) +#define CRASHLOG_FLAG_TRIGGER_COMPLETE BIT(31) +#define CRASHLOG_FLAG_TRIGGER_MASK GENMASK(31, 28) + +/* Crashlog Discovery Header */ +#define CONTROL_OFFSET 0x0 +#define GUID_OFFSET 0x4 +#define BASE_OFFSET 0x8 +#define SIZE_OFFSET 0xC +#define GET_ACCESS(v) ((v) & GENMASK(3, 0)) +#define GET_TYPE(v) (((v) & GENMASK(7, 4)) >> 4) +#define GET_VERSION(v) (((v) & GENMASK(19, 16)) >> 16) +/* size is in bytes */ +#define GET_SIZE(v) ((v) * sizeof(u32)) + +struct crashlog_entry { + /* entry must be first member of struct */ + struct intel_pmt_entry entry; + struct mutex control_mutex; +}; + +struct pmt_crashlog_priv { + int num_entries; + struct crashlog_entry entry[]; +}; + +/* + * I/O + */ +static bool pmt_crashlog_complete(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + /* return current value of the crashlog complete flag */ + return !!(control & CRASHLOG_FLAG_TRIGGER_COMPLETE); +} + +static bool pmt_crashlog_disabled(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + /* return current value of the crashlog disabled flag */ + return !!(control & CRASHLOG_FLAG_DISABLE); +} + +static bool pmt_crashlog_supported(struct intel_pmt_entry *entry) +{ + u32 discovery_header = readl(entry->disc_table + CONTROL_OFFSET); + u32 crash_type, version; + + crash_type = GET_TYPE(discovery_header); + version = GET_VERSION(discovery_header); + + /* + * Currently we only recognize OOBMSM version 0 devices. + * We can ignore all other crashlog devices in the system. 
+ */ + return crash_type == CRASH_TYPE_OOBMSM && version == 0; +} + +static void pmt_crashlog_set_disable(struct intel_pmt_entry *entry, + bool disable) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + /* clear trigger bits so we are only modifying disable flag */ + control &= ~CRASHLOG_FLAG_TRIGGER_MASK; + + if (disable) + control |= CRASHLOG_FLAG_DISABLE; + else + control &= ~CRASHLOG_FLAG_DISABLE; + + writel(control, entry->disc_table + CONTROL_OFFSET); +} + +static void pmt_crashlog_set_clear(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + control &= ~CRASHLOG_FLAG_TRIGGER_MASK; + control |= CRASHLOG_FLAG_TRIGGER_CLEAR; + + writel(control, entry->disc_table + CONTROL_OFFSET); +} + +static void pmt_crashlog_set_execute(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + control &= ~CRASHLOG_FLAG_TRIGGER_MASK; + control |= CRASHLOG_FLAG_TRIGGER_EXECUTE; + + writel(control, entry->disc_table + CONTROL_OFFSET); +} + +/* + * sysfs + */ +static ssize_t +enable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + int enabled = !pmt_crashlog_disabled(entry); + + return sprintf(buf, "%d\n", enabled); +} + +static ssize_t +enable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct crashlog_entry *entry; + bool enabled; + int result; + + entry = dev_get_drvdata(dev); + + result = kstrtobool(buf, &enabled); + if (result) + return result; + + mutex_lock(&entry->control_mutex); + pmt_crashlog_set_disable(&entry->entry, !enabled); + mutex_unlock(&entry->control_mutex); + + return count; +} +static DEVICE_ATTR_RW(enable); + +static ssize_t +trigger_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry; + int trigger; + + entry = dev_get_drvdata(dev); + trigger = pmt_crashlog_complete(entry); + + return sprintf(buf, "%d\n", trigger); +} + +static ssize_t +trigger_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct crashlog_entry *entry; + bool trigger; + int result; + + entry = dev_get_drvdata(dev); + + result = kstrtobool(buf, &trigger); + if (result) + return result; + + mutex_lock(&entry->control_mutex); + + if (!trigger) { + pmt_crashlog_set_clear(&entry->entry); + } else if (pmt_crashlog_complete(&entry->entry)) { + /* we cannot trigger a new crash if one is still pending */ + result = -EEXIST; + goto err; + } else if (pmt_crashlog_disabled(&entry->entry)) { + /* if device is currently disabled, return busy */ + result = -EBUSY; + goto err; + } else { + pmt_crashlog_set_execute(&entry->entry); + } + + result = count; +err: + mutex_unlock(&entry->control_mutex); + return result; +} +static DEVICE_ATTR_RW(trigger); + +static struct attribute *pmt_crashlog_attrs[] = { + &dev_attr_enable.attr, + &dev_attr_trigger.attr, + NULL +}; + +static const struct attribute_group pmt_crashlog_group = { + .attrs = pmt_crashlog_attrs, +}; + +static int pmt_crashlog_header_decode(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev) +{ + void __iomem *disc_table = entry->disc_table; + struct crashlog_entry *crashlog; + + if (!pmt_crashlog_supported(entry)) + return 1; + + /* initialize control mutex */ + crashlog = container_of(entry, struct crashlog_entry, entry); + mutex_init(&crashlog->control_mutex); + + header->access_type = 
GET_ACCESS(readl(disc_table)); + header->guid = readl(disc_table + GUID_OFFSET); + header->base_offset = readl(disc_table + BASE_OFFSET); + + /* Size is measured in DWORDS, but accessor returns bytes */ + header->size = GET_SIZE(readl(disc_table + SIZE_OFFSET)); + + return 0; +} + +static DEFINE_XARRAY_ALLOC(crashlog_array); +static struct intel_pmt_namespace pmt_crashlog_ns = { + .name = "crashlog", + .xa = &crashlog_array, + .attr_grp = &pmt_crashlog_group, + .pmt_header_decode = pmt_crashlog_header_decode, +}; + +/* + * initialization + */ +static int pmt_crashlog_remove(struct platform_device *pdev) +{ + struct pmt_crashlog_priv *priv = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < priv->num_entries; i++) + intel_pmt_dev_destroy(&priv->entry[i].entry, &pmt_crashlog_ns); + + return 0; +} + +static int pmt_crashlog_probe(struct platform_device *pdev) +{ + struct pmt_crashlog_priv *priv; + size_t size; + int i, ret; + + size = struct_size(priv, entry, pdev->num_resources); + priv = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + if (!priv) + return -ENOMEM; + + platform_set_drvdata(pdev, priv); + + for (i = 0; i < pdev->num_resources; i++) { + struct intel_pmt_entry *entry = &priv->entry[i].entry; + + ret = intel_pmt_dev_create(entry, &pmt_crashlog_ns, pdev, i); + if (ret < 0) + goto abort_probe; + if (ret) + continue; + + priv->num_entries++; + } + + return 0; +abort_probe: + pmt_crashlog_remove(pdev); + return ret; +} + +static struct platform_driver pmt_crashlog_driver = { + .driver = { + .name = DRV_NAME, + }, + .remove = pmt_crashlog_remove, + .probe = pmt_crashlog_probe, +}; + +static int __init pmt_crashlog_init(void) +{ + return platform_driver_register(&pmt_crashlog_driver); +} + +static void __exit pmt_crashlog_exit(void) +{ + platform_driver_unregister(&pmt_crashlog_driver); + xa_destroy(&crashlog_array); +} + +module_init(pmt_crashlog_init); +module_exit(pmt_crashlog_exit); + +MODULE_AUTHOR("Alexander Duyck "); +MODULE_DESCRIPTION("Intel PMT Crashlog driver"); +MODULE_ALIAS("platform:" DRV_NAME); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel_pmt_telemetry.c b/drivers/platform/x86/intel_pmt_telemetry.c new file mode 100644 index 0000000000000000000000000000000000000000..9f845e70a1f84f9d54fbf07896a786ecc74b7307 --- /dev/null +++ b/drivers/platform/x86/intel_pmt_telemetry.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitory Technology Telemetry driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. + * + * Author: "David E. 
Box" + */ + +#include +#include +#include +#include +#include +#include + +#include "intel_pmt_class.h" + +#define TELEM_DEV_NAME "pmt_telemetry" + +#define TELEM_SIZE_OFFSET 0x0 +#define TELEM_GUID_OFFSET 0x4 +#define TELEM_BASE_OFFSET 0x8 +#define TELEM_ACCESS(v) ((v) & GENMASK(3, 0)) +/* size is in bytes */ +#define TELEM_SIZE(v) (((v) & GENMASK(27, 12)) >> 10) + +/* Used by client hardware to identify a fixed telemetry entry*/ +#define TELEM_CLIENT_FIXED_BLOCK_GUID 0x10000000 + +struct pmt_telem_priv { + int num_entries; + struct intel_pmt_entry entry[]; +}; + +static bool pmt_telem_region_overlaps(struct intel_pmt_entry *entry, + struct device *dev) +{ + u32 guid = readl(entry->disc_table + TELEM_GUID_OFFSET); + + if (guid != TELEM_CLIENT_FIXED_BLOCK_GUID) + return false; + + return intel_pmt_is_early_client_hw(dev); +} + +static int pmt_telem_header_decode(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev) +{ + void __iomem *disc_table = entry->disc_table; + + if (pmt_telem_region_overlaps(entry, dev)) + return 1; + + header->access_type = TELEM_ACCESS(readl(disc_table)); + header->guid = readl(disc_table + TELEM_GUID_OFFSET); + header->base_offset = readl(disc_table + TELEM_BASE_OFFSET); + + /* Size is measured in DWORDS, but accessor returns bytes */ + header->size = TELEM_SIZE(readl(disc_table)); + + /* + * Some devices may expose non-functioning entries that are + * reserved for future use. They have zero size. Do not fail + * probe for these. Just ignore them. + */ + if (header->size == 0) + return 1; + + return 0; +} + +static DEFINE_XARRAY_ALLOC(telem_array); +static struct intel_pmt_namespace pmt_telem_ns = { + .name = "telem", + .xa = &telem_array, + .pmt_header_decode = pmt_telem_header_decode, +}; + +static int pmt_telem_remove(struct platform_device *pdev) +{ + struct pmt_telem_priv *priv = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < priv->num_entries; i++) + intel_pmt_dev_destroy(&priv->entry[i], &pmt_telem_ns); + + return 0; +} + +static int pmt_telem_probe(struct platform_device *pdev) +{ + struct pmt_telem_priv *priv; + size_t size; + int i, ret; + + size = struct_size(priv, entry, pdev->num_resources); + priv = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + if (!priv) + return -ENOMEM; + + platform_set_drvdata(pdev, priv); + + for (i = 0; i < pdev->num_resources; i++) { + struct intel_pmt_entry *entry = &priv->entry[i]; + + ret = intel_pmt_dev_create(entry, &pmt_telem_ns, pdev, i); + if (ret < 0) + goto abort_probe; + if (ret) + continue; + + priv->num_entries++; + } + + return 0; +abort_probe: + pmt_telem_remove(pdev); + return ret; +} + +static struct platform_driver pmt_telem_driver = { + .driver = { + .name = TELEM_DEV_NAME, + }, + .remove = pmt_telem_remove, + .probe = pmt_telem_probe, +}; + +static int __init pmt_telem_init(void) +{ + return platform_driver_register(&pmt_telem_driver); +} +module_init(pmt_telem_init); + +static void __exit pmt_telem_exit(void) +{ + platform_driver_unregister(&pmt_telem_driver); + xa_destroy(&telem_array); +} +module_exit(pmt_telem_exit); + +MODULE_AUTHOR("David E. 
Box "); +MODULE_DESCRIPTION("Intel PMT Telemetry driver"); +MODULE_ALIAS("platform:" TELEM_DEV_NAME); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 805e748f7eae2eb36eadb01f3cb832a595b8054c..3f54def870b8bc7c8baa6b21f5e4138d5a170cc2 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -761,14 +761,11 @@ u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); -struct svm_dev_ops; - struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; struct intel_iommu *iommu; - struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; int users; @@ -785,7 +782,6 @@ struct intel_svm { u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; - struct list_head list; }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 39d368a810b856ba19f5bc6aa2be8049c6a7acb9..10fa80eef13a68d0c4b88a71fbf06f75c8bf2084 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -8,29 +8,12 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -struct device; - -struct svm_dev_ops { - void (*fault_cb)(struct device *dev, u32 pasid, u64 address, - void *private, int rwxp, int response); -}; - /* Values for rxwp in fault_cb callback */ #define SVM_REQ_READ (1<<3) #define SVM_REQ_WRITE (1<<2) #define SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) -/* - * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main" - * PASID for the current process. Even if a PASID already exists, a new one - * will be allocated. And the PASID allocated with SVM_FLAG_PRIVATE_PASID - * will not be given to subsequent callers. This facility allows a driver to - * disambiguate between multiple device contexts which access the same MM, - * if there is no other way to do so. It should be used sparingly, if at all. - */ -#define SVM_FLAG_PRIVATE_PASID (1<<0) - /* * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only * for access to kernel addresses. No IOTLB flushes are automatically done @@ -42,18 +25,18 @@ struct svm_dev_ops { * It is unlikely that we will ever hook into flush_tlb_kernel_range() to * do such IOTLB flushes automatically. */ -#define SVM_FLAG_SUPERVISOR_MODE (1<<1) +#define SVM_FLAG_SUPERVISOR_MODE BIT(0) /* * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest * processes. Compared to the host bind, the primary differences are: * 1. mm life cycle management * 2. fault reporting */ -#define SVM_FLAG_GUEST_MODE (1<<2) +#define SVM_FLAG_GUEST_MODE BIT(1) /* * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, * which requires guest and host PASID translation at both directions. 
*/ -#define SVM_FLAG_GUEST_PASID (1<<3) +#define SVM_FLAG_GUEST_PASID BIT(2) #endif /* __INTEL_SVM_H__ */ diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index e9dacd4b9f6bb5d8a6f67dda14d3904ada9089bb..af1c9d62e64281ee27d75e368b58ef24a453dfed 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -34,13 +34,16 @@ struct ioasid_allocator_ops { #if IS_ENABLED(CONFIG_IOASID) ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void *private); -void ioasid_get(ioasid_t ioasid); -bool ioasid_put(ioasid_t ioasid); +void ioasid_free(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); int ioasid_set_data(ioasid_t ioasid, void *data); +static inline bool pasid_valid(ioasid_t ioasid) +{ + return ioasid != INVALID_IOASID; +} #else /* !CONFIG_IOASID */ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, @@ -49,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, return INVALID_IOASID; } -static inline void ioasid_get(ioasid_t ioasid) -{ -} - -static inline bool ioasid_put(ioasid_t ioasid) -{ - return false; -} +static inline void ioasid_free(ioasid_t ioasid) { } static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)) @@ -78,5 +74,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data) return -ENOTSUPP; } +static inline bool pasid_valid(ioasid_t ioasid) +{ + return false; +} + #endif /* CONFIG_IOASID */ #endif /* __LINUX_IOASID_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index a886f48b6a0e293aa734a8242732e2300c1e8dcb..e73f987516807871b755bd8571b81e0d28105f8f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -246,6 +246,9 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +void setup_initial_init_mm(void *start_code, void *end_code, + void *end_data, void *brk); + /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1c22e294f083be2503b76eedf4dd77f13278daa4..d9d10990661b1b88e56732692f75285fd5044568 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -582,7 +582,7 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA u32 pasid; #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 47f462040f4dfc4c1baed9960ece7aa2ba8e8b4a..c763bd95a7ea38a39fbd2549fa826960b6acf924 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -865,6 +865,9 @@ struct task_struct { /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif +#ifdef CONFIG_IOMMU_SVA + unsigned pasid_activated:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. 
*/ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index dc1f4dcd9a82531dd39df5833f25dad5a3919625..88adf3a686bd7f212a4321c60e0fad0e19580cf5 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * Routines for handling mm_structs @@ -364,4 +365,29 @@ static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) } #endif +#ifdef CONFIG_IOMMU_SVA +static inline void mm_pasid_init(struct mm_struct *mm) +{ + mm->pasid = INVALID_IOASID; +} + +/* Associate a PASID with an mm_struct: */ +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) +{ + mm->pasid = pasid; +} + +static inline void mm_pasid_drop(struct mm_struct *mm) +{ + if (pasid_valid(mm->pasid)) { + ioasid_free(mm->pasid); + mm->pasid = INVALID_IOASID; + } +} +#else +static inline void mm_pasid_init(struct mm_struct *mm) {} +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} +static inline void mm_pasid_drop(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 7e0d526dd96f3308aa33038b41280a06956e68c2..9e257fe9efa536e05044608d5718580be730d80b 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -729,6 +729,7 @@ #define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ #define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */ #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ +#define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT @@ -1079,6 +1080,10 @@ #define PCI_L1SS_CTL1_LTR_L12_TH_SCALE 0xe0000000 /* LTR_L1.2_THRESHOLD_Scale */ #define PCI_L1SS_CTL2 0x0c /* Control 2 Register */ +/* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ +#define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ +#define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ + /* Data Link Feature */ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ #define PCI_DLF_EXCHANGE_ENABLE 0x80000000 /* Data Link Feature Exchange Enable */ diff --git a/kernel/fork.c b/kernel/fork.c index 0fb86b65ae60ca5e1fc9fdc757fa4151ec98d7e3..5bfe124bcd7b359ac0326033b7db11396dc5e1a6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -97,8 +97,8 @@ #include #include #include +#include -#include #include #include #include @@ -943,6 +943,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->use_memdelay = 0; #endif +#ifdef CONFIG_IOMMU_SVA + tsk->pasid_activated = 0; +#endif + #ifdef CONFIG_MEMCG tsk->active_memcg = NULL; #endif @@ -995,13 +999,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static void mm_init_pasid(struct mm_struct *mm) -{ -#ifdef CONFIG_IOMMU_SUPPORT - mm->pasid = INIT_PASID; -#endif -} - static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@ -1032,7 +1029,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); - mm_init_pasid(mm); + mm_pasid_init(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); @@ -1105,6 +1102,7 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); + 
mm_pasid_drop(mm); mmdrop(mm); } diff --git a/mm/init-mm.c b/mm/init-mm.c index 153162669f806289dd2a0129a19edc954fb37567..fbe7844d0912f54a79114db2ec7c205fa1cdcb5b 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -10,6 +10,7 @@ #include #include +#include #include #ifndef INIT_MM_CONTEXT @@ -38,5 +39,17 @@ struct mm_struct init_mm = { .mmlist = LIST_HEAD_INIT(init_mm.mmlist), .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, +#ifdef CONFIG_IOMMU_SVA + .pasid = INVALID_IOASID, +#endif INIT_MM_CONTEXT(init_mm) }; + +void setup_initial_init_mm(void *start_code, void *end_code, + void *end_data, void *brk) +{ + init_mm.start_code = (unsigned long)start_code; + init_mm.end_code = (unsigned long)end_code; + init_mm.end_data = (unsigned long)end_data; + init_mm.brk = (unsigned long)brk; +} diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index cde9c36e40ae0ccff941a22b07f8cc7d1313ac54..dac78f7c8ad0c416ea3e89f4ee4f96f56d9530ca 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -91,7 +91,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, { struct insn insn; int x86_64, sign; - unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, + unsigned char op1, op2, op3, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0; struct stack_op *op = NULL; @@ -117,6 +117,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op1 = insn.opcode.bytes[0]; op2 = insn.opcode.bytes[1]; + op3 = insn.opcode.bytes[2]; if (insn.rex_prefix.nbytes) { rex = insn.rex_prefix.bytes[0]; @@ -423,6 +424,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* nopl/nopw */ *type = INSN_NOP; + } else if (op2 == 0x38 && op3 == 0xf8) { + if (insn.prefixes.nbytes == 1 && + insn.prefixes.bytes[0] == 0xf2) { + /* ENQCMD cannot be used in the kernel. */ + WARN("ENQCMD instruction at %s:%lx", sec->name, + offset); + } + } else if (op2 == 0xa0 || op2 == 0xa8) { /* push fs/gs */
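
The mm_pasid_init()/mm_pasid_set()/mm_pasid_drop() helpers added to linux/sched/mm.h above, together with pasid_valid(), cover the whole per-mm PASID life cycle: initialize at mm creation, set on the first SVA bind, drop in __mmput() as the kernel/fork.c hunk shows. Below is a minimal sketch of a bind-side user of these helpers; the function name example_sva_bind_mm() and the 20-bit PASID range are illustrative assumptions, not something this patch adds.

#include <linux/errno.h>
#include <linux/ioasid.h>
#include <linux/sched/mm.h>

/* Hypothetical bind helper: give an mm a PASID the first time one is needed. */
static int example_sva_bind_mm(struct mm_struct *mm, struct ioasid_set *set)
{
	ioasid_t pasid;

	/* Later binds of the same or other devices reuse the same PASID. */
	if (pasid_valid(mm->pasid))
		return 0;

	/* Assumed 20-bit PASID space; ioasid_alloc() returns INVALID_IOASID on failure. */
	pasid = ioasid_alloc(set, 1, (1U << 20) - 1, mm);
	if (!pasid_valid(pasid))
		return -ENOSPC;

	mm_pasid_set(mm, pasid);
	return 0;
}

mm_pasid_drop() is the matching teardown: it frees the PASID through ioasid_free() and resets mm->pasid to INVALID_IOASID, which is exactly what the __mmput() hunk above relies on.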
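Back in the intel_pmt MFD driver, pmt_add_dev() turns each DVSEC entry into a resource list where start = BAR base + discovery offset + i * (entry_size << 2). A small standalone sketch of that arithmetic using the DG1 quirk values from the driver (tbir 0, offset 0x466000, one entry of 3 dwords) follows; the BAR0 base address used here is a made-up number purely for illustration.

#include <stdio.h>

int main(void)
{
	unsigned long bar0_start = 0x60000000UL;	/* assumed BAR0 address */
	unsigned long offset = 0x466000UL;		/* dg1_telemetry.offset */
	int entry_size = 3;				/* dwords per discovery table */
	int num_entries = 1;
	int i;

	for (i = 0; i < num_entries; i++) {
		/* mirrors the resource loop in pmt_add_dev() */
		unsigned long start = bar0_start + offset + i * (entry_size << 2);
		unsigned long end = start + (entry_size << 2) - 1;

		printf("discovery table %d: %#lx-%#lx (%d bytes)\n",
		       i, start, end, entry_size << 2);
	}
	return 0;
}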
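For the telemetry side, the PMT class driver above creates /sys/class/intel_pmt/telem<N>/ with guid, size and offset attributes plus a read-only "telem" binary file that can be read or mmap'ed. A minimal userspace sketch (not part of the patch) is shown below; it assumes the first instance is named telem0 and that the caller has read permission on the 0440 telem node.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const char *dir = "/sys/class/intel_pmt/telem0";	/* assumed instance */
	char path[128];
	unsigned long size = 0;
	uint32_t *data;
	FILE *f;
	int fd;

	/* "size" reports the length of the telemetry region in bytes. */
	snprintf(path, sizeof(path), "%s/size", dir);
	f = fopen(path, "r");
	if (!f)
		return 1;
	if (fscanf(f, "%lu", &size) != 1 || !size) {
		fclose(f);
		return 1;
	}
	fclose(f);

	/* The "telem" file is read-only; the driver rejects writable mappings. */
	snprintf(path, sizeof(path), "%s/telem", dir);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return 1;

	data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
	if (data == MAP_FAILED)
		return 1;

	printf("first telemetry dword: 0x%08x\n", data[0]);

	munmap(data, size);
	close(fd);
	return 0;
}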
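The crashlog devices additionally expose the enable and trigger attributes registered through pmt_crashlog_group above. A short sketch of manually arming a capture, again assuming a crashlog0 instance exists and is accessible:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/class/intel_pmt/crashlog0/trigger", O_RDWR);
	char status = '0';

	if (fd < 0)
		return 1;

	/*
	 * Writing a true value requests a manual crashlog; the store hook
	 * returns -EEXIST if a log is already pending and -EBUSY if the
	 * device has been disabled through the "enable" attribute.
	 */
	if (write(fd, "1", 1) < 0)
		perror("trigger");

	/* Reading back reports 1 once the log is complete. */
	if (pread(fd, &status, 1, 0) == 1)
		printf("crashlog complete: %c\n", status);

	close(fd);
	return 0;
}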