diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/common/mcpm_entry.c linux-dovetail-v5.15.y-dovetail/arch/arm/common/mcpm_entry.c --- linux-5.15.26/arch/arm/common/mcpm_entry.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/common/mcpm_entry.c 2022-03-10 09:47:50.000000000 +0100 @@ -206,7 +206,7 @@ int mcpm_cpu_power_up(unsigned int cpu, * Since this is called with IRQs enabled, and no arch_spin_lock_irq * variant exists, we need to disable IRQs manually here. */ - local_irq_disable(); + hard_local_irq_disable(); arch_spin_lock(&mcpm_lock); cpu_is_down = !mcpm_cpu_use_count[cluster][cpu]; @@ -230,7 +230,7 @@ int mcpm_cpu_power_up(unsigned int cpu, ret = platform_ops->cpu_powerup(cpu, cluster); arch_spin_unlock(&mcpm_lock); - local_irq_enable(); + hard_local_irq_enable(); return ret; } @@ -349,7 +349,7 @@ int mcpm_cpu_powered_up(void) mpidr = read_cpuid_mpidr(); cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - local_irq_save(flags); + flags = hard_local_irq_save(); arch_spin_lock(&mcpm_lock); cpu_was_down = !mcpm_cpu_use_count[cluster][cpu]; @@ -363,7 +363,7 @@ int mcpm_cpu_powered_up(void) platform_ops->cpu_is_up(cpu, cluster); arch_spin_unlock(&mcpm_lock); - local_irq_restore(flags); + hard_local_irq_restore(flags); return 0; } @@ -402,7 +402,7 @@ int __init mcpm_loopback(void (*cache_di * infrastructure. Let's play it safe by using cpu_pm_enter() * in case the CPU init code path resets the VFP or similar. */ - local_irq_disable(); + hard_local_irq_disable(); local_fiq_disable(); ret = cpu_pm_enter(); if (!ret) { @@ -410,7 +410,7 @@ int __init mcpm_loopback(void (*cache_di cpu_pm_exit(); } local_fiq_enable(); - local_irq_enable(); + hard_local_irq_enable(); if (ret) pr_err("%s returned %d\n", __func__, ret); return ret; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/assembler.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/assembler.h --- linux-5.15.26/arch/arm/include/asm/assembler.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/assembler.h 2022-03-10 09:47:50.000000000 +0100 @@ -107,12 +107,24 @@ .endm #endif + .macro disable_irq_if_pipelined +#ifdef CONFIG_IRQ_PIPELINE + disable_irq_notrace +#endif + .endm + + .macro enable_irq_if_pipelined +#ifdef CONFIG_IRQ_PIPELINE + enable_irq_notrace +#endif + .endm + .macro asm_trace_hardirqs_off, save=1 #if defined(CONFIG_TRACE_IRQFLAGS) .if \save stmdb sp!, {r0-r3, ip, lr} .endif - bl trace_hardirqs_off + bl trace_hardirqs_off_pipelined .if \save ldmia sp!, {r0-r3, ip, lr} .endif @@ -128,7 +140,7 @@ .if \save stmdb sp!, {r0-r3, ip, lr} .endif - bl\cond trace_hardirqs_on + bl\cond trace_hardirqs_on_pipelined .if \save ldmia sp!, {r0-r3, ip, lr} .endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/atomic.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/atomic.h --- linux-5.15.26/arch/arm/include/asm/atomic.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/atomic.h 2022-03-10 09:47:50.000000000 +0100 @@ -164,9 +164,9 @@ static inline void arch_atomic_##op(int { \ unsigned long flags; \ \ - raw_local_irq_save(flags); \ + flags = hard_local_irq_save(); \ v->counter c_op i; \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ } \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -175,10 +175,10 @@ static inline int arch_atomic_##op##_ret unsigned long flags; \ int 
val; \ \ - raw_local_irq_save(flags); \ + flags = hard_local_irq_save(); \ v->counter c_op i; \ val = v->counter; \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ \ return val; \ } @@ -189,10 +189,10 @@ static inline int arch_atomic_fetch_##op unsigned long flags; \ int val; \ \ - raw_local_irq_save(flags); \ + flags = hard_local_irq_save(); \ val = v->counter; \ v->counter c_op i; \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ \ return val; \ } @@ -202,11 +202,11 @@ static inline int arch_atomic_cmpxchg(at int ret; unsigned long flags; - raw_local_irq_save(flags); + flags = hard_local_irq_save(); ret = v->counter; if (likely(ret == old)) v->counter = new; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return ret; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/bitops.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/bitops.h --- linux-5.15.26/arch/arm/include/asm/bitops.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/bitops.h 2022-03-10 09:47:50.000000000 +0100 @@ -40,9 +40,9 @@ static inline void ____atomic_set_bit(un p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); *p |= mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline void ____atomic_clear_bit(unsigned int bit, volatile unsigned long *p) @@ -52,9 +52,9 @@ static inline void ____atomic_clear_bit( p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); *p &= ~mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline void ____atomic_change_bit(unsigned int bit, volatile unsigned long *p) @@ -64,9 +64,9 @@ static inline void ____atomic_change_bit p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); *p ^= mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline int @@ -78,10 +78,10 @@ ____atomic_test_and_set_bit(unsigned int p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); res = *p; *p = res | mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return (res & mask) != 0; } @@ -95,10 +95,10 @@ ____atomic_test_and_clear_bit(unsigned i p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); res = *p; *p = res & ~mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return (res & mask) != 0; } @@ -112,10 +112,10 @@ ____atomic_test_and_change_bit(unsigned p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); res = *p; *p = res ^ mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return (res & mask) != 0; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/cmpxchg.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/cmpxchg.h --- linux-5.15.26/arch/arm/include/asm/cmpxchg.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/cmpxchg.h 2022-03-10 09:47:50.000000000 +0100 @@ -77,17 +77,17 @@ static inline unsigned long __xchg(unsig #error SMP is not supported on this platform #endif case 1: - raw_local_irq_save(flags); + flags = hard_local_irq_save(); ret = *(volatile unsigned char *)ptr; *(volatile unsigned char *)ptr = x; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); break; case 4: - raw_local_irq_save(flags); + flags = hard_local_irq_save(); ret = *(volatile unsigned long 
*)ptr; *(volatile unsigned long *)ptr = x; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); break; #else case 1: diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/dovetail.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/dovetail.h --- linux-5.15.26/arch/arm/include/asm/dovetail.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/dovetail.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2016 Philippe Gerum. + */ +#ifndef _ASM_ARM_DOVETAIL_H +#define _ASM_ARM_DOVETAIL_H + +/* ARM traps */ +#define ARM_TRAP_ACCESS 0 /* Data or instruction access exception */ +#define ARM_TRAP_SECTION 1 /* Section fault */ +#define ARM_TRAP_DABT 2 /* Generic data abort */ +#define ARM_TRAP_PABT 3 /* Prefetch abort */ +#define ARM_TRAP_BREAK 4 /* Instruction breakpoint */ +#define ARM_TRAP_FPU 5 /* Floating point exception */ +#define ARM_TRAP_VFP 6 /* VFP floating point exception */ +#define ARM_TRAP_UNDEFINSTR 7 /* Undefined instruction */ +#define ARM_TRAP_ALIGNMENT 8 /* Unaligned access exception */ + +#if !defined(__ASSEMBLY__) && defined(CONFIG_DOVETAIL) + +static inline void arch_dovetail_exec_prepare(void) +{ } + +static inline void arch_dovetail_switch_prepare(bool leave_inband) +{ } + +static inline void arch_dovetail_switch_finish(bool enter_inband) +{ } + +#endif + +#endif /* _ASM_ARM_DOVETAIL_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/efi.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/efi.h --- linux-5.15.26/arch/arm/include/asm/efi.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/efi.h 2022-03-10 09:47:50.000000000 +0100 @@ -37,7 +37,11 @@ int efi_set_mapping_permissions(struct m static inline void efi_set_pgd(struct mm_struct *mm) { + unsigned long flags; + + protect_inband_mm(flags); check_and_switch_context(mm, NULL); + unprotect_inband_mm(flags); } void efi_virtmap_load(void); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/irqflags.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/irqflags.h --- linux-5.15.26/arch/arm/include/asm/irqflags.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/irqflags.h 2022-03-10 09:47:50.000000000 +0100 @@ -5,6 +5,7 @@ #ifdef __KERNEL__ #include +#include /* * CPU interrupt mask handling. 
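
Note on the conversions above (mcpm, atomic, bitops, cmpxchg): once interrupts are pipelined, local_irq_save()/raw_local_irq_save() only stall the in-band stage virtually, so sections that must also exclude out-of-band interrupts switch to the hard_* variants, which mask the CPU for real. A standalone sketch of the distinction, with invented *_model() helpers rather than the kernel API:

/*
 * Standalone model of two-stage interrupt masking (illustration only,
 * not kernel code; the *_model() helpers are invented for this sketch).
 */
#include <stdbool.h>
#include <stdio.h>

static bool cpu_irqs_off;	/* real mask, i.e. CPSR.I on ARM */
static bool inband_stalled;	/* virtual mask seen by in-band code */

/* local_irq_save(): only logs the mask; the CPU still takes IRQs,
 * which are merely deferred for the in-band stage ("stalled"). */
static bool local_irq_save_model(void)
{
	bool was = inband_stalled;

	inband_stalled = true;
	return was;
}

/* hard_local_irq_save(): really masks the CPU, excluding both the
 * in-band and the out-of-band stage until restored. */
static bool hard_local_irq_save_model(void)
{
	bool was = cpu_irqs_off;

	cpu_irqs_off = true;
	return was;
}

int main(void)
{
	bool flags = local_irq_save_model();

	printf("virtual mask: CPU off=%d, stalled=%d\n",
	       cpu_irqs_off, inband_stalled);
	inband_stalled = flags;

	flags = hard_local_irq_save_model();
	printf("hard mask:    CPU off=%d, stalled=%d\n",
	       cpu_irqs_off, inband_stalled);
	cpu_irqs_off = flags;
	return 0;
}
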
@@ -13,41 +14,44 @@ #define IRQMASK_REG_NAME_R "primask" #define IRQMASK_REG_NAME_W "primask" #define IRQMASK_I_BIT 1 +#define IRQMASK_I_POS 0 #else #define IRQMASK_REG_NAME_R "cpsr" #define IRQMASK_REG_NAME_W "cpsr_c" #define IRQMASK_I_BIT PSR_I_BIT +#define IRQMASK_I_POS 7 #endif +#define IRQMASK_i_POS 31 #if __LINUX_ARM_ARCH__ >= 6 #define arch_local_irq_save arch_local_irq_save -static inline unsigned long arch_local_irq_save(void) +static inline unsigned long native_irq_save(void) { unsigned long flags; asm volatile( - " mrs %0, " IRQMASK_REG_NAME_R " @ arch_local_irq_save\n" + " mrs %0, " IRQMASK_REG_NAME_R " @ native_irq_save\n" " cpsid i" : "=r" (flags) : : "memory", "cc"); return flags; } #define arch_local_irq_enable arch_local_irq_enable -static inline void arch_local_irq_enable(void) +static inline void native_irq_enable(void) { asm volatile( - " cpsie i @ arch_local_irq_enable" + " cpsie i @ native_irq_enable" : : : "memory", "cc"); } #define arch_local_irq_disable arch_local_irq_disable -static inline void arch_local_irq_disable(void) +static inline void native_irq_disable(void) { asm volatile( - " cpsid i @ arch_local_irq_disable" + " cpsid i @ native_irq_disable" : : : "memory", "cc"); @@ -69,12 +73,12 @@ static inline void arch_local_irq_disabl * Save the current interrupt enable state & disable IRQs */ #define arch_local_irq_save arch_local_irq_save -static inline unsigned long arch_local_irq_save(void) +static inline unsigned long native_irq_save(void) { unsigned long flags, temp; asm volatile( - " mrs %0, cpsr @ arch_local_irq_save\n" + " mrs %0, cpsr @ native_irq_save\n" " orr %1, %0, #128\n" " msr cpsr_c, %1" : "=r" (flags), "=r" (temp) @@ -87,11 +91,11 @@ static inline unsigned long arch_local_i * Enable IRQs */ #define arch_local_irq_enable arch_local_irq_enable -static inline void arch_local_irq_enable(void) +static inline void native_irq_enable(void) { unsigned long temp; asm volatile( - " mrs %0, cpsr @ arch_local_irq_enable\n" + " mrs %0, cpsr @ native_irq_enable\n" " bic %0, %0, #128\n" " msr cpsr_c, %0" : "=r" (temp) @@ -103,11 +107,11 @@ static inline void arch_local_irq_enable * Disable IRQs */ #define arch_local_irq_disable arch_local_irq_disable -static inline void arch_local_irq_disable(void) +static inline void native_irq_disable(void) { unsigned long temp; asm volatile( - " mrs %0, cpsr @ arch_local_irq_disable\n" + " mrs %0, cpsr @ native_irq_disable\n" " orr %0, %0, #128\n" " msr cpsr_c, %0" : "=r" (temp) @@ -149,15 +153,22 @@ static inline void arch_local_irq_disabl #define local_abt_disable() do { } while (0) #endif +static inline void native_irq_sync(void) +{ + native_irq_enable(); + isb(); + native_irq_disable(); +} + /* * Save the current interrupt enable state. 
*/ #define arch_local_save_flags arch_local_save_flags -static inline unsigned long arch_local_save_flags(void) +static inline unsigned long native_save_flags(void) { unsigned long flags; asm volatile( - " mrs %0, " IRQMASK_REG_NAME_R " @ local_save_flags" + " mrs %0, " IRQMASK_REG_NAME_R " @ native_save_flags" : "=r" (flags) : : "memory", "cc"); return flags; } @@ -166,21 +177,28 @@ static inline unsigned long arch_local_s * restore saved IRQ & FIQ state */ #define arch_local_irq_restore arch_local_irq_restore -static inline void arch_local_irq_restore(unsigned long flags) +static inline void native_irq_restore(unsigned long flags) { asm volatile( - " msr " IRQMASK_REG_NAME_W ", %0 @ local_irq_restore" + " msr " IRQMASK_REG_NAME_W ", %0 @ native_irq_restore" : : "r" (flags) : "memory", "cc"); } #define arch_irqs_disabled_flags arch_irqs_disabled_flags -static inline int arch_irqs_disabled_flags(unsigned long flags) +static inline int native_irqs_disabled_flags(unsigned long flags) { return flags & IRQMASK_I_BIT; } +static inline bool native_irqs_disabled(void) +{ + unsigned long flags = native_save_flags(); + return native_irqs_disabled_flags(flags); +} + +#include #include #endif /* ifdef __KERNEL__ */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/irq_pipeline.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/irq_pipeline.h --- linux-5.15.26/arch/arm/include/asm/irq_pipeline.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/irq_pipeline.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,142 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2016 Philippe Gerum . + */ +#ifndef _ASM_ARM_IRQ_PIPELINE_H +#define _ASM_ARM_IRQ_PIPELINE_H + +#include + +#ifdef CONFIG_IRQ_PIPELINE + +/* + * In order to cope with the limited number of SGIs available to us, + * In-band IPI messages are multiplexed over SGI0, whereas out-of-band + * IPIs are directly mapped to SGI1-2. 
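
The pipelined irqflags code that follows keeps generic callers working by encoding the in-band stall bit at the CPSR.I position (IRQMASK_I_POS == 7), while the hard-disabled state is parked at bit 31 (IRQMASK_i_POS). A minimal user-space sketch of that translation, using the constants from the hunk and invented helper names:

/* Sketch of the flag translation used by the pipelined irqflags code. */
#include <stdio.h>

#define IRQMASK_I_POS	7	/* CPSR.I position on ARM (PSR_I_BIT = 1 << 7) */
#define IRQMASK_i_POS	31	/* spare bit used for the virtualized state */

/* What arch_local_save_flags() hands back to generic code: the stall
 * bit of the in-band stage, presented at the CPSR.I position. */
static unsigned long virtual_to_native_flags(int stalled)
{
	return (unsigned long)(!!stalled) << IRQMASK_I_POS;
}

static int irqs_disabled_flags(unsigned long flags)
{
	return !!(flags & (1UL << IRQMASK_I_POS));
}

int main(void)
{
	unsigned long flags = virtual_to_native_flags(1);

	/* Generic code sees "interrupts disabled" although only the
	 * in-band stage is stalled; the CPU may still take IRQs for
	 * the out-of-band stage. */
	printf("flags=%#lx, disabled=%d\n", flags, irqs_disabled_flags(flags));
	return 0;
}
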
+ */ +#define OOB_NR_IPI 2 +#define OOB_IPI_OFFSET 1 /* SGI1 */ +#define TIMER_OOB_IPI (ipi_irq_base + OOB_IPI_OFFSET) +#define RESCHEDULE_OOB_IPI (TIMER_OOB_IPI + 1) + +extern int ipi_irq_base; + +static inline notrace +unsigned long arch_irqs_virtual_to_native_flags(int stalled) +{ + return (!!stalled) << IRQMASK_I_POS; +} + +static inline notrace +unsigned long arch_irqs_native_to_virtual_flags(unsigned long flags) +{ + return (!!hard_irqs_disabled_flags(flags)) << IRQMASK_i_POS; +} + +static inline notrace unsigned long arch_local_irq_save(void) +{ + int stalled = inband_irq_save(); + barrier(); + return arch_irqs_virtual_to_native_flags(stalled); +} + +static inline notrace void arch_local_irq_enable(void) +{ + barrier(); + inband_irq_enable(); +} + +static inline notrace void arch_local_irq_disable(void) +{ + inband_irq_disable(); + barrier(); +} + +static inline notrace unsigned long arch_local_save_flags(void) +{ + int stalled = inband_irqs_disabled(); + barrier(); + return arch_irqs_virtual_to_native_flags(stalled); +} + +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return native_irqs_disabled_flags(flags); +} + +static inline notrace void arch_local_irq_restore(unsigned long flags) +{ + inband_irq_restore(arch_irqs_disabled_flags(flags)); + barrier(); +} + +static inline +void arch_save_timer_regs(struct pt_regs *dst, struct pt_regs *src) +{ + dst->ARM_cpsr = src->ARM_cpsr; + dst->ARM_pc = src->ARM_pc; +} + +static inline bool arch_steal_pipelined_tick(struct pt_regs *regs) +{ + return !!(regs->ARM_cpsr & IRQMASK_I_BIT); +} + +static inline int arch_enable_oob_stage(void) +{ + return 0; +} + +extern void (*handle_arch_irq)(struct pt_regs *); + +static inline void arch_handle_irq_pipelined(struct pt_regs *regs) +{ + handle_arch_irq(regs); +} + +#define arch_kentry_get_irqstate(__regs) \ + ({ \ + to_svc_pt_regs(__regs)->irqstate; \ + }) + +#define arch_kentry_set_irqstate(__regs, __irqstate) \ + do { \ + to_svc_pt_regs(__regs)->irqstate = __irqstate; \ + } while (0) + +#else /* !CONFIG_IRQ_PIPELINE */ + +static inline unsigned long arch_local_irq_save(void) +{ + return native_irq_save(); +} + +static inline void arch_local_irq_enable(void) +{ + native_irq_enable(); +} + +static inline void arch_local_irq_disable(void) +{ + native_irq_disable(); +} + +static inline unsigned long arch_local_save_flags(void) +{ + return native_save_flags(); +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ + native_irq_restore(flags); +} + +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return native_irqs_disabled_flags(flags); +} + +#endif /* !CONFIG_IRQ_PIPELINE */ + +#endif /* _ASM_ARM_IRQ_PIPELINE_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/mmu_context.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/mmu_context.h --- linux-5.15.26/arch/arm/include/asm/mmu_context.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/mmu_context.h 2022-03-10 09:47:50.000000000 +0100 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -74,6 +75,7 @@ static inline void check_and_switch_cont static inline void finish_arch_post_lock_switch(void) { struct mm_struct *mm = current->mm; + unsigned long flags; if (mm && mm->context.switch_pending) { /* @@ -85,7 +87,9 @@ static inline void finish_arch_post_lock preempt_disable(); if (mm->context.switch_pending) { mm->context.switch_pending = 0; + protect_inband_mm(flags); 
cpu_switch_mm(mm->pgd, mm); + unprotect_inband_mm(flags); } preempt_enable_no_resched(); } @@ -96,7 +100,7 @@ static inline void finish_arch_post_lock #endif /* CONFIG_CPU_HAS_ASID */ -#define activate_mm(prev,next) switch_mm(prev, next, NULL) +#define activate_mm(prev,next) __switch_mm(prev, next, NULL) /* * This is the actual mm switch as far as the scheduler @@ -105,8 +109,8 @@ static inline void finish_arch_post_lock * actually changed. */ static inline void -switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) +__switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) { #ifdef CONFIG_MMU unsigned int cpu = smp_processor_id(); @@ -131,4 +135,28 @@ switch_mm(struct mm_struct *prev, struct #include +/* + * This is the actual mm switch as far as the scheduler + * is concerned. No registers are touched. We avoid + * calling the CPU specific function when the mm hasn't + * actually changed. + */ +static inline void +switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned long flags; + + protect_inband_mm(flags); + __switch_mm(prev, next, tsk); + unprotect_inband_mm(flags); +} + +static inline void +switch_oob_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + __switch_mm(prev, next, tsk); +} + #endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/outercache.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/outercache.h --- linux-5.15.26/arch/arm/include/asm/outercache.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/outercache.h 2022-03-10 09:47:50.000000000 +0100 @@ -78,8 +78,13 @@ static inline void outer_flush_range(phy */ static inline void outer_flush_all(void) { - if (outer_cache.flush_all) + unsigned long flags; + + if (outer_cache.flush_all) { + flags = hard_cond_local_irq_save(); outer_cache.flush_all(); + hard_cond_local_irq_restore(flags); + } } /** diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/ptrace.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/ptrace.h --- linux-5.15.26/arch/arm/include/asm/ptrace.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/ptrace.h 2022-03-10 09:47:50.000000000 +0100 @@ -19,6 +19,9 @@ struct pt_regs { struct svc_pt_regs { struct pt_regs regs; u32 dacr; +#ifdef CONFIG_IRQ_PIPELINE + long irqstate; +#endif }; #define to_svc_pt_regs(r) container_of(r, struct svc_pt_regs, regs) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/syscall.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/syscall.h --- linux-5.15.26/arch/arm/include/asm/syscall.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/syscall.h 2022-03-10 09:47:50.000000000 +0100 @@ -77,6 +77,11 @@ static inline void syscall_get_arguments memcpy(args, ®s->ARM_r0 + 1, 5 * sizeof(args[0])); } +static inline unsigned long syscall_get_arg0(struct pt_regs *regs) +{ + return regs->ARM_ORIG_r0; +} + static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, const unsigned long *args) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/thread_info.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/thread_info.h --- linux-5.15.26/arch/arm/include/asm/thread_info.h 2022-03-02 11:48:10.000000000 +0100 +++ 
linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/thread_info.h 2022-03-10 09:47:50.000000000 +0100 @@ -29,6 +29,7 @@ struct task_struct; +#include #include struct cpu_context_save { @@ -51,6 +52,7 @@ struct cpu_context_save { */ struct thread_info { unsigned long flags; /* low level flags */ + __u32 local_flags; /* local (synchronous) flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ @@ -67,15 +69,19 @@ struct thread_info { #ifdef CONFIG_ARM_THUMBEE unsigned long thumbee_state; /* ThumbEE Handler Base register */ #endif + struct oob_thread_state oob_state; /* co-kernel thread state */ }; #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ .flags = 0, \ + .local_flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ } +#define ti_local_flags(__ti) ((__ti)->local_flags) + /* * how to get the thread information struct from C */ @@ -134,10 +140,12 @@ extern int vfp_restore_user_hwstate(stru #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ #define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ +#define TIF_RETUSER 9 /* INBAND_TASK_RETUSER is pending */ #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 +#define TIF_MAYDAY 21 /* emergency trap pending */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) @@ -148,9 +156,14 @@ extern int vfp_restore_user_hwstate(stru #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_RETUSER (1 << TIF_RETUSER) #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) +#define _TIF_MAYDAY (1 << TIF_MAYDAY) -/* Checks for any syscall work in entry-common.S */ +/* + * Checks for any syscall work in entry-common.S. + * CAUTION: Only bit0-bit15 are tested there. + */ #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) @@ -159,7 +172,15 @@ extern int vfp_restore_user_hwstate(stru */ #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NOTIFY_SIGNAL) + _TIF_NOTIFY_SIGNAL | _TIF_RETUSER) + +/* + * Local (synchronous) thread flags. 
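
The CAUTION added above matters because the assembly work-pending checks only test the low halfword of thread_info.flags; TIF_RETUSER is therefore allocated at bit 9, while TIF_MAYDAY, which is only examined from C, can live at bit 21. A compile-time sketch of that constraint (flag values copied from the hunk; the assertion itself is illustrative, not kernel code):

#include <assert.h>

#define TIF_SIGPENDING		0
#define TIF_NEED_RESCHED	1
#define TIF_NOTIFY_RESUME	2
#define TIF_UPROBE		3
#define TIF_NOTIFY_SIGNAL	8
#define TIF_RETUSER		9	/* INBAND_TASK_RETUSER pending */
#define TIF_MAYDAY		21	/* emergency trap, not in the work mask */

/* Every flag reachable from the entry-common.S work checks must stay
 * within bit0-bit15, since only the low halfword is tested there. */
_Static_assert((((1 << TIF_NEED_RESCHED) | (1 << TIF_SIGPENDING) |
		 (1 << TIF_NOTIFY_RESUME) | (1 << TIF_UPROBE) |
		 (1 << TIF_NOTIFY_SIGNAL) | (1 << TIF_RETUSER)) & ~0xffff) == 0,
	       "work flags must live in bit0-bit15");

int main(void) { return 0; }
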
+ */ +#define _TLF_OOB 0x0001 +#define _TLF_DOVETAIL 0x0002 +#define _TLF_OFFSTAGE 0x0004 +#define _TLF_OOBTRAP 0x0008 #endif /* __KERNEL__ */ #endif /* __ASM_ARM_THREAD_INFO_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/trace/exceptions.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/trace/exceptions.h --- linux-5.15.26/arch/arm/include/asm/trace/exceptions.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/trace/exceptions.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM exceptions + +#if !defined(_TRACE_EXCEPTIONS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EXCEPTIONS_H + +#include +#include +#include + +#define __trace_trap(__sym) { __sym, #__sym } + +#define trace_trap_symbolic(__trapnr) \ + __print_symbolic(__trapnr, \ + __trace_trap(ARM_TRAP_ACCESS), \ + __trace_trap(ARM_TRAP_SECTION), \ + __trace_trap(ARM_TRAP_DABT), \ + __trace_trap(ARM_TRAP_PABT), \ + __trace_trap(ARM_TRAP_BREAK), \ + __trace_trap(ARM_TRAP_FPU), \ + __trace_trap(ARM_TRAP_VFP), \ + __trace_trap(ARM_TRAP_UNDEFINSTR), \ + __trace_trap(ARM_TRAP_ALIGNMENT)) + +DECLARE_EVENT_CLASS(ARM_trap_event, + TP_PROTO(int trapnr, struct pt_regs *regs), + TP_ARGS(trapnr, regs), + + TP_STRUCT__entry( + __field(int, trapnr) + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->trapnr = trapnr; + __entry->regs = regs; + ), + + TP_printk("%s mode trap: %s", + user_mode(__entry->regs) ? "user" : "kernel", + trace_trap_symbolic(__entry->trapnr)) +); + +DEFINE_EVENT(ARM_trap_event, ARM_trap_entry, + TP_PROTO(int trapnr, struct pt_regs *regs), + TP_ARGS(trapnr, regs) +); + +DEFINE_EVENT(ARM_trap_event, ARM_trap_exit, + TP_PROTO(int trapnr, struct pt_regs *regs), + TP_ARGS(trapnr, regs) +); + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH asm/trace +#define TRACE_INCLUDE_FILE exceptions +#endif /* _TRACE_EXCEPTIONS_H */ + +/* This part must be outside protection */ +#include diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/include/asm/vdso/gettimeofday.h linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/vdso/gettimeofday.h --- linux-5.15.26/arch/arm/include/asm/vdso/gettimeofday.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/include/asm/vdso/gettimeofday.h 2022-03-10 09:47:50.000000000 +0100 @@ -142,6 +142,66 @@ static __always_inline const struct vdso return __get_datapage(); } +#ifdef CONFIG_GENERIC_CLOCKSOURCE_VDSO + +extern struct vdso_priv *__get_privpage(void); + +static __always_inline struct vdso_priv *__arch_get_vdso_priv(void) +{ + return __get_privpage(); +} + +static __always_inline long clock_open_device(const char *path, int mode) +{ + register u32 r0 asm("r0") = (u32)path; + register u32 r1 asm("r1") = (u32)mode; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_open; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r"(r0), "r"(r1), "r"(nr) + : "memory"); + + return ret; +} + +static __always_inline +long clock_ioctl_device(int fd, unsigned int cmd, long arg) +{ + register u32 r0 asm("r0") = (u32)fd; + register u32 r1 asm("r1") = (u32)cmd; + register u32 r2 asm("r2") = (u32)arg; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_ioctl; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r"(r0), "r"(r1), "r"(r2), "r"(nr) + : "memory"); + + return ret; +} + +static __always_inline 
long clock_close_device(int fd) +{ + register u32 r0 asm("r0") = (u32)fd; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_close; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r"(r0), "r"(nr) + : "memory"); + + return ret; +} + +#endif /* CONFIG_GENERIC_CLOCKSOURCE_VDSO */ + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/Kconfig linux-dovetail-v5.15.y-dovetail/arch/arm/Kconfig --- linux-5.15.26/arch/arm/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -231,6 +231,11 @@ config NEED_RET_TO_USER config ARCH_MTD_XIP bool +# Limited I-pipe compat (syscall routing only). +config IPIPE_COMPAT + bool + select DOVETAIL_LEGACY_SYSCALL_RANGE + config ARM_PATCH_PHYS_VIRT bool "Patch physical to virtual translations at runtime" if EMBEDDED default y @@ -551,6 +556,8 @@ config ARCH_MULTI_V7 config ARCH_MULTI_V6_V7 bool select MIGHT_HAVE_CACHE_L2X0 + select HAVE_IRQ_PIPELINE + select HAVE_DOVETAIL if CPU_HAS_ASID config ARCH_MULTI_CPU_AUTO def_bool !(ARCH_MULTI_V4 || ARCH_MULTI_V4T || ARCH_MULTI_V6_V7) @@ -1183,6 +1190,8 @@ config SCHED_SMT MultiThreading at a cost of slightly increased overhead in some places. If unsure say N here. +source "kernel/Kconfig.dovetail" + config HAVE_ARM_SCU bool help diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/asm-offsets.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/asm-offsets.c --- linux-5.15.26/arch/arm/kernel/asm-offsets.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/asm-offsets.c 2022-03-10 09:47:50.000000000 +0100 @@ -42,6 +42,7 @@ int main(void) #endif BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); @@ -51,6 +52,7 @@ int main(void) DEFINE(TI_USED_CP, offsetof(struct thread_info, used_cp)); DEFINE(TI_TP_VALUE, offsetof(struct thread_info, tp_value)); DEFINE(TI_FPSTATE, offsetof(struct thread_info, fpstate)); + DEFINE(TI_OOB_MASK, STAGE_MASK); #ifdef CONFIG_VFP DEFINE(TI_VFPSTATE, offsetof(struct thread_info, vfpstate)); #ifdef CONFIG_SMP @@ -157,6 +159,7 @@ int main(void) BLANK(); #ifdef CONFIG_VDSO DEFINE(VDSO_DATA_SIZE, sizeof(union vdso_data_store)); + DEFINE(VDSO_PRIV_SIZE, PAGE_SIZE); #endif BLANK(); #ifdef CONFIG_ARM_MPU diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/entry-armv.S linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/entry-armv.S --- linux-5.15.26/arch/arm/kernel/entry-armv.S 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/entry-armv.S 2022-03-10 09:47:50.000000000 +0100 @@ -5,6 +5,7 @@ * Copyright (C) 1996,1997,1998 Russell King. * ARM700 fix by Matthew Godbolt (linux-user@willothewisp.demon.co.uk) * nommu support by Hyok S. Choi (hyok.choi@samsung.com) + * Copyright (C) 2005 Stelian Pop. * * Low-level vector interface routines * @@ -32,16 +33,24 @@ #include "entry-header.S" #include #include +#include /* * Interrupt handling. 
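
The clock_open_device()/clock_ioctl_device()/clock_close_device() helpers above issue raw swi traps because the vDSO cannot call into libc. Outside the vDSO the same sequence is plain open()/ioctl()/close(); a hedged user-space equivalent follows, with a placeholder device path and ioctl request (not a real ABI):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/clocksource0", O_RDONLY);	/* hypothetical node */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* 0 stands for whatever mapping request the clocksource driver
	 * implements; this sketch only shows the call sequence. */
	if (ioctl(fd, 0, 0) < 0)
		perror("ioctl");

	close(fd);
	return 0;
}
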
*/ .macro irq_handler #ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER - ldr r1, =handle_arch_irq mov r0, sp badr lr, 9997f +#ifdef CONFIG_IRQ_PIPELINE + ldr r1, =handle_arch_irq_pipelined + mov pc, r1 +#else + ldr r1, =handle_arch_irq ldr pc, [r1] +#endif +#elif CONFIG_IRQ_PIPELINE +#error "Legacy IRQ handling not pipelined" #else arch_irq_handler_default #endif @@ -183,7 +192,10 @@ ENDPROC(__und_invalid) uaccess_entry tsk, r0, r1, r2, \uaccess .if \trace -#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_IRQ_PIPELINE + mov r0, sp + bl kentry_enter_pipelined +#elif defined(CONFIG_TRACE_IRQFLAGS) bl trace_hardirqs_off #endif .endif @@ -203,6 +215,10 @@ ENDPROC(__dabt_svc) __irq_svc: svc_entry irq_handler +#ifdef CONFIG_IRQ_PIPELINE + tst r0, r0 @ skip epilogue if oob or in-band stalled + beq 1f +#endif #ifdef CONFIG_PREEMPTION ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -213,6 +229,7 @@ __irq_svc: blne svc_preempt #endif +1: svc_exit r5, irq = 1 @ return from exception UNWIND(.fnend ) ENDPROC(__irq_svc) @@ -222,7 +239,7 @@ ENDPROC(__irq_svc) #ifdef CONFIG_PREEMPTION svc_preempt: mov r8, lr -1: bl preempt_schedule_irq @ irq en/disable is done inside +1: bl arm_preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED reteq r8 @ go again @@ -252,6 +269,16 @@ __und_svc: #else svc_entry #endif +#ifdef CONFIG_DOVETAIL + get_thread_info tsk + ldr r0, [tsk, #TI_PREEMPT] @ get preempt count + tst r0, #TI_OOB_MASK @ oob stage? + beq 1f + mov r0, #ARM_TRAP_UNDEFINSTR + mov r1, sp @ r1 = ®s + bl __oob_trap_notify +1: +#endif mov r1, #4 @ PC correction to apply THUMB( tst r5, #PSR_T_BIT ) @ exception taken in Thumb mode? @@ -261,6 +288,15 @@ __und_svc: __und_svc_finish: get_thread_info tsk +#ifdef CONFIG_DOVETAIL + ldr r0, [tsk, #TI_PREEMPT] @ get preempt count + tst r0, #TI_OOB_MASK @ oob stage? + beq 1f + mov r0, #ARM_TRAP_UNDEFINSTR + mov r1, sp @ r1 = ®s + bl __oob_trap_unwind +1: +#endif ldr r5, [sp, #S_PSR] @ Get SVC cpsr svc_exit r5 @ return from exception UNWIND(.fnend ) @@ -391,7 +427,7 @@ ENDPROC(__fiq_abt) .if \trace #ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off + bl trace_hardirqs_off_pipelined #endif ct_user_exit save = 0 .endif @@ -428,8 +464,13 @@ __irq_usr: usr_entry kuser_cmpxchg_check irq_handler - get_thread_info tsk mov why, #0 +#ifdef CONFIG_IRQ_PIPELINE +THUMB( it ne) + tst r0, r0 + beq fast_ret_to_user @ skip epilogue if oob (in-band cannot be stalled) +#endif + get_thread_info tsk b ret_to_user_from_irq UNWIND(.fnend ) ENDPROC(__irq_usr) @@ -716,7 +757,7 @@ ENTRY(ret_from_exception) UNWIND(.cantunwind ) get_thread_info tsk mov why, #0 - b ret_to_user + ret_to_user_pipelined r1 UNWIND(.fnend ) ENDPROC(__pabt_usr) ENDPROC(ret_from_exception) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/entry-common.S linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/entry-common.S --- linux-5.15.26/arch/arm/kernel/entry-common.S 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/entry-common.S 2022-03-10 09:47:50.000000000 +0100 @@ -3,6 +3,7 @@ * linux/arch/arm/kernel/entry-common.S * * Copyright (C) 2000 Russell King + * Copyright (C) 2005 Stelian Pop. 
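
The __und_svc hunks above bracket in-band trap handling with __oob_trap_notify()/__oob_trap_unwind() when the CPU was running out-of-band; break_trap() and the alignment handler further down do the same from C. A self-contained sketch of that bracketing pattern (the oob_* functions and handle_breakpoint() are stubs standing in for the real ones):

#include <stdio.h>

#define ARM_TRAP_BREAK	4

struct pt_regs;

static void oob_trap_notify(int trapnr, struct pt_regs *regs)
{
	/* Would let the companion core demote/park an oob thread. */
	printf("enter trap %d\n", trapnr);
}

static void oob_trap_unwind(int trapnr, struct pt_regs *regs)
{
	printf("leave trap %d\n", trapnr);
}

static void handle_breakpoint(struct pt_regs *regs)
{
	/* regular in-band handling (ptrace_break() in the patch) */
}

static int break_trap(struct pt_regs *regs)
{
	oob_trap_notify(ARM_TRAP_BREAK, regs);
	handle_breakpoint(regs);
	oob_trap_unwind(ARM_TRAP_BREAK, regs);
	return 0;
}

int main(void)
{
	return break_trap((struct pt_regs *)0);
}
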
*/ #include @@ -12,6 +13,7 @@ #include #ifdef CONFIG_AEABI #include +#include #endif .equ NR_syscalls, __NR_syscalls @@ -134,6 +136,10 @@ no_work_pending: restore_user_regs fast = 0, offset = 0 ENDPROC(ret_to_user_from_irq) ENDPROC(ret_to_user) +ENTRY(fast_ret_to_user) + disable_irq_notrace @ disable interrupts + b no_work_pending +ENDPROC(fast_ret_to_user) /* * This is how we return from a fork. @@ -244,6 +250,70 @@ ENTRY(vector_swi) TRACE( ldmia sp, {r0 - r3} ) local_restart: +#ifdef CONFIG_DOVETAIL + ldr r10, [tsk, #TI_LOCAL_FLAGS] @ tsk(r10) is callee-saved +#ifdef CONFIG_IPIPE_COMPAT + ldr r0, =#0xf0042 @ old syscall signature + cmp scno, r0 + bne 1f + add scno, scno, #__OOB_SYSCALL_BIT @ force in oob marker + b fastcall_try +1: +#endif +#ifdef CONFIG_DOVETAIL_LEGACY_SYSCALL_RANGE + ldr r0, =#__OOB_SYSCALL_BIT + ands r0, scno, r0 + bne fastcall_try +#endif + cmp scno, #__NR_prctl + bne slow_path + ldr r0, [sp, #S_OLD_R0] + tst r0, #__OOB_SYSCALL_BIT + beq slow_path +fastcall_try: + tst r10, #_TLF_OOB + beq slow_path + mov r0, sp @ regs + bl handle_oob_syscall + ldr r10, [tsk, #TI_LOCAL_FLAGS] + tst r0, r0 + beq slow_path + tst r10, #_TLF_OOB + bne fastcall_exit_check @ check for MAYDAY + bl sync_inband_irqs + b ret_slow_syscall +fastcall_exit_check: + ldr r10, [tsk, #TI_FLAGS] + tst r10, #_TIF_MAYDAY + beq fast_ret_to_user + mov r0, sp @ regs + bl dovetail_call_mayday + b fast_ret_to_user +slow_path: + tst r10, #_TLF_DOVETAIL + bne pipeline_syscall +#ifdef CONFIG_DOVETAIL_LEGACY_SYSCALL_RANGE + ldr r0, =#__OOB_SYSCALL_BIT + ands r0, scno, r0 + bne pipeline_syscall +#endif + cmp scno, #__NR_prctl + bne root_syscall + ldr r0, [sp, #S_OLD_R0] + tst r0, #__OOB_SYSCALL_BIT + beq root_syscall +pipeline_syscall: + mov r0, sp @ regs + bl __pipeline_syscall + ldr r10, [tsk, #TI_LOCAL_FLAGS] + tst r10, #_TLF_OOB + bne fast_ret_to_user + cmp r0, #0 + bgt ret_slow_syscall +root_syscall: + ldmia sp, { r0 - r3 } +#endif /* CONFIG_DOVETAIL */ + ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing stmdb sp!, {r4, r5} @ push fifth and sixth args diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/entry-header.S linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/entry-header.S --- linux-5.15.26/arch/arm/kernel/entry-header.S 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/entry-header.S 2022-03-10 09:47:50.000000000 +0100 @@ -203,15 +203,21 @@ .macro svc_exit, rpsr, irq = 0 .if \irq != 0 @ IRQs already off -#ifdef CONFIG_TRACE_IRQFLAGS @ The parent context IRQs must have been enabled to get here in @ the first place, so there's no point checking the PSR I bit. +#ifdef CONFIG_IRQ_PIPELINE + mov r0, sp + bl kentry_exit_pipelined +#elif defined(CONFIG_TRACE_IRQFLAGS) bl trace_hardirqs_on #endif .else @ IRQs off again before pulling preserved data off the stack disable_irq_notrace -#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_IRQ_PIPELINE + mov r0, sp + bl kentry_exit_pipelined +#elif defined(CONFIG_TRACE_IRQFLAGS) tst \rpsr, #PSR_I_BIT bleq trace_hardirqs_on tst \rpsr, #PSR_I_BIT @@ -402,6 +408,20 @@ .endm /* + * Branch to the exception epilogue, skipping the in-band work + * if running over the out-of-band interrupt stage. 
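
The vector_swi changes above route system calls in three directions: oob-marked fast calls offered to the companion core first, pipelined calls, and plain in-band (root) calls. A C model of that branch logic only (helper functions are stand-ins, and __OOB_SYSCALL_BIT below is a placeholder value):

#include <stdbool.h>

#define __OOB_SYSCALL_BIT	0x10000000	/* placeholder for the sketch */
#define __NR_prctl		172

struct pt_regs;

/* stand-ins for the real helpers reached from assembly */
static bool handle_oob_syscall(struct pt_regs *regs) { return false; }
static int __pipeline_syscall(struct pt_regs *regs) { return 0; }
static void root_syscall(struct pt_regs *regs) { }

static bool oob_marked(unsigned int scno, unsigned long arg0)
{
	/* Either the syscall number carries the oob marker, or it is
	 * prctl() with the marker set in its first argument. */
	return (scno & __OOB_SYSCALL_BIT) ||
		(scno == __NR_prctl && (arg0 & __OOB_SYSCALL_BIT));
}

static void route_syscall(unsigned int scno, unsigned long arg0,
			  bool tlf_oob, bool tlf_dovetail,
			  struct pt_regs *regs)
{
	/* Fast path: the thread currently runs on the oob stage and the
	 * call is oob-marked; give the companion core first shot. */
	if (oob_marked(scno, arg0) && tlf_oob && handle_oob_syscall(regs))
		return;			/* handled out-of-band */

	/* Slow path: dovetail-enabled threads and oob-marked calls go
	 * through the pipeline; everything else is a root syscall. */
	if (tlf_dovetail || oob_marked(scno, arg0))
		__pipeline_syscall(regs);
	else
		root_syscall(regs);
}

int main(void)
{
	route_syscall(__NR_prctl, 0, false, false, (struct pt_regs *)0);
	return 0;
}
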
+ */ + .macro ret_to_user_pipelined, tmp +#ifdef CONFIG_IRQ_PIPELINE + ldr \tmp, [tsk, #TI_LOCAL_FLAGS] +THUMB( it ne) + tst \tmp, #_TLF_OOB + bne fast_ret_to_user +#endif + b ret_to_user + .endm + +/* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - r0 to r6. * diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/irq.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/irq.c --- linux-5.15.26/arch/arm/kernel/irq.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/irq.c 2022-03-10 09:47:50.000000000 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -117,6 +118,14 @@ void __init init_IRQ(void) uniphier_cache_init(); } +#ifdef CONFIG_IRQ_PIPELINE +asmlinkage int __exception_irq_entry +handle_arch_irq_pipelined(struct pt_regs *regs) +{ + return handle_irq_pipelined(regs); +} +#endif + #ifdef CONFIG_SPARSE_IRQ int __init arch_probe_nr_irqs(void) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/irq_pipeline.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/irq_pipeline.c --- linux-5.15.26/arch/arm/kernel/irq_pipeline.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/irq_pipeline.c 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2016 Philippe Gerum . + */ +#include +#include + +void arch_do_IRQ_pipelined(struct irq_desc *desc) +{ + struct pt_regs *regs = raw_cpu_ptr(&irq_pipeline.tick_regs); + struct pt_regs *old_regs = set_irq_regs(regs); + + irq_enter(); + handle_irq_desc(desc); + irq_exit(); + + set_irq_regs(old_regs); +} + +void __init arch_irq_pipeline_init(void) +{ + /* no per-arch init. */ +} diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/Makefile linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/Makefile --- linux-5.15.26/arch/arm/kernel/Makefile 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/Makefile 2022-03-10 09:47:50.000000000 +0100 @@ -92,6 +92,11 @@ head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_PATCH_PHYS_VIRT) += phys2virt.o +ifeq ($(CONFIG_DEBUG_LL),y) +obj-$(CONFIG_RAW_PRINTK) += raw_printk.o +endif + +obj-$(CONFIG_IRQ_PIPELINE) += irq_pipeline.o # This is executed very early using a temporary stack when no memory allocator # nor global data is available. Everything has to be allocated on the stack. 
diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/patch.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/patch.c --- linux-5.15.26/arch/arm/kernel/patch.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/patch.c 2022-03-10 09:47:50.000000000 +0100 @@ -17,7 +17,7 @@ struct patch { }; #ifdef CONFIG_MMU -static DEFINE_RAW_SPINLOCK(patch_lock); +static DEFINE_HARD_SPINLOCK(patch_lock); static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/process.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/process.c --- linux-5.15.26/arch/arm/kernel/process.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/process.c 2022-03-10 09:47:50.000000000 +0100 @@ -69,6 +69,7 @@ void arch_cpu_idle(void) arm_pm_idle(); else cpu_do_idle(); + hard_cond_local_irq_enable(); raw_local_irq_enable(); } @@ -441,3 +442,28 @@ int arch_setup_additional_pages(struct l return ret; } #endif + +#ifdef CONFIG_IRQ_PIPELINE + +/* + * When pipelining interrupts, we have to reconcile the hardware and + * the virtual states. Hard irqs are off on entry while the current + * stage has to be unstalled: fix this up by stalling the in-band + * stage on entry, unstalling on exit. + */ +asmlinkage void __sched arm_preempt_schedule_irq(void) +{ + WARN_ON_ONCE(irq_pipeline_debug() && test_inband_stall()); + stall_inband_nocheck(); + preempt_schedule_irq(); + unstall_inband_nocheck(); +} + +#else + +asmlinkage void __sched arm_preempt_schedule_irq(void) +{ + preempt_schedule_irq(); +} + +#endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/ptrace.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/ptrace.c --- linux-5.15.26/arch/arm/kernel/ptrace.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/ptrace.c 2022-03-10 09:47:50.000000000 +0100 @@ -206,7 +206,9 @@ void ptrace_break(struct pt_regs *regs) static int break_trap(struct pt_regs *regs, unsigned int instr) { + oob_trap_notify(ARM_TRAP_BREAK, regs); ptrace_break(regs); + oob_trap_unwind(ARM_TRAP_BREAK, regs); return 0; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/raw_printk.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/raw_printk.c --- linux-5.15.26/arch/arm/kernel/raw_printk.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/raw_printk.c 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,30 @@ +#include +#include +#include + +/* + * If both CONFIG_DEBUG_LL and CONFIG_RAW_PRINTK are set, create a + * console device sending the raw output to printascii(). 
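
patch_lock above (and undef_lock, l2x0_lock further down) become hard spinlocks because they may be contended from contexts running with real interrupts off or from the out-of-band stage; such locks have to spin with the CPU masked, otherwise an oob IRQ preempting the in-band holder on the same CPU would deadlock on it. A single-thread model of the idea, with stubbed lock helpers:

#include <stdbool.h>

static bool cpu_irqs_off;
static bool lock_held;

static unsigned long hard_lock_irqsave(void)
{
	bool was = cpu_irqs_off;

	cpu_irqs_off = true;	/* mask the CPU, not just the in-band stage */
	while (lock_held)
		;		/* spin; no oob IRQ can preempt us here */
	lock_held = true;
	return was;
}

static void hard_unlock_irqrestore(unsigned long was)
{
	lock_held = false;
	cpu_irqs_off = was;
}

int main(void)
{
	unsigned long flags = hard_lock_irqsave();
	/* critical section shared with out-of-band code */
	hard_unlock_irqrestore(flags);
	return 0;
}
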
+ */ +void printascii(const char *s); + +static void raw_console_write(struct console *co, + const char *s, unsigned count) +{ + printascii(s); +} + +static struct console raw_console = { + .name = "rawcon", + .write_raw = raw_console_write, + .flags = CON_PRINTBUFFER | CON_ENABLED, + .index = -1, +}; + +static int __init raw_console_init(void) +{ + register_console(&raw_console); + + return 0; +} +console_initcall(raw_console_init); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/signal.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/signal.c --- linux-5.15.26/arch/arm/kernel/signal.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/signal.c 2022-03-10 09:47:50.000000000 +0100 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -597,16 +598,36 @@ static int do_signal(struct pt_regs *reg return 0; } +static inline void do_retuser(void) +{ + unsigned int thread_flags; + + if (dovetailing()) { + thread_flags = current_thread_info()->flags; + if (thread_flags & _TIF_RETUSER) + inband_retuser_notify(); + } +} + asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { + WARN_ON_ONCE(irq_pipeline_debug() && + (irqs_disabled() || running_oob())); + /* * The assembly code enters us with IRQs off, but it hasn't * informed the tracing code of that for efficiency reasons. * Update the trace code with the current status. */ - trace_hardirqs_off(); + if (!irqs_pipelined()) + trace_hardirqs_off(); do { + if (irqs_pipelined()) { + local_irq_disable(); + hard_cond_local_irq_enable(); + } + if (likely(thread_flags & _TIF_NEED_RESCHED)) { schedule(); } else { @@ -616,6 +637,7 @@ do_work_pending(struct pt_regs *regs, un if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { int restart = do_signal(regs, syscall); if (unlikely(restart)) { + do_retuser(); /* * Restart without handlers. * Deal with it without leaving @@ -629,10 +651,16 @@ do_work_pending(struct pt_regs *regs, un } else { tracehook_notify_resume(regs); } + do_retuser(); } - local_irq_disable(); + hard_local_irq_disable(); + + /* RETUSER might have switched oob */ + if (!running_inband()) + break; + thread_flags = current_thread_info()->flags; - } while (thread_flags & _TIF_WORK_MASK); + } while (inband_irq_pending() || (thread_flags & _TIF_WORK_MASK)); return 0; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/smp.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/smp.c --- linux-5.15.26/arch/arm/kernel/smp.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/smp.c 2022-03-10 09:47:50.000000000 +0100 @@ -80,7 +80,7 @@ enum ipi_msg_type { MAX_IPI }; -static int ipi_irq_base __read_mostly; +int ipi_irq_base __read_mostly; static int nr_ipi __read_mostly = NR_IPI; static struct irq_desc *ipi_desc[MAX_IPI] __read_mostly; @@ -325,7 +325,7 @@ void arch_cpu_idle_dead(void) idle_task_exit(); - local_irq_disable(); + local_irq_disable_full(); /* * Flush the data out of the L1 cache for this CPU. This must be @@ -417,6 +417,13 @@ asmlinkage void secondary_start_kernel(v local_flush_tlb_all(); /* + * irq_pipeline: debug_smp_processor_id() accesses percpu + * data. + */ + if (irqs_pipelined()) + set_my_cpu_offset(per_cpu_offset(raw_smp_processor_id())); + + /* * All kernel threads share the same mm context; grab a * reference and switch to it. 
*/ @@ -459,7 +466,7 @@ asmlinkage void secondary_start_kernel(v complete(&cpu_running); - local_irq_enable(); + local_irq_enable_full(); local_fiq_enable(); local_abt_enable(); @@ -534,6 +541,8 @@ static const char *ipi_types[NR_IPI] __t static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); +static unsigned int get_ipi_count(struct irq_desc *desc, unsigned int cpu); + void show_ipi_list(struct seq_file *p, int prec) { unsigned int cpu, i; @@ -545,7 +554,7 @@ void show_ipi_list(struct seq_file *p, i seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu)); + seq_printf(p, "%10u ", get_ipi_count(ipi_desc[i], cpu)); seq_printf(p, " %s\n", ipi_types[i]); } @@ -598,7 +607,7 @@ static void ipi_cpu_stop(unsigned int cp set_cpu_online(cpu, false); local_fiq_disable(); - local_irq_disable(); + local_irq_disable_full(); while (1) { cpu_relax(); @@ -687,6 +696,12 @@ void handle_IPI(int ipinr, struct pt_reg { struct pt_regs *old_regs = set_irq_regs(regs); + /* + * We don't support legacy IPI delivery when pipelining + * interrupts. + */ + WARN_ON_ONCE(irqs_pipelined()); + irq_enter(); do_handle_IPI(ipinr); irq_exit(); @@ -694,6 +709,74 @@ void handle_IPI(int ipinr, struct pt_reg set_irq_regs(old_regs); } +static void __smp_cross_call(const struct cpumask *target, unsigned int ipinr) +{ + trace_ipi_raise(target, ipi_types[ipinr]); + __ipi_send_mask(ipi_desc[ipinr], target); +} + +#ifdef CONFIG_IRQ_PIPELINE + +static DEFINE_PER_CPU(unsigned long, ipi_messages); + +static DEFINE_PER_CPU(unsigned int [MAX_IPI], ipi_counts); + +static irqreturn_t ipi_handler(int irq, void *data) +{ + unsigned long *pmsg; + unsigned int ipinr; + + /* + * Decode in-band IPIs (0..MAX_IPI - 1) multiplexed over + * SGI0. Out-of-band IPIs (SGI1, SGI2) have their own + * individual handler. + */ + pmsg = raw_cpu_ptr(&ipi_messages); + while (*pmsg) { + ipinr = ffs(*pmsg) - 1; + clear_bit(ipinr, pmsg); + __this_cpu_inc(ipi_counts[ipinr]); + do_handle_IPI(ipinr); + } + + return IRQ_HANDLED; +} + +static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) +{ + unsigned int cpu; + + /* regular in-band IPI (multiplexed over SGI0). */ + for_each_cpu(cpu, target) + set_bit(ipinr, &per_cpu(ipi_messages, cpu)); + + wmb(); + __smp_cross_call(target, 0); +} + +static unsigned int get_ipi_count(struct irq_desc *desc, unsigned int cpu) +{ + unsigned int irq = irq_desc_get_irq(desc); + return per_cpu(ipi_counts[irq - ipi_irq_base], cpu); +} + +void irq_send_oob_ipi(unsigned int irq, + const struct cpumask *cpumask) +{ + unsigned int sgi = irq - ipi_irq_base; + + if (WARN_ON(irq_pipeline_debug() && + (sgi < OOB_IPI_OFFSET || + sgi >= OOB_IPI_OFFSET + OOB_NR_IPI))) + return; + + /* Out-of-band IPI (SGI1-2). 
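
The hunks above multiplex all in-band IPI messages over SGI0 through a per-CPU bitmask, keeping SGI1/SGI2 free for out-of-band IPIs. A single-CPU, lock-free model of the sender and receiver sides (enum values and counters are illustrative):

#include <stdio.h>
#include <strings.h>	/* ffs() */

enum { IPI_WAKEUP, IPI_TIMER, IPI_RESCHEDULE, IPI_CALL_FUNC, MAX_IPI };

static unsigned long ipi_messages;	/* per-CPU in the real code */
static unsigned int ipi_counts[MAX_IPI];

static void do_handle_IPI(unsigned int ipinr)
{
	printf("handling IPI %u\n", ipinr);
}

/* sender side: post the message, then raise SGI0 once */
static void smp_cross_call(unsigned int ipinr)
{
	ipi_messages |= 1UL << ipinr;
	/* __smp_cross_call(target, 0) would fire SGI0 here */
}

/* receiver side: the SGI0 handler decodes every pending message */
static void sgi0_handler(void)
{
	while (ipi_messages) {
		unsigned int ipinr = ffs((int)ipi_messages) - 1;

		ipi_messages &= ~(1UL << ipinr);
		ipi_counts[ipinr]++;
		do_handle_IPI(ipinr);
	}
}

int main(void)
{
	smp_cross_call(IPI_RESCHEDULE);
	smp_cross_call(IPI_CALL_FUNC);
	sgi0_handler();
	printf("reschedule IPIs: %u\n", ipi_counts[IPI_RESCHEDULE]);
	return 0;
}
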
*/ + __smp_cross_call(cpumask, sgi); +} +EXPORT_SYMBOL_GPL(irq_send_oob_ipi); + +#else + static irqreturn_t ipi_handler(int irq, void *data) { do_handle_IPI(irq - ipi_irq_base); @@ -702,10 +785,16 @@ static irqreturn_t ipi_handler(int irq, static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) { - trace_ipi_raise_rcuidle(target, ipi_types[ipinr]); - __ipi_send_mask(ipi_desc[ipinr], target); + __smp_cross_call(target, ipinr); +} + +static unsigned int get_ipi_count(struct irq_desc *desc, unsigned int cpu) +{ + return irq_desc_kstat_cpu(desc, cpu); } +#endif /* CONFIG_IRQ_PIPELINE */ + static void ipi_setup(int cpu) { int i; @@ -719,18 +808,25 @@ static void ipi_setup(int cpu) void __init set_smp_ipi_range(int ipi_base, int n) { - int i; + int i, inband_nr_ipi; WARN_ON(n < MAX_IPI); nr_ipi = min(n, MAX_IPI); + /* + * irq_pipeline: the in-band stage traps SGI0 only, + * over which IPI messages are mutiplexed. Other SGIs + * are available for exchanging out-of-band IPIs. + */ + inband_nr_ipi = irqs_pipelined() ? 1 : nr_ipi; for (i = 0; i < nr_ipi; i++) { - int err; - - err = request_percpu_irq(ipi_base + i, ipi_handler, - "IPI", &irq_stat); - WARN_ON(err); + if (i < inband_nr_ipi) { + int err; + err = request_percpu_irq(ipi_base + i, ipi_handler, + "IPI", &irq_stat); + WARN_ON(err); + } ipi_desc[i] = irq_to_desc(ipi_base + i); irq_set_status_flags(ipi_base + i, IRQ_HIDDEN); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/smp_twd.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/smp_twd.c --- linux-5.15.26/arch/arm/kernel/smp_twd.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/smp_twd.c 2022-03-10 09:47:50.000000000 +0100 @@ -31,7 +31,7 @@ static DEFINE_PER_CPU(bool, percpu_setup static struct clock_event_device __percpu *twd_evt; static unsigned int twd_features = - CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PIPELINE; static int twd_ppi; static int twd_shutdown(struct clock_event_device *clk) @@ -182,7 +182,7 @@ static irqreturn_t twd_handler(int irq, struct clock_event_device *evt = dev_id; if (twd_timer_ack()) { - evt->event_handler(evt); + clockevents_handle_event(evt); return IRQ_HANDLED; } @@ -279,7 +279,8 @@ static int __init twd_local_timer_common goto out_free; } - err = request_percpu_irq(twd_ppi, twd_handler, "twd", twd_evt); + err = __request_percpu_irq(twd_ppi, twd_handler, + IRQF_TIMER, "twd", twd_evt); if (err) { pr_err("twd: can't register interrupt %d (%d)\n", twd_ppi, err); goto out_free; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/traps.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/traps.c --- linux-5.15.26/arch/arm/kernel/traps.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/traps.c 2022-03-10 09:47:50.000000000 +0100 @@ -391,7 +391,7 @@ int is_valid_bugaddr(unsigned long pc) #endif static LIST_HEAD(undef_hook); -static DEFINE_RAW_SPINLOCK(undef_lock); +static DEFINE_HARD_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/kernel/vdso.c linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/vdso.c --- linux-5.15.26/arch/arm/kernel/vdso.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/kernel/vdso.c 2022-03-10 09:47:50.000000000 +0100 @@ -32,7 +32,10 @@ static struct page **vdso_text_pagelist; extern 
char vdso_start[], vdso_end[]; -/* Total number of pages needed for the data and text portions of the VDSO. */ +/* + * Total number of pages needed for the data, private and text + * portions of the VDSO. + */ unsigned int vdso_total_pages __ro_after_init; /* @@ -171,8 +174,10 @@ static void __init patch_vdso(void *ehdr /* If the virtual counter is absent or non-functional we don't * want programs to incur the slight additional overhead of * dispatching through the VDSO only to fall back to syscalls. + * However, if clocksources supporting generic MMIO access can + * be reached via the vDSO, keep this fast path enabled. */ - if (!cntvct_ok) { + if (!cntvct_ok && !IS_ENABLED(CONFIG_GENERIC_CLOCKSOURCE_VDSO)) { vdso_nullpatch_one(&einfo, "__vdso_gettimeofday"); vdso_nullpatch_one(&einfo, "__vdso_clock_gettime"); vdso_nullpatch_one(&einfo, "__vdso_clock_gettime64"); @@ -210,17 +215,27 @@ static int __init vdso_init(void) vdso_text_mapping.pages = vdso_text_pagelist; - vdso_total_pages = 1; /* for the data/vvar page */ + vdso_total_pages = 2; /* for the data/vvar and vpriv pages */ vdso_total_pages += text_pages; cntvct_ok = cntvct_functional(); patch_vdso(vdso_start); +#ifdef CONFIG_GENERIC_CLOCKSOURCE_VDSO + vdso_data->cs_type_seq = CLOCKSOURCE_VDSO_NONE << 16 | 1; +#endif return 0; } arch_initcall(vdso_init); +static int install_vpriv(struct mm_struct *mm, unsigned long addr) +{ + return mmap_region(NULL, addr, PAGE_SIZE, + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, + 0, NULL) != addr ? -EINVAL : 0; +} + static int install_vvar(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma; @@ -228,8 +243,13 @@ static int install_vvar(struct mm_struct vma = _install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_MAYREAD, &vdso_data_mapping); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + if (cache_is_vivt()) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - return PTR_ERR_OR_ZERO(vma); + return vma->vm_start != addr ? -EINVAL : 0; } /* assumes mmap_lock is write-locked */ @@ -243,18 +263,29 @@ void arm_install_vdso(struct mm_struct * if (vdso_text_pagelist == NULL) return; - if (install_vvar(mm, addr)) + if (install_vpriv(mm, addr)) { + pr_err("cannot map VPRIV at expected address!\n"); return; + } + + /* Account for the private storage. */ + addr += PAGE_SIZE; + if (install_vvar(mm, addr)) { + WARN(1, "cannot map VVAR at expected address!\n"); + return; + } - /* Account for vvar page. */ + /* Account for vvar and vpriv pages. 
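
With the private page added above, the vDSO mapping becomes vpriv page, then vvar/data page, then the text pages, which is why vdso_total_pages now starts at 2. A small sketch of the resulting address accounting (base address and page count are arbitrary):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

int main(void)
{
	unsigned long addr = 0x7f000000UL;	/* arbitrary base for the sketch */
	unsigned long text_pages = 2;		/* size of vdso_text_pagelist */
	unsigned long total = 2 + text_pages;	/* vpriv + vvar + text */

	unsigned long vpriv = addr;
	unsigned long vvar  = vpriv + PAGE_SIZE;
	unsigned long text  = vvar + PAGE_SIZE;
	unsigned long text_len = (total - 2) << PAGE_SHIFT;

	printf("vpriv %#lx, vvar %#lx, text %#lx (+%#lx)\n",
	       vpriv, vvar, text, text_len);
	return 0;
}
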
*/ addr += PAGE_SIZE; - len = (vdso_total_pages - 1) << PAGE_SHIFT; + len = (vdso_total_pages - 2) << PAGE_SHIFT; vma = _install_special_mapping(mm, addr, len, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, &vdso_text_mapping); - if (!IS_ERR(vma)) + if (IS_ERR(vma) || vma->vm_start != addr) + WARN(1, "cannot map VDSO at expected address!\n"); + else mm->context.vdso = addr; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/mach-imx/gpc.c linux-dovetail-v5.15.y-dovetail/arch/arm/mach-imx/gpc.c --- linux-5.15.26/arch/arm/mach-imx/gpc.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/mach-imx/gpc.c 2022-03-10 09:47:50.000000000 +0100 @@ -62,28 +62,38 @@ void imx_gpc_set_l2_mem_power_in_lpm(boo void imx_gpc_pre_suspend(bool arm_power_off) { void __iomem *reg_imr1 = gpc_base + GPC_IMR1; + unsigned long flags; int i; /* Tell GPC to power off ARM core when suspend */ if (arm_power_off) imx_gpc_set_arm_power_in_lpm(arm_power_off); + flags = hard_cond_local_irq_save(); + for (i = 0; i < IMR_NUM; i++) { gpc_saved_imrs[i] = readl_relaxed(reg_imr1 + i * 4); writel_relaxed(~gpc_wake_irqs[i], reg_imr1 + i * 4); } + + hard_cond_local_irq_restore(flags); } void imx_gpc_post_resume(void) { void __iomem *reg_imr1 = gpc_base + GPC_IMR1; + unsigned long flags; int i; /* Keep ARM core powered on for other low-power modes */ imx_gpc_set_arm_power_in_lpm(false); + flags = hard_cond_local_irq_save(); + for (i = 0; i < IMR_NUM; i++) writel_relaxed(gpc_saved_imrs[i], reg_imr1 + i * 4); + + hard_cond_local_irq_restore(flags); } static int imx_gpc_irq_set_wake(struct irq_data *d, unsigned int on) @@ -105,21 +115,31 @@ static int imx_gpc_irq_set_wake(struct i void imx_gpc_mask_all(void) { void __iomem *reg_imr1 = gpc_base + GPC_IMR1; + unsigned long flags; int i; + flags = hard_cond_local_irq_save(); + for (i = 0; i < IMR_NUM; i++) { gpc_saved_imrs[i] = readl_relaxed(reg_imr1 + i * 4); writel_relaxed(~0, reg_imr1 + i * 4); } + + hard_cond_local_irq_restore(flags); } void imx_gpc_restore_all(void) { void __iomem *reg_imr1 = gpc_base + GPC_IMR1; + unsigned long flags; int i; + flags = hard_cond_local_irq_save(); + for (i = 0; i < IMR_NUM; i++) writel_relaxed(gpc_saved_imrs[i], reg_imr1 + i * 4); + + hard_cond_local_irq_restore(flags); } void imx_gpc_hwirq_unmask(unsigned int hwirq) @@ -167,6 +187,7 @@ static struct irq_chip imx_gpc_chip = { #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif + .flags = IRQCHIP_PIPELINE_SAFE, }; static int imx_gpc_domain_translate(struct irq_domain *d, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/mm/alignment.c linux-dovetail-v5.15.y-dovetail/arch/arm/mm/alignment.c --- linux-5.15.26/arch/arm/mm/alignment.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/mm/alignment.c 2022-03-10 09:47:50.000000000 +0100 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -807,10 +808,12 @@ do_alignment(unsigned long addr, unsigne u16 tinstr = 0; int isize = 4; int thumb2_32b = 0; - int fault; + int fault, ret = 0; if (interrupts_enabled(regs)) - local_irq_enable(); + hard_local_irq_enable(); + + oob_trap_notify(ARM_TRAP_ALIGNMENT, regs); instrptr = instruction_pointer(regs); @@ -935,7 +938,7 @@ do_alignment(unsigned long addr, unsigne if (type == TYPE_LDST) do_alignment_finish_ldst(addr, instr, regs, offset); - return 0; + goto out; bad_or_fault: if (type == TYPE_ERROR) @@ -944,7 +947,7 @@ do_alignment(unsigned long addr, 
unsigne * We got a fault - fix it up, or die. */ do_bad_area(addr, fsr, regs); - return 0; + goto out; swp: pr_err("Alignment trap: not handling swp instruction\n"); @@ -958,7 +961,8 @@ do_alignment(unsigned long addr, unsigne isize << 1, isize == 2 ? tinstr : instr, instrptr); ai_skipped += 1; - return 1; + ret = 1; + goto out; user: ai_user += 1; @@ -989,12 +993,15 @@ do_alignment(unsigned long addr, unsigne * entry-common.S) and disable the alignment trap only if * there is no work pending for this thread. */ - raw_local_irq_disable(); + hard_local_irq_disable(); if (!(current_thread_info()->flags & _TIF_WORK_MASK)) set_cr(cr_no_alignment); } - return 0; +out: + oob_trap_unwind(ARM_TRAP_ALIGNMENT, regs); + + return ret; } static int __init noalign_setup(char *__unused) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/mm/cache-l2x0.c linux-dovetail-v5.15.y-dovetail/arch/arm/mm/cache-l2x0.c --- linux-5.15.26/arch/arm/mm/cache-l2x0.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/mm/cache-l2x0.c 2022-03-10 09:47:50.000000000 +0100 @@ -38,7 +38,7 @@ struct l2c_init_data { static void __iomem *l2x0_base; static const struct l2c_init_data *l2x0_data; -static DEFINE_RAW_SPINLOCK(l2x0_lock); +static DEFINE_HARD_SPINLOCK(l2x0_lock); static u32 l2x0_way_mask; /* Bitmask of active ways */ static u32 l2x0_size; static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; @@ -48,6 +48,19 @@ struct l2x0_regs l2x0_saved_regs; static bool l2x0_bresp_disable; static bool l2x0_flz_disable; +#ifdef CONFIG_IRQ_PIPELINE +#define CACHE_RANGE_ATOMIC_MAX 512UL +static int l2x0_wa = -1; +static int __init l2x0_setup_wa(char *str) +{ + l2x0_wa = !!simple_strtol(str, NULL, 0); + return 0; +} +early_param("l2x0_write_allocate", l2x0_setup_wa); +#else +#define CACHE_RANGE_ATOMIC_MAX 4096UL +#endif + /* * Common code for all cache controllers. 
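* With CONFIG_IRQ_PIPELINE, the PA range helpers below also chunk their * work into CACHE_RANGE_ATOMIC_MAX (512 byte) blocks instead of 4096, * presumably so that l2x0_lock, now a hard spinlock taken with hard irqs * off, is never held across overly long range operations.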
*/ @@ -120,11 +133,11 @@ static void l2c_enable(void __iomem *bas l2x0_data->unlock(base, num_lock); - local_irq_save(flags); + flags = hard_local_irq_save(); __l2c_op_way(base + L2X0_INV_WAY); writel_relaxed(0, base + sync_reg_offset); l2c_wait_mask(base + sync_reg_offset, 1); - local_irq_restore(flags); + hard_local_irq_restore(flags); l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL); } @@ -225,7 +238,7 @@ static void l2c210_flush_all(void) { void __iomem *base = l2x0_base; - BUG_ON(!irqs_disabled()); + BUG_ON(!hard_irqs_disabled()); __l2c_op_way(base + L2X0_CLEAN_INV_WAY); __l2c210_cache_sync(base); @@ -284,10 +297,10 @@ static void l2c220_op_way(void __iomem * static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start, unsigned long end, unsigned long flags) { - raw_spinlock_t *lock = &l2x0_lock; + typeof(l2x0_lock) *lock = &l2x0_lock; while (start < end) { - unsigned long blk_end = start + min(end - start, 4096UL); + unsigned long blk_end = start + min(end - start, CACHE_RANGE_ATOMIC_MAX); while (start < blk_end) { l2c_wait_mask(reg, 1); @@ -498,13 +511,13 @@ static void l2c310_inv_range_erratum(uns static void l2c310_flush_range_erratum(unsigned long start, unsigned long end) { - raw_spinlock_t *lock = &l2x0_lock; + typeof(l2x0_lock) *lock = &l2x0_lock; unsigned long flags; void __iomem *base = l2x0_base; raw_spin_lock_irqsave(lock, flags); while (start < end) { - unsigned long blk_end = start + min(end - start, 4096UL); + unsigned long blk_end = start + min(end - start, CACHE_RANGE_ATOMIC_MAX); l2c_set_debug(base, 0x03); while (start < blk_end) { @@ -800,6 +813,24 @@ static int __init __l2c_init(const struc if (aux_val & aux_mask) pr_alert("L2C: platform provided aux values permit register corruption.\n"); +#ifdef CONFIG_IRQ_PIPELINE + if (!l2x0_wa) { + /* + * Disable WA by setting bit 23 in the auxiliary + * control register. 
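+ * Bit 23 is the bottom bit of the Force Write Allocate field, so this + * presumably selects the 'force no allocate' encoding.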
+ */ + aux_mask &= ~L220_AUX_CTRL_FWA_MASK; + aux_val &= ~L220_AUX_CTRL_FWA_MASK; + aux_val |= 1 << L220_AUX_CTRL_FWA_SHIFT; + pr_warn("%s: irq_pipeline: write-allocate disabled via command line\n", + data->type); + } else if ((cache_id & L2X0_CACHE_ID_PART_MASK) == L2X0_CACHE_ID_PART_L220 || + ((cache_id & L2X0_CACHE_ID_PART_MASK) == L2X0_CACHE_ID_PART_L310 && + (cache_id & L2X0_CACHE_ID_RTL_MASK) < L310_CACHE_ID_RTL_R3P2)) + pr_alert("%s: irq_pipeline: write-allocate enabled, may induce high latency\n", + data->type); +#endif + old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/mm/context.c linux-dovetail-v5.15.y-dovetail/arch/arm/mm/context.c --- linux-5.15.26/arch/arm/mm/context.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/mm/context.c 2022-03-10 09:47:50.000000000 +0100 @@ -39,7 +39,7 @@ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) #define NUM_USER_ASIDS ASID_FIRST_VERSION -static DEFINE_RAW_SPINLOCK(cpu_asid_lock); +static DEFINE_HARD_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); @@ -237,9 +237,12 @@ static u64 new_context(struct mm_struct void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) { unsigned long flags; - unsigned int cpu = smp_processor_id(); + unsigned int cpu = raw_smp_processor_id(); + bool need_flush; u64 asid; + WARN_ON_ONCE(dovetail_debug() && !hard_irqs_disabled()); + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) __check_vmalloc_seq(mm); @@ -263,15 +266,16 @@ void check_and_switch_context(struct mm_ atomic64_set(&mm->context.id, asid); } - if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { - local_flush_bp_all(); - local_flush_tlb_all(); - } - + need_flush = cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending); atomic64_set(&per_cpu(active_asids, cpu), asid); cpumask_set_cpu(cpu, mm_cpumask(mm)); raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + if (need_flush) { + local_flush_bp_all(); + local_flush_tlb_all(); + } + switch_mm_fastpath: cpu_switch_mm(mm->pgd, mm); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/mm/fault.c linux-dovetail-v5.15.y-dovetail/arch/arm/mm/fault.c --- linux-5.15.26/arch/arm/mm/fault.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/mm/fault.c 2022-03-10 09:47:50.000000000 +0100 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -21,11 +22,71 @@ #include #include #include +#include +#define CREATE_TRACE_POINTS +#include #include "fault.h" #ifdef CONFIG_MMU +#ifdef CONFIG_IRQ_PIPELINE +/* + * We need to synchronize the virtual interrupt state with the hard + * interrupt state we received on entry, then turn hardirqs back on to + * allow code which does not require strict serialization to be + * preempted by an out-of-band activity. + */ +static inline +unsigned long fault_entry(int exception, struct pt_regs *regs) +{ + unsigned long flags; + + trace_ARM_trap_entry(exception, regs); + + flags = hard_local_save_flags(); + + oob_trap_notify(exception, regs); + + /* + * CAUTION: The co-kernel might have to demote the current + * context to the in-band stage as a result of handling this + * trap, returning with hard irqs on. We expect stall_inband() + * to complain loudly if we are still running oob afterwards. 
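+ * The flags returned here must be handed back to fault_exit(), e.g.: + * + *	irqflags = fault_entry(ARM_TRAP_ACCESS, regs); + *	... handle the fault ... + *	fault_exit(ARM_TRAP_ACCESS, regs, irqflags); + * + * which unwinds the out-of-band trap notification and restores the hard + * interrupt state that was in effect on entry.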
+ */ + if (raw_irqs_disabled_flags(flags)) { + stall_inband(); + trace_hardirqs_off(); + } + + hard_local_irq_enable(); + + return flags; +} + +static inline +void fault_exit(int exception, struct pt_regs *regs, + unsigned long flags) +{ + WARN_ON_ONCE(irq_pipeline_debug() && hard_irqs_disabled()); + + /* + * We expect kentry_exit_pipelined() to clear the stall bit if + * kentry_enter_pipelined() observed it that way. + */ + oob_trap_unwind(exception, regs); + trace_ARM_trap_exit(exception, regs); + hard_local_irq_restore(flags); +} + +#else /* !CONFIG_IRQ_PIPELINE */ + +#define fault_entry(__exception, __regs) ({ 0; }) +#define fault_exit(__exception, __regs, __flags) \ + do { (void)(__flags); } while (0) + +#endif /* !CONFIG_IRQ_PIPELINE */ + /* * This is useful to dump out the page tables associated with * 'addr' in mm 'mm'. @@ -96,6 +157,15 @@ void show_pte(const char *lvl, struct mm pr_cont("\n"); } #else /* CONFIG_MMU */ +unsigned long fault_entry(int exception, struct pt_regs *regs) +{ + return 0; +} + +static inline void fault_exit(int exception, struct pt_regs *regs, + unsigned long combo) +{ } + void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr) { } #endif /* CONFIG_MMU */ @@ -116,6 +186,7 @@ __do_kernel_fault(struct mm_struct *mm, /* * No handler, we'll have to terminate things with extreme prejudice. */ + irq_pipeline_oops(); bust_spinlocks(1); pr_alert("8<--- cut here ---\n"); pr_alert("Unable to handle kernel %s at virtual address %08lx\n", @@ -168,14 +239,22 @@ void do_bad_area(unsigned long addr, uns { struct task_struct *tsk = current; struct mm_struct *mm = tsk->active_mm; + unsigned long irqflags; /* * If we are in kernel mode at this point, we * have no context to handle this fault with. */ - if (user_mode(regs)) + if (user_mode(regs)) { + irqflags = fault_entry(ARM_TRAP_ACCESS, regs); __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs); - else + fault_exit(ARM_TRAP_ACCESS, regs, irqflags); + } else + /* + * irq_pipeline: kernel faults are either quickly + * recoverable via fixup, or lethal. In both cases, we + * can skip the interrupt state synchronization. 
+ */ __do_kernel_fault(mm, addr, fsr, regs); } @@ -244,9 +323,12 @@ do_page_fault(unsigned long addr, unsign int sig, code; vm_fault_t fault; unsigned int flags = FAULT_FLAG_DEFAULT; + unsigned long irqflags; + + irqflags = fault_entry(ARM_TRAP_ACCESS, regs); if (kprobe_page_fault(regs, fsr)) - return 0; + goto out; tsk = current; mm = tsk->mm; @@ -302,7 +384,7 @@ retry: if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) goto no_context; - return 0; + goto out; } if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) { @@ -318,7 +400,7 @@ retry: * Handle the "normal" case first - VM_FAULT_MAJOR */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) - return 0; + goto out; /* * If we are in kernel mode at this point, we @@ -334,7 +416,7 @@ retry: * got oom-killed) */ pagefault_out_of_memory(); - return 0; + goto out; } if (fault & VM_FAULT_SIGBUS) { @@ -355,10 +437,13 @@ retry: } __do_user_fault(addr, fsr, sig, code, regs); - return 0; + goto out; no_context: __do_kernel_fault(mm, addr, fsr, regs); +out: + fault_exit(ARM_TRAP_ACCESS, regs, irqflags); + return 0; } #else /* CONFIG_MMU */ @@ -397,6 +482,8 @@ do_translation_fault(unsigned long addr, pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; + WARN_ON_ONCE(irqs_pipelined() && !hard_irqs_disabled()); + if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); @@ -470,7 +557,11 @@ do_translation_fault(unsigned long addr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + unsigned long irqflags; + + irqflags = fault_entry(ARM_TRAP_SECTION, regs); do_bad_area(addr, fsr, regs); + fault_exit(ARM_TRAP_SECTION, regs, irqflags); return 0; } #endif /* CONFIG_ARM_LPAE */ @@ -518,10 +609,12 @@ asmlinkage void do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + fsr_fs(fsr); + unsigned long irqflags; if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) return; + irqflags = fault_entry(ARM_TRAP_DABT, regs); pr_alert("8<--- cut here ---\n"); pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n", inf->name, fsr, addr); @@ -529,6 +622,7 @@ do_DataAbort(unsigned long addr, unsigne arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr, fsr, 0); + fault_exit(ARM_TRAP_DABT, regs, irqflags); } void __init @@ -548,15 +642,18 @@ asmlinkage void do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) { const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr); + unsigned long irqflags; if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) return; + irqflags = fault_entry(ARM_TRAP_PABT, regs); pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", inf->name, ifsr, addr); arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr, ifsr, 0); + fault_exit(ARM_TRAP_PABT, regs, irqflags); } /* diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/vdso/datapage.S linux-dovetail-v5.15.y-dovetail/arch/arm/vdso/datapage.S --- linux-5.15.26/arch/arm/vdso/datapage.S 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/vdso/datapage.S 2022-03-10 09:47:50.000000000 +0100 @@ -5,6 +5,8 @@ .align 2 .L_vdso_data_ptr: .long _start - . - VDSO_DATA_SIZE +.L_vdso_priv_ptr: + .long _start - . 
- VDSO_DATA_SIZE - VDSO_PRIV_SIZE ENTRY(__get_datapage) .fnstart @@ -14,3 +16,12 @@ ENTRY(__get_datapage) bx lr .fnend ENDPROC(__get_datapage) + +ENTRY(__get_privpage) + .fnstart + adr r0, .L_vdso_priv_ptr + ldr r1, [r0] + add r0, r0, r1 + bx lr + .fnend +ENDPROC(__get_privpage) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/vfp/entry.S linux-dovetail-v5.15.y-dovetail/arch/arm/vfp/entry.S --- linux-5.15.26/arch/arm/vfp/entry.S 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/vfp/entry.S 2022-03-10 09:47:50.000000000 +0100 @@ -23,6 +23,7 @@ @ ENTRY(do_vfp) inc_preempt_count r10, r4 + disable_irq_if_pipelined ldr r4, .LCvfp ldr r11, [r10, #TI_CPU] @ CPU number add r10, r10, #TI_VFPSTATE @ r10 = workspace @@ -30,6 +31,7 @@ ENTRY(do_vfp) ENDPROC(do_vfp) ENTRY(vfp_null_entry) + enable_irq_if_pipelined dec_preempt_count_ti r10, r4 ret lr ENDPROC(vfp_null_entry) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/vfp/vfphw.S linux-dovetail-v5.15.y-dovetail/arch/arm/vfp/vfphw.S --- linux-5.15.26/arch/arm/vfp/vfphw.S 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/vfp/vfphw.S 2022-03-10 09:47:50.000000000 +0100 @@ -170,6 +170,7 @@ vfp_hw_state_valid: @ out before setting an FPEXC that @ stops us reading stuff VFPFMXR FPEXC, r1 @ Restore FPEXC last + enable_irq_if_pipelined sub r2, r2, #4 @ Retry current instruction - if Thumb str r2, [sp, #S_PC] @ mode it's two 16-bit instructions, @ else it's one 32-bit instruction, so @@ -199,6 +200,7 @@ skip: @ Fall into hand on to next handler - appropriate coproc instr @ not recognised by VFP + enable_irq_if_pipelined DBGSTR "not VFP" dec_preempt_count_ti r10, r4 ret lr diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm/vfp/vfpmodule.c linux-dovetail-v5.15.y-dovetail/arch/arm/vfp/vfpmodule.c --- linux-5.15.26/arch/arm/vfp/vfpmodule.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm/vfp/vfpmodule.c 2022-03-10 09:47:50.000000000 +0100 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -90,6 +91,7 @@ static void vfp_force_reload(unsigned in static void vfp_thread_flush(struct thread_info *thread) { union vfp_state *vfp = &thread->vfpstate; + unsigned long flags; unsigned int cpu; /* @@ -100,11 +102,11 @@ static void vfp_thread_flush(struct thre * Do this first to ensure that preemption won't overwrite our * state saving should access to the VFP be enabled at this point. */ - cpu = get_cpu(); + cpu = hard_get_cpu(flags); if (vfp_current_hw_state[cpu] == vfp) vfp_current_hw_state[cpu] = NULL; fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); - put_cpu(); + hard_put_cpu(flags); memset(vfp, 0, sizeof(union vfp_state)); @@ -119,11 +121,12 @@ static void vfp_thread_exit(struct threa { /* release case: Per-thread VFP cleanup. 
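* hard_get_cpu()/hard_put_cpu() stand in for get_cpu()/put_cpu() in this * file; assuming the usual Dovetail semantics, they also disable hard * interrupts and hand back the saved flags, so the vfp_current_hw_state[] * check cannot race with an out-of-band context switch.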
*/ union vfp_state *vfp = &thread->vfpstate; - unsigned int cpu = get_cpu(); + unsigned long flags; + unsigned int cpu = hard_get_cpu(flags); if (vfp_current_hw_state[cpu] == vfp) vfp_current_hw_state[cpu] = NULL; - put_cpu(); + hard_put_cpu(flags); } static void vfp_thread_copy(struct thread_info *thread) @@ -159,6 +162,7 @@ static void vfp_thread_copy(struct threa static int vfp_notifier(struct notifier_block *self, unsigned long cmd, void *v) { struct thread_info *thread = v; + unsigned long flags; u32 fpexc; #ifdef CONFIG_SMP unsigned int cpu; @@ -166,6 +170,7 @@ static int vfp_notifier(struct notifier_ switch (cmd) { case THREAD_NOTIFY_SWITCH: + flags = hard_cond_local_irq_save(); fpexc = fmrx(FPEXC); #ifdef CONFIG_SMP @@ -185,6 +190,7 @@ static int vfp_notifier(struct notifier_ * old state. */ fmxr(FPEXC, fpexc & ~FPEXC_EN); + hard_cond_local_irq_restore(flags); break; case THREAD_NOTIFY_FLUSH: @@ -322,7 +328,7 @@ static u32 vfp_emulate_instruction(u32 i */ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) { - u32 fpscr, orig_fpscr, fpsid, exceptions; + u32 fpscr, orig_fpscr, fpsid, exceptions, next_trigger = 0; pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); @@ -352,6 +358,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, /* * Synchronous exception, emulate the trigger instruction */ + hard_cond_local_irq_enable(); goto emulate; } @@ -364,7 +371,18 @@ void VFP_bounce(u32 trigger, u32 fpexc, trigger = fmrx(FPINST); regs->ARM_pc -= 4; #endif - } else if (!(fpexc & FPEXC_DEX)) { + if (fpexc & FPEXC_FP2V) { + /* + * The barrier() here prevents fpinst2 being read + * before the condition above. + */ + barrier(); + next_trigger = fmrx(FPINST2); + } + } + hard_cond_local_irq_enable(); + + if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) { /* * Illegal combination of bits. It can be caused by an * unallocated VFP instruction but with FPSCR.IXE set and not @@ -404,18 +422,14 @@ void VFP_bounce(u32 trigger, u32 fpexc, if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V)) goto exit; - /* - * The barrier() here prevents fpinst2 being read - * before the condition above. - */ - barrier(); - trigger = fmrx(FPINST2); + trigger = next_trigger; emulate: exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); if (exceptions) vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); exit: + hard_cond_local_irq_enable(); preempt_enable(); } @@ -515,7 +529,8 @@ static inline void vfp_pm_init(void) { } */ void vfp_sync_hwstate(struct thread_info *thread) { - unsigned int cpu = get_cpu(); + unsigned long flags; + unsigned int cpu = hard_get_cpu(flags); if (vfp_state_in_hw(cpu, thread)) { u32 fpexc = fmrx(FPEXC); @@ -528,17 +543,18 @@ void vfp_sync_hwstate(struct thread_info fmxr(FPEXC, fpexc); } - put_cpu(); + hard_put_cpu(flags); } /* Ensure that the thread reloads the hardware VFP state on the next use. 
*/ void vfp_flush_hwstate(struct thread_info *thread) { - unsigned int cpu = get_cpu(); + unsigned long flags; + unsigned int cpu = hard_get_cpu(flags); vfp_force_reload(cpu, thread); - put_cpu(); + hard_put_cpu(flags); } /* diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b-nobt.dts linux-dovetail-v5.15.y-dovetail/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b-nobt.dts --- linux-5.15.26/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b-nobt.dts 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b-nobt.dts 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,12 @@ +/dts-v1/; +#include "bcm2837-rpi-3-b.dts" + +&uart0 { + status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&uart0_gpio32>; +}; + +&uart1 { + status = "disabled"; +}; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/boot/dts/broadcom/Makefile linux-dovetail-v5.15.y-dovetail/arch/arm64/boot/dts/broadcom/Makefile --- linux-5.15.26/arch/arm64/boot/dts/broadcom/Makefile 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/boot/dts/broadcom/Makefile 2022-03-10 09:47:50.000000000 +0100 @@ -4,6 +4,7 @@ dtb-$(CONFIG_ARCH_BCM2835) += bcm2711-rp bcm2837-rpi-3-a-plus.dtb \ bcm2837-rpi-3-b.dtb \ bcm2837-rpi-3-b-plus.dtb \ + bcm2837-rpi-3-b-nobt.dtb \ bcm2837-rpi-cm3-io3.dtb subdir-y += bcm4908 diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/daifflags.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/daifflags.h --- linux-5.15.26/arch/arm64/include/asm/daifflags.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/daifflags.h 2022-03-10 09:47:50.000000000 +0100 @@ -12,6 +12,12 @@ #include #include +/* + * irq_pipeline: DAIF masking is only used in contexts where hard + * interrupt masking applies, so no need to virtualize for the inband + * stage here (the pipeline core does assume this). 
+ */ + #define DAIF_PROCCTX 0 #define DAIF_PROCCTX_NOIRQ (PSR_I_BIT | PSR_F_BIT) #define DAIF_ERRCTX (PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) @@ -35,7 +41,7 @@ static inline void local_daif_mask(void) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - trace_hardirqs_off(); + trace_hardirqs_off_pipelined(); } static inline unsigned long local_daif_save_flags(void) @@ -72,7 +78,7 @@ static inline void local_daif_restore(un (read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT)); if (!irq_disabled) { - trace_hardirqs_on(); + trace_hardirqs_on_pipelined(); if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON); @@ -117,7 +123,7 @@ static inline void local_daif_restore(un write_sysreg(flags, daif); if (irq_disabled) - trace_hardirqs_off(); + trace_hardirqs_off_pipelined(); } /* @@ -129,7 +135,7 @@ static inline void local_daif_inherit(st unsigned long flags = regs->pstate & DAIF_MASK; if (interrupts_enabled(regs)) - trace_hardirqs_on(); + trace_hardirqs_on_pipelined(); if (system_uses_irq_prio_masking()) gic_write_pmr(regs->pmr_save); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/dovetail.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/dovetail.h --- linux-5.15.26/arch/arm64/include/asm/dovetail.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/dovetail.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2018 Philippe Gerum . + */ +#ifndef _ASM_ARM64_DOVETAIL_H +#define _ASM_ARM64_DOVETAIL_H + +#include + +/* ARM64 traps */ +#define ARM64_TRAP_ACCESS 0 /* Data or instruction access exception */ +#define ARM64_TRAP_ALIGN 1 /* SP/PC alignment abort */ +#define ARM64_TRAP_SEA 2 /* Synchronous external abort */ +#define ARM64_TRAP_DEBUG 3 /* Debug trap */ +#define ARM64_TRAP_UNDI 4 /* Undefined instruction */ +#define ARM64_TRAP_UNDSE 5 /* Undefined synchronous exception */ +#define ARM64_TRAP_FPE 6 /* FPSIMD exception */ +#define ARM64_TRAP_SVE 7 /* SVE access trap */ +#define ARM64_TRAP_BTI 8 /* Branch target identification */ + +#ifdef CONFIG_DOVETAIL + +static inline void arch_dovetail_exec_prepare(void) +{ } + +static inline void arch_dovetail_switch_prepare(bool leave_inband) +{ } + +static inline void arch_dovetail_switch_finish(bool enter_inband) +{ + fpsimd_restore_current_oob(); +} + +#endif + +#endif /* _ASM_ARM64_DOVETAIL_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/efi.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/efi.h --- linux-5.15.26/arch/arm64/include/asm/efi.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/efi.h 2022-03-10 09:47:50.000000000 +0100 @@ -102,6 +102,10 @@ static inline void free_screen_info(stru static inline void efi_set_pgd(struct mm_struct *mm) { + unsigned long flags; + + protect_inband_mm(flags); + __switch_mm(mm); if (system_uses_ttbr0_pan()) { @@ -126,6 +130,8 @@ static inline void efi_set_pgd(struct mm update_saved_ttbr0(current, current->active_mm); } } + + unprotect_inband_mm(flags); } void efi_virtmap_load(void); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/fpsimd.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/fpsimd.h --- linux-5.15.26/arch/arm64/include/asm/fpsimd.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/fpsimd.h 
2022-03-10 09:47:50.000000000 +0100 @@ -43,6 +43,7 @@ extern void fpsimd_flush_thread(void); extern void fpsimd_signal_preserve_current_state(void); extern void fpsimd_preserve_current_state(void); extern void fpsimd_restore_current_state(void); +extern void fpsimd_restore_current_oob(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/irqflags.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/irqflags.h --- linux-5.15.26/arch/arm64/include/asm/irqflags.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/irqflags.h 2022-03-10 09:47:50.000000000 +0100 @@ -10,6 +10,10 @@ #include #include +#define IRQMASK_I_BIT PSR_I_BIT +#define IRQMASK_I_POS 7 +#define IRQMASK_i_POS 31 + /* * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif' @@ -24,7 +28,7 @@ /* * CPU interrupt mask handling. */ -static inline void arch_local_irq_enable(void) +static inline void native_irq_enable(void) { if (system_has_prio_mask_debugging()) { u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); @@ -33,7 +37,7 @@ static inline void arch_local_irq_enable } asm volatile(ALTERNATIVE( - "msr daifclr, #3 // arch_local_irq_enable", + "msr daifclr, #3 // native_irq_enable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : @@ -43,7 +47,7 @@ static inline void arch_local_irq_enable pmr_sync(); } -static inline void arch_local_irq_disable(void) +static inline void native_irq_disable(void) { if (system_has_prio_mask_debugging()) { u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); @@ -52,7 +56,7 @@ static inline void arch_local_irq_disabl } asm volatile(ALTERNATIVE( - "msr daifset, #3 // arch_local_irq_disable", + "msr daifset, #3 // native_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : @@ -60,10 +64,17 @@ static inline void arch_local_irq_disabl : "memory"); } +static inline void native_irq_sync(void) +{ + native_irq_enable(); + isb(); + native_irq_disable(); +} + /* * Save the current interrupt enable state. */ -static inline unsigned long arch_local_save_flags(void) +static inline unsigned long native_save_flags(void) { unsigned long flags; @@ -78,7 +89,7 @@ static inline unsigned long arch_local_s return flags; } -static inline int arch_irqs_disabled_flags(unsigned long flags) +static inline int native_irqs_disabled_flags(unsigned long flags) { int res; @@ -93,23 +104,18 @@ static inline int arch_irqs_disabled_fla return res; } -static inline int arch_irqs_disabled(void) -{ - return arch_irqs_disabled_flags(arch_local_save_flags()); -} - -static inline unsigned long arch_local_irq_save(void) +static inline unsigned long native_irq_save(void) { unsigned long flags; - flags = arch_local_save_flags(); + flags = native_save_flags(); /* * There are too many states with IRQs disabled, just keep the current * state if interrupts are already disabled/masked. 
*/ - if (!arch_irqs_disabled_flags(flags)) - arch_local_irq_disable(); + if (!native_irqs_disabled_flags(flags)) + native_irq_disable(); return flags; } @@ -117,7 +123,7 @@ static inline unsigned long arch_local_i /* * restore saved IRQ state */ -static inline void arch_local_irq_restore(unsigned long flags) +static inline void native_irq_restore(unsigned long flags) { asm volatile(ALTERNATIVE( "msr daif, %0", @@ -130,4 +136,12 @@ static inline void arch_local_irq_restor pmr_sync(); } +static inline bool native_irqs_disabled(void) +{ + unsigned long flags = native_save_flags(); + return native_irqs_disabled_flags(flags); +} + +#include + #endif /* __ASM_IRQFLAGS_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/irq_pipeline.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/irq_pipeline.h --- linux-5.15.26/arch/arm64/include/asm/irq_pipeline.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/irq_pipeline.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,148 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2018 Philippe Gerum . + */ +#ifndef _ASM_ARM64_IRQ_PIPELINE_H +#define _ASM_ARM64_IRQ_PIPELINE_H + +#include + +#ifdef CONFIG_IRQ_PIPELINE + +/* + * In order to cope with the limited number of SGIs available to us, + * In-band IPI messages are multiplexed over SGI0, whereas out-of-band + * IPIs are directly mapped to SGI1-2. + */ +#define OOB_NR_IPI 2 +#define OOB_IPI_OFFSET 1 /* SGI1 */ +#define TIMER_OOB_IPI (ipi_irq_base + OOB_IPI_OFFSET) +#define RESCHEDULE_OOB_IPI (TIMER_OOB_IPI + 1) + +extern int ipi_irq_base; + +static inline notrace +unsigned long arch_irqs_virtual_to_native_flags(int stalled) +{ + return (!!stalled) << IRQMASK_I_POS; +} + +static inline notrace +unsigned long arch_irqs_native_to_virtual_flags(unsigned long flags) +{ + return (!!hard_irqs_disabled_flags(flags)) << IRQMASK_i_POS; +} + +static inline notrace unsigned long arch_local_irq_save(void) +{ + int stalled = inband_irq_save(); + barrier(); + return arch_irqs_virtual_to_native_flags(stalled); +} + +static inline notrace void arch_local_irq_enable(void) +{ + barrier(); + inband_irq_enable(); +} + +static inline notrace void arch_local_irq_disable(void) +{ + inband_irq_disable(); + barrier(); +} + +static inline notrace unsigned long arch_local_save_flags(void) +{ + int stalled = inband_irqs_disabled(); + barrier(); + return arch_irqs_virtual_to_native_flags(stalled); +} + +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return native_irqs_disabled_flags(flags); +} + +static inline notrace void arch_local_irq_restore(unsigned long flags) +{ + inband_irq_restore(arch_irqs_disabled_flags(flags)); + barrier(); +} + +static inline +void arch_save_timer_regs(struct pt_regs *dst, struct pt_regs *src) +{ + dst->pstate = src->pstate; + dst->pc = src->pc; +} + +static inline bool arch_steal_pipelined_tick(struct pt_regs *regs) +{ + return !!(regs->pstate & IRQMASK_I_BIT); +} + +static inline int arch_enable_oob_stage(void) +{ + return 0; +} + +extern void (*handle_arch_irq)(struct pt_regs *); + +static inline void arch_handle_irq_pipelined(struct pt_regs *regs) +{ + handle_arch_irq(regs); +} + +/* + * We use neither the generic entry code nor + * kentry_enter/exit_pipelined yet. We still build a no-op version of + * the latter for now, until we enventually switch to using whichever + * of them is available first. 
+ */ +#define arch_kentry_get_irqstate(__regs) 0 + +#define arch_kentry_set_irqstate(__regs, __irqstate) \ + do { (void)__irqstate; } while (0) + +#else /* !CONFIG_IRQ_PIPELINE */ + +static inline unsigned long arch_local_irq_save(void) +{ + return native_irq_save(); +} + +static inline void arch_local_irq_enable(void) +{ + native_irq_enable(); +} + +static inline void arch_local_irq_disable(void) +{ + native_irq_disable(); +} + +static inline unsigned long arch_local_save_flags(void) +{ + return native_save_flags(); +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ + native_irq_restore(flags); +} + +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return native_irqs_disabled_flags(flags); +} + +#endif /* !CONFIG_IRQ_PIPELINE */ + +static inline int arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#endif /* _ASM_ARM64_IRQ_PIPELINE_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/mmu_context.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/mmu_context.h --- linux-5.15.26/arch/arm64/include/asm/mmu_context.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/mmu_context.h 2022-03-10 09:47:50.000000000 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -97,6 +98,9 @@ static inline void __cpu_set_tcr_t0sz(un static inline void cpu_uninstall_idmap(void) { struct mm_struct *mm = current->active_mm; + unsigned long flags; + + flags = hard_cond_local_irq_save(); cpu_set_reserved_ttbr0(); local_flush_tlb_all(); @@ -104,15 +108,23 @@ static inline void cpu_uninstall_idmap(v if (mm != &init_mm && !system_uses_ttbr0_pan()) cpu_switch_mm(mm->pgd, mm); + + hard_cond_local_irq_restore(flags); } static inline void cpu_install_idmap(void) { + unsigned long flags; + + flags = hard_cond_local_irq_save(); + cpu_set_reserved_ttbr0(); local_flush_tlb_all(); cpu_set_idmap_tcr_t0sz(); cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm); + + hard_cond_local_irq_restore(flags); } /* @@ -216,7 +228,7 @@ static inline void __switch_mm(struct mm } static inline void -switch_mm(struct mm_struct *prev, struct mm_struct *next, +do_switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { if (prev != next) @@ -231,6 +243,24 @@ switch_mm(struct mm_struct *prev, struct update_saved_ttbr0(tsk, next); } +static inline void +switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned long flags; + + protect_inband_mm(flags); + do_switch_mm(prev, next, tsk); + unprotect_inband_mm(flags); +} + +static inline void +switch_oob_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) /* hard irqs off */ +{ + do_switch_mm(prev, next, tsk); +} + static inline const struct cpumask * task_cpu_possible_mask(struct task_struct *p) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/ptrace.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/ptrace.h --- linux-5.15.26/arch/arm64/include/asm/ptrace.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/ptrace.h 2022-03-10 09:47:50.000000000 +0100 @@ -200,7 +200,13 @@ struct pt_regs { /* Only valid for some EL1 exceptions. 
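* With CONFIG_IRQ_PIPELINE, exit_rcu shrinks to a single bit and two more * bits are added: oob_on_entry records whether the exception was taken * while running out-of-band, and stalled_on_entry snapshots the in-band * stall bit so exit_to_kernel_mode() can restore the virtual interrupt * state (see the entry-common.c changes below).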
*/ u64 lockdep_hardirqs; +#ifdef CONFIG_IRQ_PIPELINE + u64 exit_rcu : 1, + oob_on_entry : 1, + stalled_on_entry : 1; +#else u64 exit_rcu; +#endif }; static inline bool in_syscall(struct pt_regs const *regs) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/syscall.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/syscall.h --- linux-5.15.26/arch/arm64/include/asm/syscall.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/syscall.h 2022-03-10 09:47:50.000000000 +0100 @@ -73,6 +73,11 @@ static inline void syscall_get_arguments memcpy(args, ®s->regs[1], 5 * sizeof(args[0])); } +static inline unsigned long syscall_get_arg0(struct pt_regs *regs) +{ + return regs->orig_x0; +} + static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, const unsigned long *args) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/thread_info.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/thread_info.h --- linux-5.15.26/arch/arm64/include/asm/thread_info.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/thread_info.h 2022-03-10 09:47:50.000000000 +0100 @@ -14,6 +14,7 @@ struct task_struct; +#include #include #include #include @@ -23,6 +24,7 @@ struct task_struct; */ struct thread_info { unsigned long flags; /* low level flags */ + unsigned long local_flags; /* local (synchronous) flags */ #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ #endif @@ -42,6 +44,7 @@ struct thread_info { void *scs_base; void *scs_sp; #endif + struct oob_thread_state oob_state; }; #define thread_saved_pc(tsk) \ @@ -58,6 +61,8 @@ void arch_release_task_struct(struct tas int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); +#define ti_local_flags(__ti) ((__ti)->local_flags) + #endif #define TIF_SIGPENDING 0 /* signal pending */ @@ -67,6 +72,7 @@ int arch_dup_task_struct(struct task_str #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ +#define TIF_RETUSER 7 /* INBAND_TASK_RETUSER is pending */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ @@ -81,6 +87,7 @@ int arch_dup_task_struct(struct task_str #define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ +#define TIF_MAYDAY 27 /* Emergency trap pending */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) @@ -97,11 +104,13 @@ int arch_dup_task_struct(struct task_str #define _TIF_SVE (1 << TIF_SVE) #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_RETUSER (1 << TIF_RETUSER) +#define _TIF_MAYDAY (1 << TIF_MAYDAY) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ - _TIF_NOTIFY_SIGNAL) + _TIF_NOTIFY_SIGNAL | _TIF_RETUSER) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ @@ -122,4 +131,12 @@ int arch_dup_task_struct(struct task_str INIT_SCS \ } +/* + * Local (synchronous) thread flags. 
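+ * These are reached via ti_local_flags() and, being local (synchronous), + * are presumably only touched from the owning thread or CPU, so plain + * non-atomic accesses suffice; _TLF_OOB, for instance, marks a thread + * currently running on the out-of-band stage.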
+ */ +#define _TLF_OOB 0x0001 +#define _TLF_DOVETAIL 0x0002 +#define _TLF_OFFSTAGE 0x0004 +#define _TLF_OOBTRAP 0x0008 + #endif /* __ASM_THREAD_INFO_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/include/asm/uaccess.h linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/uaccess.h --- linux-5.15.26/arch/arm64/include/asm/uaccess.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/include/asm/uaccess.h 2022-03-10 09:47:50.000000000 +0100 @@ -84,7 +84,7 @@ static inline void __uaccess_ttbr0_disab { unsigned long flags, ttbr; - local_irq_save(flags); + flags = hard_local_irq_save(); ttbr = read_sysreg(ttbr1_el1); ttbr &= ~TTBR_ASID_MASK; /* reserved_pg_dir placed before swapper_pg_dir */ @@ -93,7 +93,7 @@ static inline void __uaccess_ttbr0_disab /* Set reserved ASID */ write_sysreg(ttbr, ttbr1_el1); isb(); - local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline void __uaccess_ttbr0_enable(void) @@ -105,7 +105,7 @@ static inline void __uaccess_ttbr0_enabl * variable and the MSR. A context switch could trigger an ASID * roll-over and an update of 'ttbr0'. */ - local_irq_save(flags); + flags = hard_local_irq_save(); ttbr0 = READ_ONCE(current_thread_info()->ttbr0); /* Restore active ASID */ @@ -118,7 +118,7 @@ static inline void __uaccess_ttbr0_enabl /* Restore user page table */ write_sysreg(ttbr0, ttbr0_el1); isb(); - local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline bool uaccess_ttbr0_disable(void) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/Kconfig linux-dovetail-v5.15.y-dovetail/arch/arm64/Kconfig --- linux-5.15.26/arch/arm64/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -185,6 +185,8 @@ config ARM64 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS + select HAVE_IRQ_PIPELINE + select HAVE_DOVETAIL select HAVE_IRQ_TIME_ACCOUNTING select HAVE_NMI select HAVE_PATA_PLATFORM @@ -1060,6 +1062,8 @@ config ARCH_HAS_FILTER_PGPROT config CC_HAVE_SHADOW_CALL_STACK def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18) +source "kernel/Kconfig.dovetail" + config PARAVIRT bool "Enable paravirtualization code" help diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/asm-offsets.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/asm-offsets.c --- linux-5.15.26/arch/arm64/kernel/asm-offsets.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/asm-offsets.c 2022-03-10 09:47:50.000000000 +0100 @@ -30,6 +30,7 @@ int main(void) DEFINE(TSK_CPU, offsetof(struct task_struct, cpu)); BLANK(); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); + DEFINE(TSK_TI_LOCAL_FLAGS, offsetof(struct task_struct, thread_info.local_flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/debug-monitors.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/debug-monitors.c --- linux-5.15.26/arch/arm64/kernel/debug-monitors.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/debug-monitors.c 2022-03-10 09:47:50.000000000 +0100 @@ -232,7 +232,7 @@ static void send_user_sigtrap(int si_cod return; if 
(interrupts_enabled(regs)) - local_irq_enable(); + local_irq_enable_full(); arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs), "User debug trap"); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/entry-common.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/entry-common.c --- linux-5.15.26/arch/arm64/kernel/entry-common.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/entry-common.c 2022-03-10 09:47:50.000000000 +0100 @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -51,12 +52,60 @@ static __always_inline void __enter_from trace_hardirqs_off_finish(); } +static void noinstr _enter_from_kernel_mode(struct pt_regs *regs) +{ + __enter_from_kernel_mode(regs); + mte_check_tfsr_entry(); +} + +#ifdef CONFIG_IRQ_PIPELINE + static void noinstr enter_from_kernel_mode(struct pt_regs *regs) { + /* + * CAUTION: we may switch in-band as a result of handling a + * trap, so if we are running out-of-band, we must make sure + * not to perform the RCU exit since we did not enter it in + * the first place. + */ + regs->oob_on_entry = running_oob(); + if (regs->oob_on_entry) { + regs->exit_rcu = false; + goto out; + } + + /* + * We trapped from kernel space running in-band, we need to + * record the virtual interrupt state into the current + * register frame (regs->stalled_on_entry) in order to + * reinstate it from exit_to_kernel_mode(). Next we stall the + * in-band stage in order to mirror the current hardware state + * (i.e. hardirqs are off). + */ + regs->stalled_on_entry = test_and_stall_inband_nocheck(); + __enter_from_kernel_mode(regs); + + /* + * Our caller is going to inherit the hardware interrupt state + * from the trapped context once we have returned: if running + * in-band, align the stall bit on the upcoming state. + */ + if (running_inband() && interrupts_enabled(regs)) + unstall_inband_nocheck(); +out: mte_check_tfsr_entry(); } +#else + +static void noinstr enter_from_kernel_mode(struct pt_regs *regs) +{ + _enter_from_kernel_mode(regs); +} + +#endif /* !CONFIG_IRQ_PIPELINE */ + /* * Handle IRQ/context state management when exiting to kernel mode. * After this function returns it is not safe to call regular kernel code, @@ -88,7 +137,24 @@ static __always_inline void __exit_to_ke static void noinstr exit_to_kernel_mode(struct pt_regs *regs) { mte_check_tfsr_exit(); + + if (running_oob()) + return; + __exit_to_kernel_mode(regs); + +#ifdef CONFIG_IRQ_PIPELINE + /* + * Reinstate the virtual interrupt state which was in effect + * on entry to the trap. + */ + if (!regs->oob_on_entry) { + if (regs->stalled_on_entry) + stall_inband_nocheck(); + else + unstall_inband_nocheck(); + } +#endif } /* @@ -98,10 +164,15 @@ static void noinstr exit_to_kernel_mode( */ static __always_inline void __enter_from_user_mode(void) { - lockdep_hardirqs_off(CALLER_ADDR0); - CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit_irqoff(); - trace_hardirqs_off_finish(); + if (running_inband()) { + lockdep_hardirqs_off(CALLER_ADDR0); + WARN_ON_ONCE(irq_pipeline_debug() && test_inband_stall()); + CT_WARN_ON(ct_state() != CONTEXT_USER); + stall_inband_nocheck(); + user_exit_irqoff(); + unstall_inband_nocheck(); + trace_hardirqs_off_finish(); + } } static __always_inline void enter_from_user_mode(struct pt_regs *regs) @@ -113,31 +184,51 @@ static __always_inline void enter_from_u * Handle IRQ/context state management when exiting to user mode. 
* After this function returns it is not safe to call regular kernel code, * intrumentable code, or any code which may trigger an exception. + * + * irq_pipeline: prepare_exit_to_user_mode() tells the caller whether + * it is safe to return via the common in-band exit path, i.e. the + * in-band stage was unstalled on entry, and we are (still) running on + * it. */ static __always_inline void __exit_to_user_mode(void) { + stall_inband_nocheck(); trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(CALLER_ADDR0); user_enter_irqoff(); lockdep_hardirqs_on(CALLER_ADDR0); + unstall_inband_nocheck(); } -static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs) +static __always_inline +bool prepare_exit_to_user_mode(struct pt_regs *regs) { unsigned long flags; local_daif_mask(); - flags = READ_ONCE(current_thread_info()->flags); - if (unlikely(flags & _TIF_WORK_MASK)) - do_notify_resume(regs, flags); + if (running_inband() && !test_inband_stall()) { + flags = READ_ONCE(current_thread_info()->flags); + if (unlikely(flags & _TIF_WORK_MASK)) + do_notify_resume(regs, flags); + /* + * Caution: do_notify_resume() might have switched us + * to the out-of-band stage. + */ + return running_inband(); + } + + return false; } static __always_inline void exit_to_user_mode(struct pt_regs *regs) { - prepare_exit_to_user_mode(regs); + bool ret; + + ret = prepare_exit_to_user_mode(regs); mte_check_tfsr_exit(); - __exit_to_user_mode(); + if (ret) + __exit_to_user_mode(); } asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs) @@ -152,6 +243,7 @@ asmlinkage void noinstr asm_exit_to_user */ static void noinstr arm64_enter_nmi(struct pt_regs *regs) { + /* irq_pipeline: running this code oob is ok. */ regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); __nmi_enter(); @@ -221,22 +313,95 @@ static void noinstr arm64_exit_el1_dbg(s static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) { - if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + /* + * IRQ pipeline: the interrupt entry is special in that we may + * run the regular kernel entry prologue/epilogue only if the + * IRQ is going to be dispatched to its handler on behalf of + * the current context, i.e. only if running in-band and + * unstalled. If so, we also have to reconcile the hardware + * and virtual interrupt states temporarily in order to run + * such prologue. + */ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) { arm64_enter_nmi(regs); - else + } else { +#ifdef CONFIG_IRQ_PIPELINE + if (running_inband()) { + regs->stalled_on_entry = test_inband_stall(); + if (!regs->stalled_on_entry) { + stall_inband_nocheck(); + _enter_from_kernel_mode(regs); + unstall_inband_nocheck(); + return; + } + } +#else enter_from_kernel_mode(regs); +#endif + } } static void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) { - if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) { arm64_exit_nmi(regs); - else + } else { +#ifdef CONFIG_IRQ_PIPELINE + /* + * See enter_el1_irq_or_nmi() for details. UGLY: we + * also have to tell the tracer that irqs are off, + * since sync_current_irq_stage() did the opposite on + * exit. Hopefully, at some point arm64 will convert + * to the generic entry code which exhibits a less + * convoluted logic. 
+ */ + if (running_inband() && !regs->stalled_on_entry) { + stall_inband_nocheck(); + trace_hardirqs_off(); + exit_to_kernel_mode(regs); + unstall_inband_nocheck(); + } +#else exit_to_kernel_mode(regs); +#endif + } +} + +#ifdef CONFIG_IRQ_PIPELINE + +/* + * When pipelining interrupts, we have to reconcile the hardware and + * the virtual states. Hard irqs are off on entry while the current + * stage has to be unstalled: fix this up by stalling the in-band + * stage on entry, unstalling on exit. + */ +static inline void arm64_preempt_irq_enter(void) +{ + WARN_ON_ONCE(irq_pipeline_debug() && test_inband_stall()); + stall_inband_nocheck(); + trace_hardirqs_off(); } +static inline void arm64_preempt_irq_exit(void) +{ + trace_hardirqs_on(); + unstall_inband_nocheck(); +} + +#else + +static inline void arm64_preempt_irq_enter(void) +{ } + +static inline void arm64_preempt_irq_exit(void) +{ } + +#endif + static void __sched arm64_preempt_schedule_irq(void) { + arm64_preempt_irq_enter(); + lockdep_assert_irqs_disabled(); /* @@ -246,7 +411,7 @@ static void __sched arm64_preempt_schedu * DAIF we must have handled an NMI, so skip preemption. */ if (system_uses_irq_prio_masking() && read_sysreg(daif)) - return; + goto out; /* * Preempting a task from an IRQ means we leave copies of PSTATE @@ -258,16 +423,63 @@ static void __sched arm64_preempt_schedu */ if (system_capabilities_finalized()) preempt_schedule_irq(); +out: + arm64_preempt_irq_exit(); } -static void do_interrupt_handler(struct pt_regs *regs, - void (*handler)(struct pt_regs *)) +#ifdef CONFIG_DOVETAIL +/* + * When Dovetail is enabled, the companion core may switch contexts + * over the irq stack, therefore subsequent interrupts might be taken + * over sibling stack contexts. So we need a not so subtle way of + * figuring out whether the irq stack was actually exited, which + * cannot depend on the current task pointer. Instead, we track the + * interrupt nesting depth for a CPU in irq_nesting. + */ +DEFINE_PER_CPU(int, irq_nesting); + +static void __do_interrupt_handler(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) +{ + if (this_cpu_inc_return(irq_nesting) == 1) + call_on_irq_stack(regs, handler); + else + handler(regs); + + this_cpu_dec(irq_nesting); +} + +#else +static void __do_interrupt_handler(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) { if (on_thread_stack()) call_on_irq_stack(regs, handler); else handler(regs); } +#endif + +#ifdef CONFIG_IRQ_PIPELINE +static bool do_interrupt_handler(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) +{ + if (handler == handle_arch_irq) + handler = (void (*)(struct pt_regs *))handle_irq_pipelined; + + __do_interrupt_handler(regs, handler); + + return running_inband() && !irqs_disabled(); +} +#else +static bool do_interrupt_handler(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) +{ + __do_interrupt_handler(regs, handler); + + return true; +} +#endif extern void (*handle_arch_irq)(struct pt_regs *); extern void (*handle_arch_fiq)(struct pt_regs *); @@ -275,6 +487,11 @@ extern void (*handle_arch_fiq)(struct pt static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector, unsigned int esr) { + /* + * Dovetail: Same as __do_kernel_fault(), don't bother + * restoring the in-band stage, this trap is fatal and we are + * already walking on thin ice. 
+ */ arm64_enter_nmi(regs); console_verbose(); @@ -435,17 +652,19 @@ asmlinkage void noinstr el1h_64_sync_han static void noinstr el1_interrupt(struct pt_regs *regs, void (*handler)(struct pt_regs *)) { + bool ret; + write_sysreg(DAIF_PROCCTX_NOIRQ, daif); enter_el1_irq_or_nmi(regs); - do_interrupt_handler(regs, handler); + ret = do_interrupt_handler(regs, handler); /* * Note: thread_info::preempt_count includes both thread_info::count * and thread_info::need_resched, and is not equivalent to * preempt_count(). */ - if (IS_ENABLED(CONFIG_PREEMPTION) && + if (IS_ENABLED(CONFIG_PREEMPTION) && ret && READ_ONCE(current_thread_info()->preempt_count) == 0) arm64_preempt_schedule_irq(); @@ -660,7 +879,9 @@ asmlinkage void noinstr el0t_64_sync_han static void noinstr el0_interrupt(struct pt_regs *regs, void (*handler)(struct pt_regs *)) { - enter_from_user_mode(regs); + if (handler == handle_arch_fiq || + (running_inband() && !test_inband_stall())) + enter_from_user_mode(regs); write_sysreg(DAIF_PROCCTX_NOIRQ, daif); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/fpsimd.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/fpsimd.c --- linux-5.15.26/arch/arm64/kernel/fpsimd.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/fpsimd.c 2022-03-10 09:47:50.000000000 +0100 @@ -171,6 +171,42 @@ static void __get_cpu_fpsimd_context(voi WARN_ON(busy); } +static void __put_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, false); + + WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */ +} + +#ifdef CONFIG_DOVETAIL + +#define get_cpu_fpsimd_context(__flags) \ + do { \ + (__flags) = hard_preempt_disable(); \ + __get_cpu_fpsimd_context(); \ + } while (0) + +#define put_cpu_fpsimd_context(__flags) \ + do { \ + __put_cpu_fpsimd_context(); \ + hard_preempt_enable(__flags); \ + } while (0) + +void fpsimd_restore_current_oob(void) +{ + /* + * Restore the fpsimd context for the current task as it + * resumes from dovetail_context_switch(), which always happen + * on the out-of-band stage. Skip this for kernel threads + * which have no such context but always bear + * TIF_FOREIGN_FPSTATE. + */ + if (current->mm) + fpsimd_restore_current_state(); +} + +#else + /* * Claim ownership of the CPU FPSIMD context for use by the calling context. * @@ -180,18 +216,12 @@ static void __get_cpu_fpsimd_context(voi * The double-underscore version must only be called if you know the task * can't be preempted. */ -static void get_cpu_fpsimd_context(void) -{ - local_bh_disable(); - __get_cpu_fpsimd_context(); -} - -static void __put_cpu_fpsimd_context(void) -{ - bool busy = __this_cpu_xchg(fpsimd_context_busy, false); - - WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */ -} +#define get_cpu_fpsimd_context(__flags) \ + do { \ + local_bh_disable(); \ + __get_cpu_fpsimd_context(); \ + (void)(__flags); \ + } while (0) /* * Release the CPU FPSIMD context. @@ -200,11 +230,14 @@ static void __put_cpu_fpsimd_context(voi * previously called, with no call to put_cpu_fpsimd_context() in the * meantime. 
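* Both helpers are now flag-carrying macros: under CONFIG_DOVETAIL they * bracket the busy flag with hard_preempt_disable()/hard_preempt_enable(), * while the non-Dovetail variants keep the original local_bh_disable()/ * local_bh_enable() pairing and ignore the flags argument.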
*/ -static void put_cpu_fpsimd_context(void) -{ - __put_cpu_fpsimd_context(); - local_bh_enable(); -} +#define put_cpu_fpsimd_context(__flags) \ + do { \ + __put_cpu_fpsimd_context(); \ + local_bh_enable(); \ + (void)(__flags); \ + } while (0) + +#endif /* !CONFIG_DOVETAIL */ static bool have_cpu_fpsimd_context(void) { @@ -285,7 +318,7 @@ static void sve_free(struct task_struct static void task_fpsimd_load(void) { WARN_ON(!system_supports_fpsimd()); - WARN_ON(!have_cpu_fpsimd_context()); + WARN_ON(!hard_irqs_disabled() && !have_cpu_fpsimd_context()); if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), @@ -299,14 +332,14 @@ static void task_fpsimd_load(void) * Ensure FPSIMD/SVE storage in memory for the loaded context is up to * date with respect to the CPU registers. */ -static void fpsimd_save(void) +static void __fpsimd_save(void) { struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ WARN_ON(!system_supports_fpsimd()); - WARN_ON(!have_cpu_fpsimd_context()); + WARN_ON(!hard_irqs_disabled() && !have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { if (IS_ENABLED(CONFIG_ARM64_SVE) && @@ -329,6 +362,15 @@ static void fpsimd_save(void) } } +void fpsimd_save(void) +{ + unsigned long flags; + + flags = hard_cond_local_irq_save(); + __fpsimd_save(); + hard_cond_local_irq_restore(flags); +} + /* * All vector length selection from userspace comes through here. * We're on a slow path, so some sanity-checks are included. @@ -447,7 +489,7 @@ static void __fpsimd_to_sve(void *sst, s * task->thread.uw.fpsimd_state must be up to date before calling this * function. */ -static void fpsimd_to_sve(struct task_struct *task) +static void _fpsimd_to_sve(struct task_struct *task) { unsigned int vq; void *sst = task->thread.sve_state; @@ -460,6 +502,15 @@ static void fpsimd_to_sve(struct task_st __fpsimd_to_sve(sst, fst, vq); } +static void fpsimd_to_sve(struct task_struct *task) +{ + unsigned long flags; + + flags = hard_cond_local_irq_save(); + _fpsimd_to_sve(task); + hard_cond_local_irq_restore(flags); +} + /* * Transfer the SVE state in task->thread.sve_state to * task->thread.uw.fpsimd_state. @@ -478,15 +529,20 @@ static void sve_to_fpsimd(struct task_st struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; unsigned int i; __uint128_t const *p; + unsigned long flags; if (!system_supports_sve()) return; + flags = hard_cond_local_irq_save(); + vq = sve_vq_from_vl(task->thread.sve_vl); for (i = 0; i < SVE_NUM_ZREGS; ++i) { p = (__uint128_t const *)ZREG(sst, vq, i); fst->vregs[i] = arm64_le128_to_cpu(*p); } + + hard_cond_local_irq_restore(flags); } #ifdef CONFIG_ARM64_SVE @@ -581,6 +637,8 @@ void sve_sync_from_fpsimd_zeropad(struct int sve_set_vector_length(struct task_struct *task, unsigned long vl, unsigned long flags) { + unsigned long irqflags = 0; + if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT | PR_SVE_SET_VL_ONEXEC)) return -EINVAL; @@ -618,9 +676,9 @@ int sve_set_vector_length(struct task_st * non-SVE thread. 
*/ if (task == current) { - get_cpu_fpsimd_context(); + get_cpu_fpsimd_context(irqflags); - fpsimd_save(); + __fpsimd_save(); } fpsimd_flush_task_state(task); @@ -628,7 +686,7 @@ int sve_set_vector_length(struct task_st sve_to_fpsimd(task); if (task == current) - put_cpu_fpsimd_context(); + put_cpu_fpsimd_context(irqflags); /* * Force reallocation of task SVE state to the correct size @@ -932,10 +990,14 @@ void fpsimd_release_task(struct task_str */ void do_sve_acc(unsigned int esr, struct pt_regs *regs) { + unsigned long flags; + + oob_trap_notify(ARM64_TRAP_SVE, regs); + /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); - return; + goto out; } sve_alloc(current); @@ -944,7 +1006,7 @@ void do_sve_acc(unsigned int esr, struct return; } - get_cpu_fpsimd_context(); + get_cpu_fpsimd_context(flags); if (test_and_set_thread_flag(TIF_SVE)) WARN_ON(1); /* SVE access shouldn't have trapped */ @@ -963,10 +1025,12 @@ void do_sve_acc(unsigned int esr, struct sve_flush_live(vq_minus_one); fpsimd_bind_task_to_cpu(); } else { - fpsimd_to_sve(current); + _fpsimd_to_sve(current); } - put_cpu_fpsimd_context(); + put_cpu_fpsimd_context(flags); +out: + oob_trap_unwind(ARM64_TRAP_SVE, regs); } /* @@ -998,22 +1062,29 @@ void do_fpsimd_exc(unsigned int esr, str si_code = FPE_FLTRES; } + oob_trap_notify(ARM64_TRAP_FPE, regs); + send_sig_fault(SIGFPE, si_code, (void __user *)instruction_pointer(regs), current); + + oob_trap_unwind(ARM64_TRAP_FPE, regs); } void fpsimd_thread_switch(struct task_struct *next) { bool wrong_task, wrong_cpu; + unsigned long flags; if (!system_supports_fpsimd()) return; + flags = hard_cond_local_irq_save(); + __get_cpu_fpsimd_context(); /* Save unsaved fpsimd state, if any: */ - fpsimd_save(); + __fpsimd_save(); /* * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's @@ -1028,16 +1099,19 @@ void fpsimd_thread_switch(struct task_st wrong_task || wrong_cpu); __put_cpu_fpsimd_context(); + + hard_cond_local_irq_restore(flags); } void fpsimd_flush_thread(void) { int vl, supported_vl; + unsigned long flags; if (!system_supports_fpsimd()) return; - get_cpu_fpsimd_context(); + get_cpu_fpsimd_context(flags); fpsimd_flush_task_state(current); memset(¤t->thread.uw.fpsimd_state, 0, @@ -1078,7 +1152,7 @@ void fpsimd_flush_thread(void) current->thread.sve_vl_onexec = 0; } - put_cpu_fpsimd_context(); + put_cpu_fpsimd_context(flags); } /* @@ -1087,12 +1161,14 @@ void fpsimd_flush_thread(void) */ void fpsimd_preserve_current_state(void) { + unsigned long flags; + if (!system_supports_fpsimd()) return; - get_cpu_fpsimd_context(); - fpsimd_save(); - put_cpu_fpsimd_context(); + get_cpu_fpsimd_context(flags); + __fpsimd_save(); + put_cpu_fpsimd_context(flags); } /* @@ -1134,20 +1210,31 @@ static void fpsimd_bind_task_to_cpu(void } } -void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, +static void __fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, unsigned int sve_vl) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); WARN_ON(!system_supports_fpsimd()); - WARN_ON(!in_softirq() && !irqs_disabled()); last->st = st; last->sve_state = sve_state; last->sve_vl = sve_vl; } +void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, + unsigned int sve_vl) +{ + unsigned long flags; + + WARN_ON(!in_softirq() && !irqs_disabled()); + + flags = hard_cond_local_irq_save(); + 
__fpsimd_bind_state_to_cpu(st, sve_state, sve_vl); + hard_cond_local_irq_restore(flags); +} + /* * Load the userland FPSIMD state of 'current' from memory, but only if the * FPSIMD state already held in the registers is /not/ the most recent FPSIMD @@ -1155,6 +1242,8 @@ void fpsimd_bind_state_to_cpu(struct use */ void fpsimd_restore_current_state(void) { + unsigned long flags; + /* * For the tasks that were created before we detected the absence of * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), @@ -1169,14 +1258,14 @@ void fpsimd_restore_current_state(void) return; } - get_cpu_fpsimd_context(); + get_cpu_fpsimd_context(flags); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { task_fpsimd_load(); fpsimd_bind_task_to_cpu(); } - put_cpu_fpsimd_context(); + put_cpu_fpsimd_context(flags); } /* @@ -1186,21 +1275,23 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct user_fpsimd_state const *state) { + unsigned long flags; + if (WARN_ON(!system_supports_fpsimd())) return; - get_cpu_fpsimd_context(); + get_cpu_fpsimd_context(flags); current->thread.uw.fpsimd_state = *state; if (test_thread_flag(TIF_SVE)) - fpsimd_to_sve(current); + _fpsimd_to_sve(current); task_fpsimd_load(); fpsimd_bind_task_to_cpu(); clear_thread_flag(TIF_FOREIGN_FPSTATE); - put_cpu_fpsimd_context(); + put_cpu_fpsimd_context(flags); } /* @@ -1250,9 +1341,9 @@ void fpsimd_save_and_flush_cpu_state(voi { if (!system_supports_fpsimd()) return; - WARN_ON(preemptible()); + WARN_ON(!hard_irqs_disabled() && preemptible()); __get_cpu_fpsimd_context(); - fpsimd_save(); + __fpsimd_save(); fpsimd_flush_cpu_state(); __put_cpu_fpsimd_context(); } @@ -1278,18 +1369,23 @@ void fpsimd_save_and_flush_cpu_state(voi */ void kernel_neon_begin(void) { + unsigned long flags; + if (WARN_ON(!system_supports_fpsimd())) return; BUG_ON(!may_use_simd()); - get_cpu_fpsimd_context(); + get_cpu_fpsimd_context(flags); /* Save unsaved fpsimd state, if any: */ - fpsimd_save(); + __fpsimd_save(); /* Invalidate any task state remaining in the fpsimd regs: */ fpsimd_flush_cpu_state(); + + if (dovetailing()) + hard_cond_local_irq_restore(flags); } EXPORT_SYMBOL(kernel_neon_begin); @@ -1304,10 +1400,12 @@ EXPORT_SYMBOL(kernel_neon_begin); */ void kernel_neon_end(void) { + unsigned long flags = hard_local_save_flags(); + if (!system_supports_fpsimd()) return; - put_cpu_fpsimd_context(); + put_cpu_fpsimd_context(flags); } EXPORT_SYMBOL(kernel_neon_end); @@ -1397,9 +1495,13 @@ void __efi_fpsimd_end(void) static int fpsimd_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { + unsigned long flags; + switch (cmd) { case CPU_PM_ENTER: + flags = hard_cond_local_irq_save(); fpsimd_save_and_flush_cpu_state(); + hard_cond_local_irq_restore(flags); break; case CPU_PM_EXIT: break; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/idle.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/idle.c --- linux-5.15.26/arch/arm64/kernel/idle.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/idle.c 2022-03-10 09:47:50.000000000 +0100 @@ -42,5 +42,6 @@ void noinstr arch_cpu_idle(void) * tricks */ cpu_do_idle(); + hard_cond_local_irq_enable(); raw_local_irq_enable(); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/irq.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/irq.c --- linux-5.15.26/arch/arm64/kernel/irq.c 2022-03-02 11:48:10.000000000 +0100 +++ 
linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/irq.c 2022-03-10 09:47:50.000000000 +0100 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -28,7 +29,6 @@ DEFINE_PER_CPU(struct nmi_ctx, nmi_conte DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); - DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr); #ifdef CONFIG_SHADOW_CALL_STACK diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/irq_pipeline.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/irq_pipeline.c --- linux-5.15.26/arch/arm64/kernel/irq_pipeline.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/irq_pipeline.c 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2018 Philippe Gerum . + */ +#include +#include + +void arch_do_IRQ_pipelined(struct irq_desc *desc) +{ + struct pt_regs *regs = raw_cpu_ptr(&irq_pipeline.tick_regs); + struct pt_regs *old_regs = set_irq_regs(regs); + + irq_enter(); + handle_irq_desc(desc); + irq_exit(); + + set_irq_regs(old_regs); +} + +void __init arch_irq_pipeline_init(void) +{ + /* no per-arch init. */ +} diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/Makefile linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/Makefile --- linux-5.15.26/arch/arm64/kernel/Makefile 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/Makefile 2022-03-10 09:47:50.000000000 +0100 @@ -59,6 +59,7 @@ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o obj-$(CONFIG_PARAVIRT) += paravirt.o +obj-$(CONFIG_IRQ_PIPELINE) += irq_pipeline.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/signal.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/signal.c --- linux-5.15.26/arch/arm64/kernel/signal.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/signal.c 2022-03-10 09:47:50.000000000 +0100 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -917,15 +918,33 @@ static void do_signal(struct pt_regs *re restore_saved_sigmask(); } +static inline void do_retuser(void) +{ + unsigned long thread_flags; + + if (dovetailing()) { + thread_flags = current_thread_info()->flags; + if (thread_flags & _TIF_RETUSER) + inband_retuser_notify(); + } +} + void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { + WARN_ON_ONCE(irq_pipeline_debug() && running_oob()); + WARN_ON_ONCE(irq_pipeline_debug() && test_inband_stall()); + do { + stall_inband_nocheck(); + if (thread_flags & _TIF_NEED_RESCHED) { /* Unmask Debug and SError for the next task */ - local_daif_restore(DAIF_PROCCTX_NOIRQ); + local_daif_restore(irqs_pipelined() ? 
DAIF_PROCCTX : + DAIF_PROCCTX_NOIRQ); schedule(); } else { + unstall_inband_nocheck(); local_daif_restore(DAIF_PROCCTX); if (thread_flags & _TIF_UPROBE) @@ -945,11 +964,34 @@ void do_notify_resume(struct pt_regs *re if (thread_flags & _TIF_FOREIGN_FPSTATE) fpsimd_restore_current_state(); + + do_retuser(); + /* RETUSER might have switched oob */ + if (running_oob()) { + local_daif_mask(); + return; + } } + /* + * Dovetail: we may have restored the fpsimd state for + * current with no other opportunity to check for + * _TIF_FOREIGN_FPSTATE until we are back running on + * el0, so we must not take any interrupt until then, + * otherwise we may end up resuming with some OOB + * thread's fpsimd state. + */ local_daif_mask(); thread_flags = READ_ONCE(current_thread_info()->flags); } while (thread_flags & _TIF_WORK_MASK); + + /* + * irq_pipeline: trace_hardirqs_off was in effect on entry, we + * leave it this way by virtue of calling local_daif_mask() + * before exiting the loop. However, we did enter unstalled + * and we must restore such state on exit. + */ + unstall_inband_nocheck(); } unsigned long __ro_after_init signal_minsigstksz; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/smp.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/smp.c --- linux-5.15.26/arch/arm64/kernel/smp.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/smp.c 2022-03-10 09:47:50.000000000 +0100 @@ -77,7 +77,7 @@ enum ipi_msg_type { NR_IPI }; -static int ipi_irq_base __read_mostly; +int ipi_irq_base __read_mostly; static int nr_ipi __read_mostly = NR_IPI; static struct irq_desc *ipi_desc[NR_IPI] __read_mostly; @@ -258,6 +258,7 @@ asmlinkage notrace void secondary_start_ complete(&cpu_running); local_daif_restore(DAIF_PROCCTX); + local_irq_enable_full(); /* * OK, it's off to the idle thread for us @@ -800,6 +801,8 @@ static const char *ipi_types[NR_IPI] __t static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); +static unsigned int get_ipi_count(struct irq_desc *desc, unsigned int cpu); + unsigned long irq_err_count; int arch_show_interrupts(struct seq_file *p, int prec) @@ -810,7 +813,7 @@ int arch_show_interrupts(struct seq_file seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu)); + seq_printf(p, "%10u ", get_ipi_count(ipi_desc[i], cpu)); seq_printf(p, " %s\n", ipi_types[i]); } @@ -872,7 +875,7 @@ static void ipi_cpu_crash_stop(unsigned atomic_dec(&waiting_for_crash_ipi); - local_irq_disable(); + local_irq_disable_full(); sdei_mask_local_cpu(); if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) @@ -884,7 +887,7 @@ static void ipi_cpu_crash_stop(unsigned } /* - * Main handler for inter-processor interrupts + * Main handler for inter-processor interrupts on the in-band stage. */ static void do_handle_IPI(int ipinr) { @@ -943,6 +946,74 @@ static void do_handle_IPI(int ipinr) trace_ipi_exit_rcuidle(ipi_types[ipinr]); } +static void __smp_cross_call(const struct cpumask *target, unsigned int ipinr) +{ + trace_ipi_raise(target, ipi_types[ipinr]); + __ipi_send_mask(ipi_desc[ipinr], target); +} + +#ifdef CONFIG_IRQ_PIPELINE + +static DEFINE_PER_CPU(unsigned long, ipi_messages); + +static DEFINE_PER_CPU(unsigned int [NR_IPI], ipi_counts); + +static irqreturn_t ipi_handler(int irq, void *data) +{ + unsigned long *pmsg; + unsigned int ipinr; + + /* + * Decode in-band IPIs (0..NR_IPI - 1) multiplexed over + * SGI0. 
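local_irq_enable_full() and local_irq_disable_full(), used above in the secondary CPU bring-up and crash-stop paths, are taken here to operate on both interrupt states at once: the virtual in-band stall bit and the real CPU interrupt mask. A hedged sketch of the assumed composition (example_* names are hypothetical, and the exact ordering is the editor's guess, not taken from this patch):

static inline void example_irq_disable_full(void)
{
        local_irq_disable();            /* virtual: stall the in-band stage */
        hard_local_irq_disable();       /* real: mask CPU interrupts */
}

static inline void example_irq_enable_full(void)
{
        hard_local_irq_enable();        /* real mask off; new IRQs may be logged */
        local_irq_enable();             /* unstall and sync the in-band log */
}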
Out-of-band IPIs (SGI1, SGI2) have their own + * individual handler. + */ + pmsg = raw_cpu_ptr(&ipi_messages); + while (*pmsg) { + ipinr = ffs(*pmsg) - 1; + clear_bit(ipinr, pmsg); + __this_cpu_inc(ipi_counts[ipinr]); + do_handle_IPI(ipinr); + } + + return IRQ_HANDLED; +} + +static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) +{ + unsigned int cpu; + + /* regular in-band IPI (multiplexed over SGI0). */ + for_each_cpu(cpu, target) + set_bit(ipinr, &per_cpu(ipi_messages, cpu)); + + wmb(); + __smp_cross_call(target, 0); +} + +static unsigned int get_ipi_count(struct irq_desc *desc, unsigned int cpu) +{ + unsigned int irq = irq_desc_get_irq(desc); + return per_cpu(ipi_counts[irq - ipi_irq_base], cpu); +} + +void irq_send_oob_ipi(unsigned int irq, + const struct cpumask *cpumask) +{ + unsigned int sgi = irq - ipi_irq_base; + + if (WARN_ON(irq_pipeline_debug() && + (sgi < OOB_IPI_OFFSET || + sgi >= OOB_IPI_OFFSET + OOB_NR_IPI))) + return; + + /* Out-of-band IPI (SGI1-2). */ + __smp_cross_call(cpumask, sgi); +} +EXPORT_SYMBOL_GPL(irq_send_oob_ipi); + +#else + static irqreturn_t ipi_handler(int irq, void *data) { do_handle_IPI(irq - ipi_irq_base); @@ -951,10 +1022,16 @@ static irqreturn_t ipi_handler(int irq, static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) { - trace_ipi_raise(target, ipi_types[ipinr]); - __ipi_send_mask(ipi_desc[ipinr], target); + __smp_cross_call(target, ipinr); +} + +static unsigned int get_ipi_count(struct irq_desc *desc, unsigned int cpu) +{ + return irq_desc_kstat_cpu(desc, cpu); } +#endif /* CONFIG_IRQ_PIPELINE */ + static void ipi_setup(int cpu) { int i; @@ -981,18 +1058,25 @@ static void ipi_teardown(int cpu) void __init set_smp_ipi_range(int ipi_base, int n) { - int i; + int i, inband_nr_ipi; WARN_ON(n < NR_IPI); nr_ipi = min(n, NR_IPI); + /* + * irq_pipeline: the in-band stage traps SGI0 only, + * over which IPI messages are mutiplexed. Other SGIs + * are available for exchanging out-of-band IPIs. + */ + inband_nr_ipi = irqs_pipelined() ? 1 : nr_ipi; for (i = 0; i < nr_ipi; i++) { - int err; - - err = request_percpu_irq(ipi_base + i, ipi_handler, - "IPI", &cpu_number); - WARN_ON(err); + if (i < inband_nr_ipi) { + int err; + err = request_percpu_irq(ipi_base + i, ipi_handler, + "IPI", &cpu_number); + WARN_ON(err); + } ipi_desc[i] = irq_to_desc(ipi_base + i); irq_set_status_flags(ipi_base + i, IRQ_HIDDEN); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/syscall.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/syscall.c --- linux-5.15.26/arch/arm64/kernel/syscall.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/syscall.c 2022-03-10 09:47:50.000000000 +0100 @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -82,6 +83,7 @@ static void el0_svc_common(struct pt_reg const syscall_fn_t syscall_table[]) { unsigned long flags = current_thread_info()->flags; + int ret; regs->orig_x0 = regs->regs[0]; regs->syscallno = scno; @@ -104,8 +106,17 @@ static void el0_svc_common(struct pt_reg * (Similarly for HVC and SMC elsewhere.) */ + WARN_ON_ONCE(dovetail_debug() && + running_inband() && test_inband_stall()); local_daif_restore(DAIF_PROCCTX); + ret = pipeline_syscall(scno, regs); + if (ret > 0) + return; + + if (ret < 0) + goto tail_work; + if (flags & _TIF_MTE_ASYNC_FAULT) { /* * Process the asynchronous tag check fault before the actual @@ -146,6 +157,7 @@ static void el0_svc_common(struct pt_reg * check again. 
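irq_send_oob_ipi() above is the hook a companion core uses to kick remote CPUs without going through the multiplexed in-band SGI0 path. A hypothetical caller, assuming only what the hunk shows (ipi_irq_base plus OOB_IPI_OFFSET addresses the first out-of-band SGI; example_kick_cpu_oob is a made-up name):

static void example_kick_cpu_oob(int cpu)
{
        /* SGI1 is the first of the two out-of-band IPIs in this layout. */
        irq_send_oob_ipi(ipi_irq_base + OOB_IPI_OFFSET, cpumask_of(cpu));
}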
However, if we were tracing entry, then we always trace * exit regardless, as the old entry assembly did. */ +tail_work: if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { local_daif_mask(); flags = current_thread_info()->flags; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/kernel/traps.c linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/traps.c --- linux-5.15.26/arch/arm64/kernel/traps.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/kernel/traps.c 2022-03-10 09:47:50.000000000 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -202,7 +203,7 @@ static int __die(const char *str, int er return ret; } -static DEFINE_RAW_SPINLOCK(die_lock); +static DEFINE_HARD_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. @@ -374,7 +375,7 @@ void arm64_skip_faulting_instruction(str } static LIST_HEAD(undef_hook); -static DEFINE_RAW_SPINLOCK(undef_lock); +static DEFINE_HARD_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { @@ -496,14 +497,18 @@ void do_undefinstr(struct pt_regs *regs) return; BUG_ON(!user_mode(regs)); + oob_trap_notify(ARM64_TRAP_UNDI, regs); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + oob_trap_unwind(ARM64_TRAP_UNDI, regs); } NOKPROBE_SYMBOL(do_undefinstr); void do_bti(struct pt_regs *regs) { BUG_ON(!user_mode(regs)); + oob_trap_notify(ARM64_TRAP_BTI, regs); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + oob_trap_unwind(ARM64_TRAP_BTI, regs); } NOKPROBE_SYMBOL(do_bti); @@ -572,10 +577,13 @@ static void user_cache_maint_handler(uns return; } - if (ret) + if (ret) { + oob_trap_notify(ARM64_TRAP_ACCESS, regs); arm64_notify_segfault(tagged_address); - else + oob_trap_unwind(ARM64_TRAP_ACCESS, regs); + } else { arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + } } static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) @@ -620,8 +628,11 @@ static void mrs_handler(unsigned int esr rt = ESR_ELx_SYS64_ISS_RT(esr); sysreg = esr_sys64_to_sysreg(esr); - if (do_emulate_mrs(regs, sysreg, rt) != 0) + if (do_emulate_mrs(regs, sysreg, rt) != 0) { + oob_trap_notify(ARM64_TRAP_ACCESS, regs); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + oob_trap_unwind(ARM64_TRAP_ACCESS, regs); + } } static void wfi_handler(unsigned int esr, struct pt_regs *regs) @@ -850,11 +861,13 @@ void bad_el0_sync(struct pt_regs *regs, { unsigned long pc = instruction_pointer(regs); + oob_trap_notify(ARM64_TRAP_ACCESS, regs); current->thread.fault_address = 0; current->thread.fault_code = esr; arm64_force_sig_fault(SIGILL, ILL_ILLOPC, pc, "Bad EL0 synchronous exception"); + oob_trap_unwind(ARM64_TRAP_ACCESS, regs); } #ifdef CONFIG_VMAP_STACK diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/mm/context.c linux-dovetail-v5.15.y-dovetail/arch/arm64/mm/context.c --- linux-5.15.26/arch/arm64/mm/context.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/mm/context.c 2022-03-10 09:47:50.000000000 +0100 @@ -18,7 +18,7 @@ #include static u32 asid_bits; -static DEFINE_RAW_SPINLOCK(cpu_asid_lock); +static DEFINE_HARD_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation; static unsigned long *asid_map; @@ -217,6 +217,9 @@ void check_and_switch_context(struct mm_ unsigned long flags; unsigned int cpu; u64 asid, old_active_asid; + bool need_flush; + + WARN_ON_ONCE(dovetail_debug() && !hard_irqs_disabled()); if (system_supports_cnp()) cpu_set_reserved_ttbr0(); @@ 
-252,12 +255,14 @@ void check_and_switch_context(struct mm_ } cpu = smp_processor_id(); - if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) - local_flush_tlb_all(); + need_flush = cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending); atomic64_set(this_cpu_ptr(&active_asids), asid); raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + if (need_flush) + local_flush_tlb_all(); + switch_mm_fastpath: arm64_apply_bp_hardening(); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/arm64/mm/fault.c linux-dovetail-v5.15.y-dovetail/arch/arm64/mm/fault.c --- linux-5.15.26/arch/arm64/mm/fault.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/arm64/mm/fault.c 2022-03-10 09:47:50.000000000 +0100 @@ -268,11 +268,11 @@ static bool __kprobes is_spurious_el1_tr (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT) return false; - local_irq_save(flags); + flags = hard_local_irq_save(); asm volatile("at s1e1r, %0" :: "r" (addr)); isb(); par = read_sysreg_par(); - local_irq_restore(flags); + hard_local_irq_restore(flags); /* * If we now have a valid translation, treat the translation fault as @@ -388,6 +388,12 @@ static void __do_kernel_fault(unsigned l msg = "paging request"; } + /* + * Dovetail: Don't bother restoring the in-band stage in the + * non-recoverable fault case, we got busted and a full stage + * switch is likely to make things even worse. Try at least to + * get some debug output before panicing. + */ die_kernel_fault(msg, addr, esr, regs); } @@ -460,8 +466,10 @@ static void do_bad_area(unsigned long fa if (user_mode(regs)) { const struct fault_info *inf = esr_to_fault_info(esr); + oob_trap_notify(ARM64_TRAP_ACCESS, regs); set_thread_esr(addr, esr); arm64_force_sig_fault(inf->sig, inf->code, far, inf->name); + oob_trap_unwind(ARM64_TRAP_ACCESS, regs); } else { __do_kernel_fault(addr, esr, regs); } @@ -526,6 +534,8 @@ static int __kprobes do_page_fault(unsig if (kprobe_page_fault(regs, esr)) return 0; + oob_trap_notify(ARM64_TRAP_ACCESS, regs); + /* * If we're in an interrupt or have no user context, we must not take * the fault. @@ -602,7 +612,7 @@ retry: if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) goto no_context; - return 0; + goto out; } if (fault & VM_FAULT_RETRY) { @@ -618,7 +628,7 @@ retry: */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) - return 0; + goto out; /* * If we are in kernel mode at this point, we have no context to @@ -634,7 +644,7 @@ retry: * oom-killed). */ pagefault_out_of_memory(); - return 0; + goto out; } inf = esr_to_fault_info(esr); @@ -663,10 +673,12 @@ retry: far, inf->name); } - return 0; + goto out; no_context: __do_kernel_fault(addr, esr, regs); +out: + oob_trap_unwind(ARM64_TRAP_ACCESS, regs); return 0; } @@ -700,6 +712,8 @@ static int do_sea(unsigned long far, uns const struct fault_info *inf; unsigned long siaddr; + oob_trap_notify(ARM64_TRAP_SEA, regs); + inf = esr_to_fault_info(esr); if (user_mode(regs) && apei_claim_sea(regs) == 0) { @@ -707,7 +721,7 @@ static int do_sea(unsigned long far, uns * APEI claimed this as a firmware-first notification. * Some processing deferred to task_work before ret_to_user(). 
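The oob_trap_notify()/oob_trap_unwind() pairs added across these fault paths follow one bracketing rule: notify before any work that may raise a signal or sleep, and unwind on every exit path so the out-of-band core sees balanced events. A condensed illustration of the shape used by do_bad_area() and do_page_fault() above (example_handle_fault is hypothetical):

static void example_handle_fault(unsigned long far, unsigned int esr,
                                 struct pt_regs *regs)
{
        oob_trap_notify(ARM64_TRAP_ACCESS, regs);  /* may hand us back to in-band */

        if (!user_mode(regs))
                goto out;                          /* kernel-side handling elsewhere */

        arm64_force_sig_fault(SIGSEGV, SEGV_MAPERR, far, "example fault");
out:
        oob_trap_unwind(ARM64_TRAP_ACCESS, regs);  /* every exit path unwinds */
}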
*/ - return 0; + goto out; } if (esr & ESR_ELx_FnV) { @@ -721,6 +735,8 @@ static int do_sea(unsigned long far, uns siaddr = untagged_addr(far); } arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr); +out: + oob_trap_unwind(ARM64_TRAP_SEA, regs); return 0; } @@ -813,6 +829,8 @@ void do_mem_abort(unsigned long far, uns if (!inf->fn(far, esr, regs)) return; + oob_trap_notify(ARM64_TRAP_ACCESS, regs); + if (!user_mode(regs)) { pr_alert("Unhandled fault at 0x%016lx\n", addr); mem_abort_decode(esr); @@ -825,13 +843,18 @@ void do_mem_abort(unsigned long far, uns * address to the signal handler. */ arm64_notify_die(inf->name, regs, inf->sig, inf->code, addr, esr); + oob_trap_unwind(ARM64_TRAP_ACCESS, regs); } NOKPROBE_SYMBOL(do_mem_abort); void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { + oob_trap_notify(ARM64_TRAP_ALIGN, regs); + arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, addr, esr); + + oob_trap_unwind(ARM64_TRAP_ALIGN, regs); } NOKPROBE_SYMBOL(do_sp_pc_abort); @@ -894,6 +917,8 @@ void do_debug_exception(unsigned long ad const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); + oob_trap_notify(ARM64_TRAP_DEBUG, regs); + debug_exception_enter(regs); if (user_mode(regs) && !is_ttbr0_addr(pc)) @@ -904,6 +929,8 @@ void do_debug_exception(unsigned long ad } debug_exception_exit(regs); + + oob_trap_unwind(ARM64_TRAP_DEBUG, regs); } NOKPROBE_SYMBOL(do_debug_exception); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/Kconfig linux-dovetail-v5.15.y-dovetail/arch/Kconfig --- linux-5.15.26/arch/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -194,6 +194,9 @@ config HAVE_KPROBES_ON_FTRACE config HAVE_FUNCTION_ERROR_INJECTION bool +config HAVE_PERCPU_PREEMPT_COUNT + bool + config HAVE_NMI bool diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/entry/common.c linux-dovetail-v5.15.y-dovetail/arch/x86/entry/common.c --- linux-5.15.26/arch/x86/entry/common.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/entry/common.c 2022-03-10 09:47:50.000000000 +0100 @@ -75,6 +75,15 @@ __visible noinstr void do_syscall_64(str add_random_kstack_offset(); nr = syscall_enter_from_user_mode(regs, nr); + if (dovetailing()) { + if (nr == EXIT_SYSCALL_OOB) { + hard_local_irq_disable(); + return; + } + if (nr == EXIT_SYSCALL_TAIL) + goto done; + } + instrumentation_begin(); if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) { @@ -83,6 +92,7 @@ __visible noinstr void do_syscall_64(str } instrumentation_end(); +done: syscall_exit_to_user_mode(regs); } #endif @@ -127,11 +137,22 @@ __visible noinstr void do_int80_syscall_ * the semantics of syscall_get_nr(). */ nr = syscall_enter_from_user_mode(regs, nr); + + if (dovetailing()) { + if (nr == EXIT_SYSCALL_OOB) { + hard_local_irq_disable(); + return; + } + if (nr == EXIT_SYSCALL_TAIL) + goto done; + } + instrumentation_begin(); do_syscall_32_irqs_on(regs, nr); instrumentation_end(); +done: syscall_exit_to_user_mode(regs); } @@ -174,9 +195,20 @@ static noinstr bool __do_fast_syscall_32 nr = syscall_enter_from_user_mode_work(regs, nr); + if (dovetailing()) { + if (nr == EXIT_SYSCALL_OOB) { + instrumentation_end(); + hard_local_irq_disable(); + return true; + } + if (nr == EXIT_SYSCALL_TAIL) + goto done; + } + /* Now this is just like a normal syscall. 
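All three x86 syscall entry paths patched above make the same three-way decision on the value returned by syscall entry: EXIT_SYSCALL_OOB means the request was fully handled out-of-band, so the path leaves with hard IRQs off; EXIT_SYSCALL_TAIL skips dispatch and goes straight to exit work; anything else is a regular in-band syscall. A condensed, hypothetical helper capturing that logic (pipeline_filter_syscall does not exist in the patch; __do_fast_syscall_32() additionally closes its instrumentation section in the OOB case):

static bool pipeline_filter_syscall(struct pt_regs *regs, int nr)
{
        if (!dovetailing())
                return false;

        if (nr == EXIT_SYSCALL_OOB) {
                hard_local_irq_disable();        /* stay off the in-band exit path */
                return true;
        }

        if (nr == EXIT_SYSCALL_TAIL) {
                syscall_exit_to_user_mode(regs); /* exit work only, no dispatch */
                return true;
        }

        return false;                            /* regular in-band syscall */
}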
*/ do_syscall_32_irqs_on(regs, nr); +done: instrumentation_end(); syscall_exit_to_user_mode(regs); return true; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/entry/entry_64.S linux-dovetail-v5.15.y-dovetail/arch/x86/entry/entry_64.S --- linux-5.15.26/arch/x86/entry/entry_64.S 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/entry/entry_64.S 2022-03-10 09:47:50.000000000 +0100 @@ -413,6 +413,11 @@ SYM_CODE_END(\asmsym) * If hits in kernel mode then it needs to go through the paranoid * entry as the exception can hit any random state. No preemption * check on exit to keep the paranoid path simple. + * + * irq_pipeline: since those events are non-maskable in essence, + * we may assume NMI-type restrictions for their handlers, which + * means the latter may - and actually have to - run immediately + * regardless of the current stage. */ .macro idtentry_mce_db vector asmsym cfunc SYM_CODE_START(\asmsym) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/hyperv/hv_init.c linux-dovetail-v5.15.y-dovetail/arch/x86/hyperv/hv_init.c --- linux-5.15.26/arch/x86/hyperv/hv_init.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/hyperv/hv_init.c 2022-03-10 09:47:50.000000000 +0100 @@ -127,7 +127,8 @@ static inline bool hv_reenlightenment_av ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT; } -DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_reenlightenment) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(HYPERV_REENLIGHTENMENT_VECTOR, + sysvec_hyperv_reenlightenment) { ack_APIC_irq(); inc_irq_stat(irq_hv_reenlightenment_count); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/apic.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/apic.h --- linux-5.15.26/arch/x86/include/asm/apic.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/apic.h 2022-03-10 09:47:50.000000000 +0100 @@ -436,7 +436,7 @@ static inline void apic_set_eoi_write(vo extern void apic_ack_irq(struct irq_data *data); -static inline void ack_APIC_irq(void) +static inline void __ack_APIC_irq(void) { /* * ack_APIC_irq() actually gets compiled as a single instruction @@ -445,6 +445,11 @@ static inline void ack_APIC_irq(void) apic_eoi(); } +static inline void ack_APIC_irq(void) +{ + if (!irqs_pipelined()) + __ack_APIC_irq(); +} static inline bool lapic_vector_set_in_irr(unsigned int vector) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/dovetail.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/dovetail.h --- linux-5.15.26/arch/x86/include/asm/dovetail.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/dovetail.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2019 Philippe Gerum. 
+ */ +#ifndef _ASM_X86_DOVETAIL_H +#define _ASM_X86_DOVETAIL_H + +#if !defined(__ASSEMBLY__) && defined(CONFIG_DOVETAIL) + +#include + +static inline void arch_dovetail_exec_prepare(void) +{ + clear_thread_flag(TIF_NEED_FPU_LOAD); +} + +static inline +void arch_dovetail_switch_prepare(bool leave_inband) +{ + if (leave_inband) + fpu__suspend_inband(); +} + +static inline +void arch_dovetail_switch_finish(bool enter_inband) +{ + if (enter_inband) + fpu__resume_inband(); + else if (!(current->flags & PF_KTHREAD) && + test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); +} + +#endif + +#endif /* _ASM_X86_DOVETAIL_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/fpu/api.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/fpu/api.h --- linux-5.15.26/arch/x86/include/asm/fpu/api.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/fpu/api.h 2022-03-10 09:47:50.000000000 +0100 @@ -64,20 +64,29 @@ static inline void kernel_fpu_begin(void * * Disabling preemption also serializes against kernel_fpu_begin(). */ -static inline void fpregs_lock(void) +static inline unsigned long fpregs_lock(void) { + if (IS_ENABLED(CONFIG_IRQ_PIPELINE)) + return hard_preempt_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) local_bh_disable(); else preempt_disable(); + + return 0; } -static inline void fpregs_unlock(void) +static inline void fpregs_unlock(unsigned long flags) { - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - local_bh_enable(); - else - preempt_enable(); + if (IS_ENABLED(CONFIG_IRQ_PIPELINE)) { + hard_preempt_enable(flags); + } else { + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); + } } #ifdef CONFIG_X86_DEBUG_FPU @@ -91,6 +100,10 @@ static inline void fpregs_assert_state_c */ extern void switch_fpu_return(void); +/* For Dovetail context switching. */ +void fpu__suspend_inband(void); +void fpu__resume_inband(void); + /* * Query the presence of one or more xfeatures. Works on any legacy CPU as well. * diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/fpu/internal.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/fpu/internal.h --- linux-5.15.26/arch/x86/include/asm/fpu/internal.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/fpu/internal.h 2022-03-10 09:47:50.000000000 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -483,6 +484,32 @@ static inline void fpregs_restore_userre clear_thread_flag(TIF_NEED_FPU_LOAD); } +#ifdef CONFIG_DOVETAIL + +static inline void oob_fpu_set_preempt(struct fpu *fpu) +{ + fpu->preempted = 1; +} + +static inline void oob_fpu_clear_preempt(struct fpu *fpu) +{ + fpu->preempted = 0; +} + +static inline bool oob_fpu_preempted(struct fpu *old_fpu) +{ + return old_fpu->preempted; +} + +#else + +static inline bool oob_fpu_preempted(struct fpu *old_fpu) +{ + return false; +} + +#endif /* !CONFIG_DOVETAIL */ + /* * FPU state switching for scheduling. 
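Because fpregs_lock()/fpregs_unlock() now return and take a flags word, every caller follows the same shape regardless of whether the section is protected by hard_preempt_disable() (CONFIG_IRQ_PIPELINE) or by BH/preemption disabling. Illustrative caller, not from the patch (example_touch_fpu_state is a made-up name):

static void example_touch_fpu_state(void)
{
        unsigned long flags;

        flags = fpregs_lock();
        /* ... load or modify the FPU register state of current ... */
        fpregs_unlock(flags);
}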
* @@ -507,7 +534,8 @@ static inline void fpregs_restore_userre */ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { + if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD) && + !oob_fpu_preempted(old_fpu)) { save_fpregs_to_fpstate(old_fpu); /* * The save operation preserved register state, so the diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/fpu/types.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/fpu/types.h --- linux-5.15.26/arch/x86/include/asm/fpu/types.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/fpu/types.h 2022-03-10 09:47:50.000000000 +0100 @@ -329,6 +329,18 @@ struct fpu { */ unsigned int last_cpu; +#ifdef CONFIG_DOVETAIL + /* + * @preempted: + * + * When Dovetail is enabled, this flag is set for the inband + * task context saved when entering a kernel_fpu_begin/end() + * section before the latter got preempted by an out-of-band + * task. + */ + unsigned char preempted : 1; +#endif + /* * @avx512_timestamp: * diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/i8259.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/i8259.h --- linux-5.15.26/arch/x86/include/asm/i8259.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/i8259.h 2022-03-10 09:47:50.000000000 +0100 @@ -28,7 +28,7 @@ extern unsigned int cached_irq_mask; #define SLAVE_ICW4_DEFAULT 0x01 #define PIC_ICW4_AEOI 2 -extern raw_spinlock_t i8259A_lock; +extern hard_spinlock_t i8259A_lock; /* the PIC may need a careful delay on some platforms, hence specific calls */ static inline unsigned char inb_pic(unsigned int port) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/idtentry.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/idtentry.h --- linux-5.15.26/arch/x86/include/asm/idtentry.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/idtentry.h 2022-03-10 09:47:50.000000000 +0100 @@ -174,6 +174,50 @@ __visible noinstr void func(struct pt_re #define DECLARE_IDTENTRY_IRQ(vector, func) \ DECLARE_IDTENTRY_ERRORCODE(vector, func) +#ifdef CONFIG_IRQ_PIPELINE + +struct irq_stage_data; + +void arch_pipeline_entry(struct pt_regs *regs, u8 vector); + +#define DECLARE_IDTENTRY_SYSVEC_PIPELINED(vector, func) \ + DECLARE_IDTENTRY_SYSVEC(vector, func); \ + __visible void __##func(struct pt_regs *regs) + +#define DEFINE_IDTENTRY_IRQ_PIPELINED(func) \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + arch_pipeline_entry(regs, (u8)error_code); \ +} \ +static __always_inline void __##func(struct pt_regs *regs, u8 vector) + +/* + * In a pipelined model, the actual sysvec __handler() is directly + * instrumentable, just like it is in fact in the non-pipelined + * model. The indirect call via run_on_irqstack_cond() in + * DEFINE_IDTENTRY_SYSVEC() happens to hide the noinstr dependency + * from objtool in the latter case. 
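The new fpu->preempted bit covers a dual-kernel corner case: if an out-of-band task preempts current inside a kernel_fpu_begin()/end() section, the FPU registers no longer hold current's own state, so switch_fpu_prepare() above must not save them. How the companion core is expected to mark such a section is only sketched here (assumed usage; example_oob_preempt_fpu is hypothetical and not part of this patch):

static void example_oob_preempt_fpu(struct task_struct *inband)
{
        struct fpu *fpu = &inband->thread.fpu;

        oob_fpu_set_preempt(fpu);       /* registers do not belong to 'inband' */
        /* ... run out-of-band work, possibly using the FPU itself ... */
        oob_fpu_clear_preempt(fpu);     /* in-band side will reload its state */
}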
+ */ +#define DEFINE_IDTENTRY_SYSVEC_PIPELINED(vector, func) \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + arch_pipeline_entry(regs, vector); \ +} \ + \ +__visible void __##func(struct pt_regs *regs) + +#define DEFINE_IDTENTRY_SYSVEC_SIMPLE_PIPELINED(vector, func) \ + DEFINE_IDTENTRY_SYSVEC_PIPELINED(vector, func) + +#else /* !CONFIG_IRQ_PIPELINE */ + +#define DECLARE_IDTENTRY_SYSVEC_PIPELINED(vector, func) DECLARE_IDTENTRY_SYSVEC(vector, func) + +#define DEFINE_IDTENTRY_IRQ_PIPELINED(func) DEFINE_IDTENTRY_IRQ(func) +#define DEFINE_IDTENTRY_SYSVEC_PIPELINED(vector, func) DEFINE_IDTENTRY_SYSVEC(func) +#define DEFINE_IDTENTRY_SYSVEC_SIMPLE_PIPELINED(vector, func) DEFINE_IDTENTRY_SYSVEC_SIMPLE(func) + /** * DEFINE_IDTENTRY_IRQ - Emit code for device interrupt IDT entry points * @func: Function name of the entry point @@ -204,6 +248,8 @@ __visible noinstr void func(struct pt_re \ static noinline void __##func(struct pt_regs *regs, u32 vector) +#endif /* !CONFIG_IRQ_PIPELINE */ + /** * DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points * @vector: Vector number (ignored for C) @@ -447,6 +493,9 @@ __visible noinstr void func(struct pt_re #define DECLARE_IDTENTRY_SYSVEC(vector, func) \ idtentry_sysvec vector func +#define DECLARE_IDTENTRY_SYSVEC_PIPELINED(vector, func) \ + DECLARE_IDTENTRY_SYSVEC(vector, func) + #ifdef CONFIG_X86_64 # define DECLARE_IDTENTRY_MCE(vector, func) \ idtentry_mce_db vector asm_##func func @@ -635,21 +684,25 @@ DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spu #ifdef CONFIG_X86_LOCAL_APIC DECLARE_IDTENTRY_SYSVEC(ERROR_APIC_VECTOR, sysvec_error_interrupt); DECLARE_IDTENTRY_SYSVEC(SPURIOUS_APIC_VECTOR, sysvec_spurious_apic_interrupt); -DECLARE_IDTENTRY_SYSVEC(LOCAL_TIMER_VECTOR, sysvec_apic_timer_interrupt); -DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(LOCAL_TIMER_VECTOR, sysvec_apic_timer_interrupt); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi); #endif #ifdef CONFIG_SMP -DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi); -DECLARE_IDTENTRY_SYSVEC(IRQ_MOVE_CLEANUP_VECTOR, sysvec_irq_move_cleanup); -DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot); -DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single); -DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(RESCHEDULE_VECTOR, sysvec_reschedule_ipi); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(IRQ_MOVE_CLEANUP_VECTOR, sysvec_irq_move_cleanup); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(REBOOT_VECTOR, sysvec_reboot); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(CALL_FUNCTION_VECTOR, sysvec_call_function); +#ifdef CONFIG_IRQ_PIPELINE +DECLARE_IDTENTRY_SYSVEC(RESCHEDULE_OOB_VECTOR, sysvec_reschedule_oob_ipi); +DECLARE_IDTENTRY_SYSVEC(TIMER_OOB_VECTOR, sysvec_timer_oob_ipi); +#endif #endif #ifdef CONFIG_X86_LOCAL_APIC # ifdef CONFIG_X86_MCE_THRESHOLD -DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold); +DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold); # endif # ifdef CONFIG_X86_MCE_AMD @@ -661,28 +714,28 @@ DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VEC # endif # ifdef CONFIG_IRQ_WORK -DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(IRQ_WORK_VECTOR, sysvec_irq_work); # endif #endif #ifdef CONFIG_HAVE_KVM -DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, 
sysvec_kvm_posted_intr_ipi); -DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi); -DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi); #endif #if IS_ENABLED(CONFIG_HYPERV) -DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); -DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); -DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); #endif #if IS_ENABLED(CONFIG_ACRN_GUEST) -DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); #endif #ifdef CONFIG_XEN_PVHVM -DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback); +DECLARE_IDTENTRY_SYSVEC_PIPELINED(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback); #endif #ifdef CONFIG_KVM_GUEST diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/irqflags.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/irqflags.h --- linux-5.15.26/arch/x86/include/asm/irqflags.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/irqflags.h 2022-03-10 09:47:50.000000000 +0100 @@ -26,7 +26,7 @@ extern __always_inline unsigned long nat * it evaluates its effective address -- this is part of the * documented behavior of the "pop" instruction. */ - asm volatile("# __raw_save_flags\n\t" + asm volatile("# __native_save_flags\n\t" "pushf ; pop %0" : "=rm" (flags) : /* no input */ @@ -35,6 +35,15 @@ extern __always_inline unsigned long nat return flags; } +extern inline void native_restore_fl(unsigned long flags); +extern __always_inline void native_restore_fl(unsigned long flags) +{ + asm volatile("push %0 ; popf" + : /* no output */ + :"g" (flags) + :"memory", "cc"); +} + static __always_inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); @@ -45,6 +54,49 @@ static __always_inline void native_irq_e asm volatile("sti": : :"memory"); } +static inline unsigned long native_save_flags(void) +{ + return native_save_fl(); +} + +static __always_inline void native_irq_sync(void) +{ + asm volatile("sti ; nop ; cli": : :"memory"); +} + +static __always_inline unsigned long native_irq_save(void) +{ + unsigned long flags; + + flags = native_save_flags(); + + native_irq_disable(); + + return flags; +} + +static __always_inline int native_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF); +} + +static __always_inline void native_irq_restore(unsigned long flags) +{ + /* + * CAUTION: the hard_irq_* API may be used to bracket code + * which re-enables interrupts inside save/restore pairs, so + * do not try to be (too) smart: do restore the original flags + * unconditionally. 
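The native_* additions above complete a second, real flags API underneath the virtualized one: on a pipelined build arch_local_irq_*() only manipulates the in-band stall bit (see asm/irq_pipeline.h below), while the hard_local_irq_*() layer is assumed to resolve to these native helpers, or to their paravirt counterparts. Sketch of the assumed non-paravirt mapping (example_* names are illustrative):

static __always_inline unsigned long example_hard_irq_save(void)
{
        return native_irq_save();       /* really clears EFLAGS.IF */
}

static __always_inline void example_hard_irq_restore(unsigned long flags)
{
        native_irq_restore(flags);      /* unconditional, per the comment above */
}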
+ */ + native_restore_fl(flags); +} + +static __always_inline bool native_irqs_disabled(void) +{ + unsigned long flags = native_save_flags(); + return native_irqs_disabled_flags(flags); +} + static inline __cpuidle void native_safe_halt(void) { mds_idle_clear_cpu_buffers(); @@ -64,21 +116,7 @@ static inline __cpuidle void native_halt #else #ifndef __ASSEMBLY__ #include - -static __always_inline unsigned long arch_local_save_flags(void) -{ - return native_save_fl(); -} - -static __always_inline void arch_local_irq_disable(void) -{ - native_irq_disable(); -} - -static __always_inline void arch_local_irq_enable(void) -{ - native_irq_enable(); -} +#include /* * Used in the idle loop; sti takes one instruction cycle @@ -98,15 +136,6 @@ static inline __cpuidle void halt(void) native_halt(); } -/* - * For spinlocks, etc: - */ -static __always_inline unsigned long arch_local_irq_save(void) -{ - unsigned long flags = arch_local_save_flags(); - arch_local_irq_disable(); - return flags; -} #else #ifdef CONFIG_X86_64 @@ -124,7 +153,7 @@ static __always_inline unsigned long arc #ifndef __ASSEMBLY__ static __always_inline int arch_irqs_disabled_flags(unsigned long flags) { - return !(flags & X86_EFLAGS_IF); + return native_irqs_disabled_flags(flags); } static __always_inline int arch_irqs_disabled(void) @@ -134,11 +163,14 @@ static __always_inline int arch_irqs_dis return arch_irqs_disabled_flags(flags); } -static __always_inline void arch_local_irq_restore(unsigned long flags) +#ifndef CONFIG_IRQ_PIPELINE +static inline notrace void arch_local_irq_restore(unsigned long flags) { if (!arch_irqs_disabled_flags(flags)) arch_local_irq_enable(); } +#endif + #else #ifdef CONFIG_X86_64 #ifdef CONFIG_XEN_PV diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/irq_pipeline.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/irq_pipeline.h --- linux-5.15.26/arch/x86/include/asm/irq_pipeline.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/irq_pipeline.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,130 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2019 Philippe Gerum . 
+ */ +#ifndef _ASM_X86_IRQ_PIPELINE_H +#define _ASM_X86_IRQ_PIPELINE_H + +#include + +#ifdef CONFIG_IRQ_PIPELINE + +#include + +#define FIRST_SYSTEM_IRQ NR_IRQS +#define TIMER_OOB_IPI apicm_vector_irq(TIMER_OOB_VECTOR) +#define RESCHEDULE_OOB_IPI apicm_vector_irq(RESCHEDULE_OOB_VECTOR) +#define apicm_irq_vector(__irq) ((__irq) - FIRST_SYSTEM_IRQ + FIRST_SYSTEM_VECTOR) +#define apicm_vector_irq(__vec) ((__vec) - FIRST_SYSTEM_VECTOR + FIRST_SYSTEM_IRQ) + +#define X86_EFLAGS_SS_BIT 31 + +static inline notrace +unsigned long arch_irqs_virtual_to_native_flags(int stalled) +{ + return (!stalled) << X86_EFLAGS_IF_BIT; +} + +static inline notrace +unsigned long arch_irqs_native_to_virtual_flags(unsigned long flags) +{ + return hard_irqs_disabled_flags(flags) << X86_EFLAGS_SS_BIT; +} + +#ifndef CONFIG_PARAVIRT_XXL + +static inline notrace unsigned long arch_local_irq_save(void) +{ + int stalled = inband_irq_save(); + barrier(); + return arch_irqs_virtual_to_native_flags(stalled); +} + +static inline notrace void arch_local_irq_enable(void) +{ + barrier(); + inband_irq_enable(); +} + +static inline notrace void arch_local_irq_disable(void) +{ + inband_irq_disable(); + barrier(); +} + +static inline notrace unsigned long arch_local_save_flags(void) +{ + int stalled = inband_irqs_disabled(); + barrier(); + return arch_irqs_virtual_to_native_flags(stalled); +} + +#endif /* !CONFIG_PARAVIRT_XXL */ + +static inline notrace void arch_local_irq_restore(unsigned long flags) +{ + inband_irq_restore(native_irqs_disabled_flags(flags)); + barrier(); +} + +static inline +void arch_save_timer_regs(struct pt_regs *dst, struct pt_regs *src) +{ + dst->flags = src->flags; + dst->cs = src->cs; + dst->ip = src->ip; + dst->bp = src->bp; + dst->ss = src->ss; + dst->sp = src->sp; +} + +static inline bool arch_steal_pipelined_tick(struct pt_regs *regs) +{ + return !(regs->flags & X86_EFLAGS_IF); +} + +static inline int arch_enable_oob_stage(void) +{ + return 0; +} + +static inline void arch_handle_irq_pipelined(struct pt_regs *regs) +{ } + +#else /* !CONFIG_IRQ_PIPELINE */ + +struct pt_regs; + +#ifndef CONFIG_PARAVIRT_XXL + +static inline notrace unsigned long arch_local_save_flags(void) +{ + return native_save_fl(); +} + +static inline notrace void arch_local_irq_disable(void) +{ + native_irq_disable(); +} + +static inline notrace void arch_local_irq_enable(void) +{ + native_irq_enable(); +} + +/* + * For spinlocks, etc: + */ +static inline notrace unsigned long arch_local_irq_save(void) +{ + unsigned long flags = arch_local_save_flags(); + arch_local_irq_disable(); + return flags; +} + +#endif /* !CONFIG_PARAVIRT_XXL */ + +#endif /* !CONFIG_IRQ_PIPELINE */ + +#endif /* _ASM_X86_IRQ_PIPELINE_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/irq_stack.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/irq_stack.h --- linux-5.15.26/arch/x86/include/asm/irq_stack.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/irq_stack.h 2022-03-10 09:47:50.000000000 +0100 @@ -132,8 +132,13 @@ /* \ * User mode entry and interrupt on the irq stack do not \ * switch stacks. If from user mode the task stack is empty. \ + * \ + * irq_pipeline: we always start from a kernel context when \ + * replaying interrupts, so the user check is not relevant \ + * in this case. 
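The translation helpers above let generic code keep passing a single flags word: the virtual (stall) state travels in the X86_EFLAGS_IF bit so existing tests such as arch_irqs_disabled_flags() keep working, while bit 31 (unused in EFLAGS) is reserved for carrying the hard state. A worked check of the encoding, derived from the code above (example_flags_encoding is illustrative):

static inline void example_flags_encoding(void)
{
        unsigned long vflags;

        vflags = arch_irqs_virtual_to_native_flags(1);  /* stalled in-band stage */
        WARN_ON(vflags & X86_EFLAGS_IF);                /* reads as "IRQs off" */

        vflags = arch_irqs_virtual_to_native_flags(0);  /* unstalled */
        WARN_ON(!(vflags & X86_EFLAGS_IF));             /* reads as "IRQs on" */
}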
\ */ \ - if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \ + if ((!irqs_pipelined() && user_mode(regs)) || \ + __this_cpu_read(hardirq_stack_inuse)) { \ irq_enter_rcu(); \ func(c_args); \ irq_exit_rcu(); \ @@ -143,6 +148,11 @@ * switching stacks. Interrupts are disabled in both \ * places. Invoke the stack switch macro with the call \ * sequence which matches the above direct invocation. \ + * \ + * IRQ pipeline: only in-band (soft-)irq handlers have \ + * to run on the irqstack. Out-of-band irq handlers \ + * run directly over the preempted context, therefore \ + * they never land there. \ */ \ __this_cpu_write(hardirq_stack_inuse, true); \ call_on_irqstack(func, asm_call, constr); \ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/irq_vectors.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/irq_vectors.h --- linux-5.15.26/arch/x86/include/asm/irq_vectors.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/irq_vectors.h 2022-03-10 09:47:50.000000000 +0100 @@ -106,10 +106,19 @@ #define LOCAL_TIMER_VECTOR 0xec +#ifdef CONFIG_IRQ_PIPELINE +#define TIMER_OOB_VECTOR 0xeb +#define RESCHEDULE_OOB_VECTOR 0xea +#define FIRST_SYSTEM_APIC_VECTOR RESCHEDULE_OOB_VECTOR +#define NR_APIC_VECTORS (NR_VECTORS - FIRST_SYSTEM_VECTOR) +#else +#define FIRST_SYSTEM_APIC_VECTOR LOCAL_TIMER_VECTOR +#endif + #define NR_VECTORS 256 #ifdef CONFIG_X86_LOCAL_APIC -#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#define FIRST_SYSTEM_VECTOR FIRST_SYSTEM_APIC_VECTOR #else #define FIRST_SYSTEM_VECTOR NR_VECTORS #endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/mmu_context.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/mmu_context.h --- linux-5.15.26/arch/x86/include/asm/mmu_context.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/mmu_context.h 2022-03-10 09:47:50.000000000 +0100 @@ -132,6 +132,13 @@ extern void switch_mm_irqs_off(struct mm struct task_struct *tsk); #define switch_mm_irqs_off switch_mm_irqs_off +static inline void +switch_oob_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + switch_mm_irqs_off(prev, next, tsk); +} + #define activate_mm(prev, next) \ do { \ paravirt_activate_mm((prev), (next)); \ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/special_insns.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/special_insns.h --- linux-5.15.26/arch/x86/include/asm/special_insns.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/special_insns.h 2022-03-10 09:47:50.000000000 +0100 @@ -126,9 +126,9 @@ static inline void native_load_gs_index( { unsigned long flags; - local_irq_save(flags); + flags = hard_local_irq_save(); asm_load_gs_index(selector); - local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline unsigned long __read_cr4(void) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/syscall.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/syscall.h --- linux-5.15.26/arch/x86/include/asm/syscall.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/syscall.h 2022-03-10 09:47:50.000000000 +0100 @@ -127,6 +127,11 @@ static inline void syscall_get_arguments } } +static inline unsigned long syscall_get_arg0(struct pt_regs *regs) +{ + return regs->di; +} + static inline void syscall_set_arguments(struct 
task_struct *task, struct pt_regs *regs, const unsigned long *args) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/thread_info.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/thread_info.h --- linux-5.15.26/arch/x86/include/asm/thread_info.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/thread_info.h 2022-03-10 09:47:50.000000000 +0100 @@ -52,11 +52,13 @@ struct task_struct; #include #include +#include struct thread_info { unsigned long flags; /* low level flags */ unsigned long syscall_work; /* SYSCALL_WORK_ flags */ u32 status; /* thread synchronous flags */ + struct oob_thread_state oob_state; /* co-kernel thread state */ }; #define INIT_THREAD_INFO(tsk) \ @@ -64,6 +66,8 @@ struct thread_info { .flags = 0, \ } +#define ti_local_flags(__ti) ((__ti)->status) + #else /* !__ASSEMBLY__ */ #include @@ -90,12 +94,14 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ #define TIF_SLD 18 /* Restore split lock detection on context switch */ +#define TIF_RETUSER 19 /* INBAND_TASK_RETUSER is pending */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ +#define TIF_MAYDAY 26 /* emergency trap pending */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ @@ -114,10 +120,12 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SLD (1 << TIF_SLD) +#define _TIF_RETUSER (1 << TIF_RETUSER) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_MAYDAY (1 << TIF_MAYDAY) #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_ADDR32 (1 << TIF_ADDR32) @@ -209,6 +217,16 @@ static inline int arch_within_stack_fram * have to worry about atomic accesses. 
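ti_local_flags() exposes thread_info::status so stage-related bits can be tested without touching the atomic flags word; the _TLF_* values it is meant to be used with (TS_OOB and friends) are defined in the next hunk. A hypothetical check built on them (example_thread_is_oob is not part of the patch):

static inline bool example_thread_is_oob(struct task_struct *p)
{
        return ti_local_flags(task_thread_info(p)) & _TLF_OOB;
}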
*/ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ +/* bits 2 and 3 reserved for compat */ +#define TS_OOB 0x0010 /* Thread is running out-of-band */ +#define TS_DOVETAIL 0x0020 /* Dovetail notifier enabled */ +#define TS_OFFSTAGE 0x0040 /* Thread is in-flight to OOB context */ +#define TS_OOBTRAP 0x0080 /* Handling a trap from OOB context */ + +#define _TLF_OOB TS_OOB +#define _TLF_DOVETAIL TS_DOVETAIL +#define _TLF_OFFSTAGE TS_OFFSTAGE +#define _TLF_OOBTRAP TS_OOBTRAP #ifndef __ASSEMBLY__ #ifdef CONFIG_COMPAT diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/tlbflush.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/tlbflush.h --- linux-5.15.26/arch/x86/include/asm/tlbflush.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/tlbflush.h 2022-03-10 09:47:50.000000000 +0100 @@ -37,9 +37,9 @@ static inline void cr4_set_bits(unsigned { unsigned long flags; - local_irq_save(flags); + flags = hard_local_irq_save(); cr4_set_bits_irqsoff(mask); - local_irq_restore(flags); + hard_local_irq_restore(flags); } /* Clear in this cpu's CR4. */ @@ -47,9 +47,9 @@ static inline void cr4_clear_bits(unsign { unsigned long flags; - local_irq_save(flags); + flags = hard_local_irq_save(); cr4_clear_bits_irqsoff(mask); - local_irq_restore(flags); + hard_local_irq_restore(flags); } #ifndef MODULE diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/include/asm/uaccess.h linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/uaccess.h --- linux-5.15.26/arch/x86/include/asm/uaccess.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/include/asm/uaccess.h 2022-03-10 09:47:50.000000000 +0100 @@ -44,7 +44,7 @@ static inline bool __chk_range_not_ok(un #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline bool pagefault_disabled(void); # define WARN_ON_IN_IRQ() \ - WARN_ON_ONCE(!in_task() && !pagefault_disabled()) + WARN_ON_ONCE(running_inband() && !in_task() && !pagefault_disabled()) #else # define WARN_ON_IN_IRQ() #endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/Kconfig linux-dovetail-v5.15.y-dovetail/arch/x86/Kconfig --- linux-5.15.26/arch/x86/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -29,6 +29,8 @@ config X86_64 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY + select HAVE_IRQ_PIPELINE + select HAVE_DOVETAIL select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE select SWIOTLB @@ -222,6 +224,7 @@ config X86 select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_NMI + select HAVE_PERCPU_PREEMPT_COUNT select HAVE_OPTPROBES select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS @@ -872,6 +875,7 @@ config ACRN_GUEST endif #HYPERVISOR_GUEST +source "kernel/Kconfig.dovetail" source "arch/x86/Kconfig.cpu" config HPET_TIMER diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/alternative.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/alternative.c --- linux-5.15.26/arch/x86/kernel/alternative.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/alternative.c 2022-03-10 09:47:50.000000000 +0100 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -208,9 +209,9 @@ static __always_inline int optimize_nops if (nnops <= 1) return nnops; - local_irq_save(flags); + flags = hard_local_irq_save(); add_nops(instr + off, nnops); 
- local_irq_restore(flags); + hard_local_irq_restore(flags); DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i); @@ -694,9 +695,9 @@ void __init_or_module text_poke_early(vo */ memcpy(addr, opcode, len); } else { - local_irq_save(flags); + flags = hard_local_irq_save(); memcpy(addr, opcode, len); - local_irq_restore(flags); + hard_local_irq_restore(flags); sync_core(); /* @@ -728,6 +729,7 @@ static inline temp_mm_state_t use_tempor temp_mm_state_t temp_state; lockdep_assert_irqs_disabled(); + WARN_ON_ONCE(irq_pipeline_debug() && !hard_irqs_disabled()); /* * Make sure not to be in TLB lazy mode, as otherwise we'll end up @@ -821,7 +823,7 @@ static void *__text_poke(void *addr, con */ VM_BUG_ON(!ptep); - local_irq_save(flags); + local_irq_save_full(flags); pte = mk_pte(pages[0], pgprot); set_pte_at(poking_mm, poking_addr, ptep, pte); @@ -872,7 +874,7 @@ static void *__text_poke(void *addr, con */ BUG_ON(memcmp(addr, opcode, len)); - local_irq_restore(flags); + local_irq_restore_full(flags); pte_unmap_unlock(ptep, ptl); return addr; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/apic/apic.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/apic.c --- linux-5.15.26/arch/x86/kernel/apic/apic.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/apic.c 2022-03-10 09:47:50.000000000 +0100 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -274,10 +275,10 @@ void native_apic_icr_write(u32 low, u32 { unsigned long flags; - local_irq_save(flags); + flags = hard_local_irq_save(); apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); apic_write(APIC_ICR, low); - local_irq_restore(flags); + hard_local_irq_restore(flags); } u64 native_apic_icr_read(void) @@ -333,6 +334,9 @@ int lapic_get_maxlvt(void) static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; + unsigned long flags; + + flags = hard_cond_local_irq_save(); lvtt_value = LOCAL_TIMER_VECTOR; if (!oneshot) @@ -355,6 +359,8 @@ static void __setup_APIC_LVTT(unsigned i * According to Intel, MFENCE can do the serialization here. */ asm volatile("mfence" : : : "memory"); + hard_cond_local_irq_restore(flags); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } @@ -368,6 +374,8 @@ static void __setup_APIC_LVTT(unsigned i if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); + + hard_cond_local_irq_restore(flags); } /* @@ -473,28 +481,34 @@ static int lapic_next_event(unsigned lon static int lapic_next_deadline(unsigned long delta, struct clock_event_device *evt) { + unsigned long flags; u64 tsc; /* This MSR is special and need a special fence: */ weak_wrmsr_fence(); + flags = hard_local_irq_save(); tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); + hard_local_irq_restore(flags); return 0; } static int lapic_timer_shutdown(struct clock_event_device *evt) { + unsigned long flags; unsigned int v; /* Lapic used as dummy for broadcast ? 
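The __text_poke() hunk above switches to local_irq_save_full()/local_irq_restore_full(), which this patch appears to use where both the in-band (virtual) interrupt state and the hard CPU state have to be switched off together. A sketch of that pattern under the same assumption; my_patch_site() is illustrative only and not part of the patch:

static void my_patch_site(void *addr, const void *opcode, size_t len)
{
	unsigned long flags;

	local_irq_save_full(flags);	/* macro form: virtual and hard masking combined */
	memcpy(addr, opcode, len);
	sync_core();			/* serialize as the surrounding code does */
	local_irq_restore_full(flags);
}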
*/ if (evt->features & CLOCK_EVT_FEAT_DUMMY) return 0; + flags = hard_local_irq_save(); v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, v); apic_write(APIC_TMICT, 0); + hard_local_irq_restore(flags); return 0; } @@ -529,6 +543,32 @@ static void lapic_timer_broadcast(const #endif } +static DEFINE_PER_CPU(struct clock_event_device, lapic_events); + +#ifdef CONFIG_IRQ_PIPELINE + +#define LAPIC_TIMER_IRQ apicm_vector_irq(LOCAL_TIMER_VECTOR) + +static irqreturn_t lapic_oob_handler(int irq, void *dev_id) +{ + struct clock_event_device *evt = this_cpu_ptr(&lapic_events); + + trace_local_timer_entry(LOCAL_TIMER_VECTOR); + clockevents_handle_event(evt); + trace_local_timer_exit(LOCAL_TIMER_VECTOR); + + return IRQ_HANDLED; +} + +static struct irqaction lapic_oob_action = { + .handler = lapic_oob_handler, + .name = "Out-of-band LAPIC timer interrupt", + .flags = IRQF_TIMER | IRQF_PERCPU, +}; + +#else +#define LAPIC_TIMER_IRQ -1 +#endif /* * The local apic timer can be used for any function which is CPU local. @@ -536,8 +576,8 @@ static void lapic_timer_broadcast(const static struct clock_event_device lapic_clockevent = { .name = "lapic", .features = CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP - | CLOCK_EVT_FEAT_DUMMY, + CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP | + CLOCK_EVT_FEAT_PIPELINE | CLOCK_EVT_FEAT_DUMMY, .shift = 32, .set_state_shutdown = lapic_timer_shutdown, .set_state_periodic = lapic_timer_set_periodic, @@ -546,9 +586,8 @@ static struct clock_event_device lapic_c .set_next_event = lapic_next_event, .broadcast = lapic_timer_broadcast, .rating = 100, - .irq = -1, + .irq = LAPIC_TIMER_IRQ, }; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static const struct x86_cpu_id deadline_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */ @@ -1044,6 +1083,9 @@ void __init setup_boot_APIC_clock(void) /* Setup the lapic or request the broadcast */ setup_APIC_timer(); amd_e400_c1e_apic_setup(); +#ifdef CONFIG_IRQ_PIPELINE + setup_percpu_irq(LAPIC_TIMER_IRQ, &lapic_oob_action); +#endif } void setup_secondary_APIC_clock(void) @@ -1094,7 +1136,8 @@ static void local_apic_timer_interrupt(v * [ if a single-CPU system runs an SMP kernel then we call the local * interrupt as well. Thus we cannot inline the local irq ... ] */ -DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(LOCAL_TIMER_VECTOR, + sysvec_apic_timer_interrupt) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -1515,7 +1558,7 @@ static bool apic_check_and_ack(union api * per set bit. */ for_each_set_bit(bit, isr->map, APIC_IR_BITS) - ack_APIC_irq(); + __ack_APIC_irq(); return true; } @@ -2165,7 +2208,7 @@ static noinline void handle_spurious_int if (v & (1 << (vector & 0x1f))) { pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", vector, smp_processor_id()); - ack_APIC_irq(); + __ack_APIC_irq(); } else { pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", vector, smp_processor_id()); @@ -2183,18 +2226,23 @@ out: * trigger on an entry which is routed to the common_spurious idtentry * point. 
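Pipelined system vectors change shape here: DEFINE_IDTENTRY_SYSVEC(name) becomes DEFINE_IDTENTRY_SYSVEC_PIPELINED(vector, name), so the vector number is part of the declaration and the in-band body can be replayed from the interrupt log, as the irq_pipeline.c comments further down describe. A hedged sketch of what a handler declared this way looks like; MY_DEMO_VECTOR and sysvec_my_demo are placeholders, not vectors defined by this patch:

DEFINE_IDTENTRY_SYSVEC_PIPELINED(MY_DEMO_VECTOR, sysvec_my_demo)
{
	ack_APIC_irq();		/* ack is virtualized/deferred when pipelining is on */
	pr_info_ratelimited("demo vector fired on CPU%d\n", smp_processor_id());
}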
*/ -DEFINE_IDTENTRY_IRQ(spurious_interrupt) +DEFINE_IDTENTRY_IRQ_PIPELINED(spurious_interrupt) { handle_spurious_interrupt(vector); } -DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(SPURIOUS_APIC_VECTOR, + sysvec_spurious_apic_interrupt) { handle_spurious_interrupt(SPURIOUS_APIC_VECTOR); } /* * This interrupt should never happen with our APIC/SMP architecture + * + * irq_pipeline: same as spurious_interrupt, would run directly out of + * the IDT, no deferral via the interrupt log which means that only + * the hardware IRQ state is considered for masking. */ DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/apic/apic_flat_64.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/apic_flat_64.c --- linux-5.15.26/arch/x86/kernel/apic/apic_flat_64.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/apic_flat_64.c 2022-03-10 09:47:50.000000000 +0100 @@ -52,9 +52,9 @@ static void _flat_send_IPI_mask(unsigned { unsigned long flags; - local_irq_save(flags); + flags = hard_local_irq_save(); __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); + hard_local_irq_restore(flags); } static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/apic/apic_numachip.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/apic_numachip.c --- linux-5.15.26/arch/x86/kernel/apic/apic_numachip.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/apic_numachip.c 2022-03-10 09:47:50.000000000 +0100 @@ -103,10 +103,10 @@ static void numachip_send_IPI_one(int cp if (!((apicid ^ local_apicid) >> NUMACHIP_LAPIC_BITS)) { unsigned long flags; - local_irq_save(flags); + flags = hard_local_irq_save(); __default_send_IPI_dest_field(apicid, vector, APIC_DEST_PHYSICAL); - local_irq_restore(flags); + hard_local_irq_restore(flags); preempt_enable(); return; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/apic/io_apic.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/io_apic.c --- linux-5.15.26/arch/x86/kernel/apic/io_apic.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/io_apic.c 2022-03-10 09:47:50.000000000 +0100 @@ -78,7 +78,7 @@ #define for_each_irq_pin(entry, head) \ list_for_each_entry(entry, &head, list) -static DEFINE_RAW_SPINLOCK(ioapic_lock); +static DEFINE_HARD_SPINLOCK(ioapic_lock); static DEFINE_MUTEX(ioapic_mutex); static unsigned int ioapic_dynirq_base; static int ioapic_initialized; @@ -1634,7 +1634,7 @@ static int __init timer_irq_works(void) if (no_timer_check) return 1; - local_irq_enable(); + local_irq_enable_full(); if (boot_cpu_has(X86_FEATURE_TSC)) delay_with_tsc(); else @@ -1648,7 +1648,7 @@ static int __init timer_irq_works(void) * least one tick may be lost due to delays. */ - local_irq_disable(); + local_irq_disable_full(); /* Did jiffies advance? 
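ioapic_lock above (and vector_lock later in this patch) moves from DEFINE_RAW_SPINLOCK to DEFINE_HARD_SPINLOCK, so the protected data stays consistent even against out-of-band preemption. A minimal sketch of declaring and taking such a lock, assuming it keeps the raw_spinlock API while disabling interrupts in the CPU underneath, as the vector_lock hunks suggest; my_hw_lock and my_hw_state are invented:

static DEFINE_HARD_SPINLOCK(my_hw_lock);
static u32 my_hw_state;

static void my_hw_update(u32 val)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&my_hw_lock, flags);	/* same raw_* accessors */
	my_hw_state = val;
	raw_spin_unlock_irqrestore(&my_hw_lock, flags);
}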
*/ return time_after(jiffies, t1 + 4); @@ -1719,14 +1719,56 @@ static bool io_apic_level_ack_pending(st return false; } +static inline void do_prepare_move(struct irq_data *data) +{ + if (!irqd_irq_masked(data)) + mask_ioapic_irq(data); +} + +#ifdef CONFIG_IRQ_PIPELINE + +static inline void ioapic_finish_move(struct irq_data *data, bool moveit); + +static void ioapic_deferred_irq_move(struct irq_work *work) +{ + struct irq_data *data; + struct irq_desc *desc; + unsigned long flags; + + data = container_of(work, struct irq_data, move_work); + desc = irq_data_to_desc(data); + raw_spin_lock_irqsave(&desc->lock, flags); + do_prepare_move(data); + ioapic_finish_move(data, true); + raw_spin_unlock_irqrestore(&desc->lock, flags); +} + +static inline bool __ioapic_prepare_move(struct irq_data *data) +{ + init_irq_work(&data->move_work, ioapic_deferred_irq_move); + irq_work_queue(&data->move_work); + + return false; /* Postpone ioapic_finish_move(). */ +} + +#else /* !CONFIG_IRQ_PIPELINE */ + +static inline bool __ioapic_prepare_move(struct irq_data *data) +{ + do_prepare_move(data); + + return true; +} + +#endif + static inline bool ioapic_prepare_move(struct irq_data *data) { /* If we are moving the IRQ we need to mask it */ - if (unlikely(irqd_is_setaffinity_pending(data))) { - if (!irqd_irq_masked(data)) - mask_ioapic_irq(data); - return true; - } + if (irqd_is_setaffinity_pending(data) && + !irqd_is_setaffinity_blocked(data)) + return __ioapic_prepare_move(data); + return false; } @@ -1825,7 +1867,7 @@ static void ioapic_ack_level(struct irq_ * We must acknowledge the irq before we move it or the acknowledge will * not propagate properly. */ - ack_APIC_irq(); + __ack_APIC_irq(); /* * Tail end of clearing remote IRR bit (either by delivering the EOI @@ -1987,7 +2029,8 @@ static struct irq_chip ioapic_chip __rea .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, .flags = IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_AFFINITY_PRE_STARTUP, + IRQCHIP_AFFINITY_PRE_STARTUP | + IRQCHIP_PIPELINE_SAFE, }; static struct irq_chip ioapic_ir_chip __read_mostly = { @@ -2001,7 +2044,8 @@ static struct irq_chip ioapic_ir_chip __ .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, .flags = IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_AFFINITY_PRE_STARTUP, + IRQCHIP_AFFINITY_PRE_STARTUP | + IRQCHIP_PIPELINE_SAFE, }; static inline void init_IO_APIC_traps(void) @@ -2048,7 +2092,7 @@ static void unmask_lapic_irq(struct irq_ static void ack_lapic_irq(struct irq_data *data) { - ack_APIC_irq(); + __ack_APIC_irq(); } static struct irq_chip lapic_chip __read_mostly = { @@ -2056,6 +2100,7 @@ static struct irq_chip lapic_chip __read .irq_mask = mask_lapic_irq, .irq_unmask = unmask_lapic_irq, .irq_ack = ack_lapic_irq, + .flags = IRQCHIP_PIPELINE_SAFE, }; static void lapic_register_intr(int irq) @@ -2175,7 +2220,7 @@ static inline void __init check_timer(vo if (!global_clock_event) return; - local_irq_disable(); + local_irq_disable_full(); /* * get/set the timer IRQ vector: @@ -2308,7 +2353,7 @@ static inline void __init check_timer(vo panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " "report. 
Then try booting with the 'noapic' option.\n"); out: - local_irq_enable(); + local_irq_enable_full(); } /* @@ -3037,10 +3082,10 @@ int mp_irqdomain_alloc(struct irq_domain mp_preconfigure_entry(data); mp_register_handler(virq, data->is_level); - local_irq_save(flags); + local_irq_save_full(flags); if (virq < nr_legacy_irqs()) legacy_pic->mask(virq); - local_irq_restore(flags); + local_irq_restore_full(flags); apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Preconfigured routing entry (%d-%d -> IRQ %d Level:%i ActiveLow:%i)\n", diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/apic/ipi.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/ipi.c --- linux-5.15.26/arch/x86/kernel/apic/ipi.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/ipi.c 2022-03-10 09:47:50.000000000 +0100 @@ -117,8 +117,10 @@ void __default_send_IPI_shortcut(unsigne * cli/sti. Otherwise we use an even cheaper single atomic write * to the APIC. */ + unsigned long flags; unsigned int cfg; + flags = hard_cond_local_irq_save(); /* * Wait for idle. */ @@ -137,6 +139,8 @@ void __default_send_IPI_shortcut(unsigne * Send the IPI. The write to APIC_ICR fires this off. */ native_apic_mem_write(APIC_ICR, cfg); + + hard_cond_local_irq_restore(flags); } /* @@ -145,8 +149,10 @@ void __default_send_IPI_shortcut(unsigne */ void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) { + unsigned long flags; unsigned long cfg; + flags = hard_cond_local_irq_save(); /* * Wait for idle. */ @@ -170,16 +176,18 @@ void __default_send_IPI_dest_field(unsig * Send the IPI. The write to APIC_ICR fires this off. */ native_apic_mem_write(APIC_ICR, cfg); + + hard_cond_local_irq_restore(flags); } void default_send_IPI_single_phys(int cpu, int vector) { unsigned long flags; - local_irq_save(flags); + flags = hard_local_irq_save(); __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, cpu), vector, APIC_DEST_PHYSICAL); - local_irq_restore(flags); + hard_local_irq_restore(flags); } void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) @@ -192,12 +200,12 @@ void default_send_IPI_mask_sequence_phys * to an arbitrary mask, so I do a unicast to each CPU instead. 
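The IPI helpers above rely on hard_cond_local_irq_save()/hard_cond_local_irq_restore(), the conditional variant this patch seems to prefer where the caller may already run with hard IRQs off, so the inner save/restore nests as a no-op. A sketch under that assumption; my_apic_program() is illustrative only:

static void my_apic_program(u32 reg, u32 val)
{
	unsigned long flags;

	/* Nests safely below hard_local_irq_save() sections in the callers. */
	flags = hard_cond_local_irq_save();
	native_apic_mem_write(reg, val);
	hard_cond_local_irq_restore(flags);
}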
* - mbligh */ - local_irq_save(flags); + flags = hard_local_irq_save(); for_each_cpu(query_cpu, mask) { __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } - local_irq_restore(flags); + hard_local_irq_restore(flags); } void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, @@ -209,14 +217,14 @@ void default_send_IPI_mask_allbutself_ph /* See Hack comment above */ - local_irq_save(flags); + flags = hard_local_irq_save(); for_each_cpu(query_cpu, mask) { if (query_cpu == this_cpu) continue; __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } - local_irq_restore(flags); + hard_local_irq_restore(flags); } /* @@ -256,12 +264,12 @@ void default_send_IPI_mask_sequence_logi * should be modified to do 1 message per cluster ID - mbligh */ - local_irq_save(flags); + flags = hard_local_irq_save(); for_each_cpu(query_cpu, mask) __default_send_IPI_dest_field( early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); + hard_local_irq_restore(flags); } void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, @@ -273,7 +281,7 @@ void default_send_IPI_mask_allbutself_lo /* See Hack comment above */ - local_irq_save(flags); + flags = hard_local_irq_save(); for_each_cpu(query_cpu, mask) { if (query_cpu == this_cpu) continue; @@ -281,7 +289,7 @@ void default_send_IPI_mask_allbutself_lo early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, APIC_DEST_LOGICAL); } - local_irq_restore(flags); + hard_local_irq_restore(flags); } /* @@ -295,10 +303,10 @@ void default_send_IPI_mask_logical(const if (!mask) return; - local_irq_save(flags); + flags = hard_local_irq_save(); WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); + hard_local_irq_restore(flags); } /* must come after the send_IPI functions above for inlining */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/apic/msi.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/msi.c --- linux-5.15.26/arch/x86/kernel/apic/msi.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/msi.c 2022-03-10 09:47:50.000000000 +0100 @@ -153,7 +153,8 @@ static struct irq_chip pci_msi_controlle .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_affinity = msi_set_affinity, .flags = IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_AFFINITY_PRE_STARTUP, + IRQCHIP_AFFINITY_PRE_STARTUP | + IRQCHIP_PIPELINE_SAFE, }; int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, @@ -223,7 +224,8 @@ static struct irq_chip pci_msi_ir_contro .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_AFFINITY_PRE_STARTUP, + IRQCHIP_AFFINITY_PRE_STARTUP | + IRQCHIP_PIPELINE_SAFE, }; static struct msi_domain_info pci_msi_ir_domain_info = { @@ -278,7 +280,8 @@ static struct irq_chip dmar_msi_controll .irq_compose_msi_msg = dmar_msi_compose_msg, .irq_write_msi_msg = dmar_msi_write_msg, .flags = IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_AFFINITY_PRE_STARTUP, + IRQCHIP_AFFINITY_PRE_STARTUP | + IRQCHIP_PIPELINE_SAFE, }; static int dmar_msi_init(struct irq_domain *domain, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/apic/vector.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/vector.c --- linux-5.15.26/arch/x86/kernel/apic/vector.c 2022-03-02 
11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/vector.c 2022-03-10 09:47:50.000000000 +0100 @@ -39,7 +39,7 @@ struct apic_chip_data { struct irq_domain *x86_vector_domain; EXPORT_SYMBOL_GPL(x86_vector_domain); -static DEFINE_RAW_SPINLOCK(vector_lock); +static DEFINE_HARD_SPINLOCK(vector_lock); static cpumask_var_t vector_searchmask; static struct irq_chip lapic_controller; static struct irq_matrix *vector_matrix; @@ -813,6 +813,10 @@ static struct irq_desc *__setup_vector_i { int isairq = vector - ISA_IRQ_VECTOR(0); + /* Copy the cleanup vector if irqs are pipelined. */ + if (IS_ENABLED(CONFIG_IRQ_PIPELINE) && + vector == IRQ_MOVE_CLEANUP_VECTOR) + return irq_to_desc(IRQ_MOVE_CLEANUP_VECTOR); /* 1:1 mapping */ /* Check whether the irq is in the legacy space */ if (isairq < 0 || isairq >= nr_legacy_irqs()) return VECTOR_UNUSED; @@ -847,9 +851,11 @@ void lapic_online(void) void lapic_offline(void) { - lock_vector_lock(); + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); irq_matrix_offline(vector_matrix); - unlock_vector_lock(); + raw_spin_unlock_irqrestore(&vector_lock, flags); } static int apic_set_affinity(struct irq_data *irqd, @@ -857,6 +863,8 @@ static int apic_set_affinity(struct irq_ { int err; + WARN_ON_ONCE(irqs_pipelined() && !hard_irqs_disabled()); + if (WARN_ON_ONCE(!irqd_is_activated(irqd))) return -EIO; @@ -886,10 +894,44 @@ static int apic_retrigger_irq(struct irq return 1; } -void apic_ack_irq(struct irq_data *irqd) +#if defined(CONFIG_IRQ_PIPELINE) && \ + defined(CONFIG_GENERIC_PENDING_IRQ) + +static void apic_deferred_irq_move(struct irq_work *work) +{ + struct irq_data *irqd; + struct irq_desc *desc; + unsigned long flags; + + irqd = container_of(work, struct irq_data, move_work); + desc = irq_data_to_desc(irqd); + raw_spin_lock_irqsave(&desc->lock, flags); + __irq_move_irq(irqd); + raw_spin_unlock_irqrestore(&desc->lock, flags); +} + +static inline void apic_move_irq(struct irq_data *irqd) +{ + if (irqd_is_setaffinity_pending(irqd) && + !irqd_is_setaffinity_blocked(irqd)) { + init_irq_work(&irqd->move_work, apic_deferred_irq_move); + irq_work_queue(&irqd->move_work); + } +} + +#else + +static inline void apic_move_irq(struct irq_data *irqd) { irq_move_irq(irqd); - ack_APIC_irq(); +} + +#endif + +void apic_ack_irq(struct irq_data *irqd) +{ + apic_move_irq(irqd); + __ack_APIC_irq(); } void apic_ack_edge(struct irq_data *irqd) @@ -938,15 +980,17 @@ static void free_moved_vector(struct api apicd->move_in_progress = 0; } -DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_cleanup) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(IRQ_MOVE_CLEANUP_VECTOR, + sysvec_irq_move_cleanup) { struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); struct apic_chip_data *apicd; struct hlist_node *tmp; + unsigned long flags; ack_APIC_irq(); /* Prevent vectors vanishing under us */ - raw_spin_lock(&vector_lock); + raw_spin_lock_irqsave(&vector_lock, flags); hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { unsigned int irr, vector = apicd->prev_vector; @@ -968,14 +1012,15 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_c free_moved_vector(apicd); } - raw_spin_unlock(&vector_lock); + raw_spin_unlock_irqrestore(&vector_lock, flags); } static void __send_cleanup_vector(struct apic_chip_data *apicd) { + unsigned long flags; unsigned int cpu; - raw_spin_lock(&vector_lock); + raw_spin_lock_irqsave(&vector_lock, flags); apicd->move_in_progress = 0; cpu = apicd->prev_cpu; if (cpu_online(cpu)) { @@ -984,7 +1029,7 @@ static void __send_cleanup_vector(struct } else { 
apicd->prev_vector = 0; } - raw_spin_unlock(&vector_lock); + raw_spin_unlock_irqrestore(&vector_lock, flags); } void send_cleanup_vector(struct irq_cfg *cfg) @@ -1023,6 +1068,8 @@ void irq_force_complete_move(struct irq_ struct irq_data *irqd; unsigned int vector; + WARN_ON_ONCE(irqs_pipelined() && !hard_irqs_disabled()); + /* * The function is called for all descriptors regardless of which * irqdomain they belong to. For example if an IRQ is provided by @@ -1113,9 +1160,10 @@ unlock: int lapic_can_unplug_cpu(void) { unsigned int rsvd, avl, tomove, cpu = smp_processor_id(); + unsigned long flags; int ret = 0; - raw_spin_lock(&vector_lock); + raw_spin_lock_irqsave(&vector_lock, flags); tomove = irq_matrix_allocated(vector_matrix); avl = irq_matrix_available(vector_matrix, true); if (avl < tomove) { @@ -1130,7 +1178,7 @@ int lapic_can_unplug_cpu(void) rsvd, avl); } out: - raw_spin_unlock(&vector_lock); + raw_spin_unlock_irqrestore(&vector_lock, flags); return ret; } #endif /* HOTPLUG_CPU */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/apic/x2apic_cluster.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/x2apic_cluster.c --- linux-5.15.26/arch/x86/kernel/apic/x2apic_cluster.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/x2apic_cluster.c 2022-03-10 09:47:50.000000000 +0100 @@ -44,7 +44,7 @@ __x2apic_send_IPI_mask(const struct cpum /* x2apic MSRs are special and need a special fence: */ weak_wrmsr_fence(); - local_irq_save(flags); + flags = hard_local_irq_save(); tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); cpumask_copy(tmpmsk, mask); @@ -68,7 +68,7 @@ __x2apic_send_IPI_mask(const struct cpum cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); } - local_irq_restore(flags); + hard_local_irq_restore(flags); } static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/apic/x2apic_phys.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/x2apic_phys.c --- linux-5.15.26/arch/x86/kernel/apic/x2apic_phys.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/apic/x2apic_phys.c 2022-03-10 09:47:50.000000000 +0100 @@ -58,7 +58,7 @@ __x2apic_send_IPI_mask(const struct cpum /* x2apic MSRs are special and need a special fence: */ weak_wrmsr_fence(); - local_irq_save(flags); + flags = hard_local_irq_save(); this_cpu = smp_processor_id(); for_each_cpu(query_cpu, mask) { @@ -67,7 +67,7 @@ __x2apic_send_IPI_mask(const struct cpum __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } - local_irq_restore(flags); + hard_local_irq_restore(flags); } static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/asm-offsets.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/asm-offsets.c --- linux-5.15.26/arch/x86/kernel/asm-offsets.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/asm-offsets.c 2022-03-10 09:47:50.000000000 +0100 @@ -38,6 +38,9 @@ static void __used common(void) #endif BLANK(); +#ifdef CONFIG_IRQ_PIPELINE + DEFINE(OOB_stage_mask, STAGE_MASK); +#endif OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); BLANK(); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/cpu/acrn.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/acrn.c --- linux-5.15.26/arch/x86/kernel/cpu/acrn.c 2022-03-02 
11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/acrn.c 2022-03-10 09:47:50.000000000 +0100 @@ -37,7 +37,8 @@ static bool acrn_x2apic_available(void) static void (*acrn_intr_handler)(void); -DEFINE_IDTENTRY_SYSVEC(sysvec_acrn_hv_callback) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(HYPERVISOR_CALLBACK_VECTOR, + sysvec_acrn_hv_callback) { struct pt_regs *old_regs = set_irq_regs(regs); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/cpu/mce/amd.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/mce/amd.c --- linux-5.15.26/arch/x86/kernel/cpu/mce/amd.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/mce/amd.c 2022-03-10 09:47:50.000000000 +0100 @@ -934,13 +934,18 @@ static void __log_error(unsigned int ban mce_log(&m); } +/* + * irq_pipeline: Deferred error events have NMI semantics wrt to + * pipelining, they can and should be handled immediately out of the + * IDT. + */ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) { trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); inc_irq_stat(irq_deferred_error_count); deferred_error_int_vector(); trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR); - ack_APIC_irq(); + __ack_APIC_irq(); } /* diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/cpu/mce/core.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/mce/core.c --- linux-5.15.26/arch/x86/kernel/cpu/mce/core.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/mce/core.c 2022-03-10 09:47:50.000000000 +0100 @@ -1484,7 +1484,9 @@ noinstr void do_machine_check(struct pt_ /* If this triggers there is no way to recover. Die hard. */ BUG_ON(!on_thread_stack() || !user_mode(regs)); + hard_local_irq_enable(); queue_task_work(&m, msg, kill_current_task); + hard_local_irq_disable(); } else { /* diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/cpu/mce/threshold.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/mce/threshold.c --- linux-5.15.26/arch/x86/kernel/cpu/mce/threshold.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/mce/threshold.c 2022-03-10 09:47:50.000000000 +0100 @@ -27,5 +27,5 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_threshold) inc_irq_stat(irq_threshold_count); mce_threshold_vector(); trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR); - ack_APIC_irq(); + __ack_APIC_irq(); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/cpu/mshyperv.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/mshyperv.c --- linux-5.15.26/arch/x86/kernel/cpu/mshyperv.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/mshyperv.c 2022-03-10 09:47:50.000000000 +0100 @@ -43,7 +43,8 @@ static void (*hv_stimer0_handler)(void); static void (*hv_kexec_handler)(void); static void (*hv_crash_handler)(struct pt_regs *regs); -DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(HYPERVISOR_CALLBACK_VECTOR, + sysvec_hyperv_callback) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -72,7 +73,8 @@ void hv_remove_vmbus_handler(void) * Routines to do per-architecture handling of stimer0 * interrupts when in Direct Mode */ -DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(HYPERV_STIMER0_VECTOR, + sysvec_hyperv_stimer0) { struct pt_regs *old_regs = set_irq_regs(regs); diff -uprN -X linux-5.15.26/Documentation/dontdiff 
linux-5.15.26/arch/x86/kernel/cpu/mtrr/generic.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/mtrr/generic.c --- linux-5.15.26/arch/x86/kernel/cpu/mtrr/generic.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/cpu/mtrr/generic.c 2022-03-10 09:47:50.000000000 +0100 @@ -449,13 +449,13 @@ void __init mtrr_bp_pat_init(void) { unsigned long flags; - local_irq_save(flags); + flags = hard_local_irq_save(); prepare_set(); pat_init(); post_set(); - local_irq_restore(flags); + hard_local_irq_restore(flags); } /* Grab all of the MTRR state for this CPU into *state */ @@ -796,7 +796,7 @@ static void generic_set_all(void) unsigned long mask, count; unsigned long flags; - local_irq_save(flags); + flags = hard_local_irq_save(); prepare_set(); /* Actually set the state */ @@ -806,7 +806,7 @@ static void generic_set_all(void) pat_init(); post_set(); - local_irq_restore(flags); + hard_local_irq_restore(flags); /* Use the atomic bitops to update the global mask */ for (count = 0; count < sizeof(mask) * 8; ++count) { @@ -835,7 +835,7 @@ static void generic_set_mtrr(unsigned in vr = &mtrr_state.var_ranges[reg]; - local_irq_save(flags); + flags = hard_local_irq_save(); prepare_set(); if (size == 0) { @@ -856,7 +856,7 @@ static void generic_set_mtrr(unsigned in } post_set(); - local_irq_restore(flags); + hard_local_irq_restore(flags); } int generic_validate_add_page(unsigned long base, unsigned long size, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/dumpstack.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/dumpstack.c --- linux-5.15.26/arch/x86/kernel/dumpstack.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/dumpstack.c 2022-03-10 09:47:50.000000000 +0100 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -335,7 +336,7 @@ unsigned long oops_begin(void) oops_enter(); /* racy, but better than risking deadlock. */ - raw_local_irq_save(flags); + flags = hard_local_irq_save(); cpu = smp_processor_id(); if (!arch_spin_trylock(&die_lock)) { if (cpu == die_owner) @@ -365,7 +366,7 @@ void oops_end(unsigned long flags, struc if (!die_nest_count) /* Nest count reaches zero, release the lock. */ arch_spin_unlock(&die_lock); - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); oops_exit(); /* Executive summary in case the oops scrolled away */ @@ -394,6 +395,8 @@ static void __die_header(const char *str { const char *pr = ""; + irq_pipeline_oops(); + /* Save the regs of the first oops for the executive summary later. */ if (!die_counter) exec_summary_regs = *regs; @@ -402,13 +405,14 @@ static void __die_header(const char *str pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; printk(KERN_DEFAULT - "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, + "%s: %04lx [#%d]%s%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, pr, IS_ENABLED(CONFIG_SMP) ? " SMP" : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? - (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); + (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "", + irqs_pipelined() ? 
" IRQ_PIPELINE" : ""); } NOKPROBE_SYMBOL(__die_header); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/fpu/core.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/fpu/core.c --- linux-5.15.26/arch/x86/kernel/fpu/core.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/fpu/core.c 2022-03-10 09:47:50.000000000 +0100 @@ -15,6 +15,7 @@ #include #include +#include #define CREATE_TRACE_POINTS #include @@ -76,9 +77,10 @@ static bool interrupted_user_mode(void) */ bool irq_fpu_usable(void) { - return !in_interrupt() || - interrupted_user_mode() || - interrupted_kernel_fpu_idle(); + return running_inband() && + (!in_interrupt() || + interrupted_user_mode() || + interrupted_kernel_fpu_idle()); } EXPORT_SYMBOL(irq_fpu_usable); @@ -153,11 +155,15 @@ EXPORT_SYMBOL_GPL(__restore_fpregs_from_ void kernel_fpu_begin_mask(unsigned int kfpu_mask) { + unsigned long flags; + preempt_disable(); WARN_ON_FPU(!irq_fpu_usable()); WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); + flags = hard_cond_local_irq_save(); + this_cpu_write(in_kernel_fpu, true); if (!(current->flags & PF_KTHREAD) && @@ -173,6 +179,8 @@ void kernel_fpu_begin_mask(unsigned int if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU)) asm volatile ("fninit"); + + hard_cond_local_irq_restore(flags); } EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); @@ -191,16 +199,18 @@ EXPORT_SYMBOL_GPL(kernel_fpu_end); */ void fpu_sync_fpstate(struct fpu *fpu) { + unsigned long flags; + WARN_ON_FPU(fpu != ¤t->thread.fpu); - fpregs_lock(); + flags = fpregs_lock(); trace_x86_fpu_before_save(fpu); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) save_fpregs_to_fpstate(fpu); trace_x86_fpu_after_save(fpu); - fpregs_unlock(); + fpregs_unlock(flags); } static inline void fpstate_init_xstate(struct xregs_state *xsave) @@ -252,6 +262,7 @@ int fpu_clone(struct task_struct *dst) { struct fpu *src_fpu = ¤t->thread.fpu; struct fpu *dst_fpu = &dst->thread.fpu; + unsigned long flags; /* The new task's FPU state cannot be valid in the hardware. */ dst_fpu->last_cpu = -1; @@ -270,13 +281,13 @@ int fpu_clone(struct task_struct *dst) * state. Otherwise save the FPU registers directly into the * child's FPU context, without any memory-to-memory copying. */ - fpregs_lock(); + flags = fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); else save_fpregs_to_fpstate(dst_fpu); - fpregs_unlock(); + fpregs_unlock(flags); set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); @@ -297,7 +308,9 @@ int fpu_clone(struct task_struct *dst) */ void fpu__drop(struct fpu *fpu) { - preempt_disable(); + unsigned long flags; + + flags = hard_preempt_disable(); if (fpu == ¤t->thread.fpu) { /* Ignore delayed exceptions from user space */ @@ -309,7 +322,7 @@ void fpu__drop(struct fpu *fpu) trace_x86_fpu_dropped(fpu); - preempt_enable(); + hard_preempt_enable(flags); } /* @@ -343,8 +356,9 @@ static inline unsigned int init_fpstate_ static void fpu_reset_fpstate(void) { struct fpu *fpu = ¤t->thread.fpu; + unsigned long flags; - fpregs_lock(); + flags = fpregs_lock(); fpu__drop(fpu); /* * This does not change the actual hardware registers. 
It just @@ -361,7 +375,7 @@ static void fpu_reset_fpstate(void) */ memcpy(&fpu->state, &init_fpstate, init_fpstate_copy_size()); set_thread_flag(TIF_NEED_FPU_LOAD); - fpregs_unlock(); + fpregs_unlock(flags); } /* @@ -371,12 +385,14 @@ static void fpu_reset_fpstate(void) */ void fpu__clear_user_states(struct fpu *fpu) { + unsigned long flags; + WARN_ON_FPU(fpu != ¤t->thread.fpu); - fpregs_lock(); + flags = fpregs_lock(); if (!cpu_feature_enabled(X86_FEATURE_FPU)) { fpu_reset_fpstate(); - fpregs_unlock(); + fpregs_unlock(flags); return; } @@ -400,7 +416,7 @@ void fpu__clear_user_states(struct fpu * * current's FPU is marked active. */ fpregs_mark_activate(); - fpregs_unlock(); + fpregs_unlock(flags); } void fpu_flush_thread(void) @@ -412,10 +428,14 @@ void fpu_flush_thread(void) */ void switch_fpu_return(void) { + unsigned long flags; + if (!static_cpu_has(X86_FEATURE_FPU)) return; + flags = hard_cond_local_irq_save(); fpregs_restore_userregs(); + hard_cond_local_irq_restore(flags); } EXPORT_SYMBOL_GPL(switch_fpu_return); @@ -515,3 +535,71 @@ int fpu__exception_code(struct fpu *fpu, */ return 0; } + +#ifdef CONFIG_DOVETAIL + +/* + * Holds the in-kernel fpu state when preempted by a task running on + * the out-of-band stage. + */ +static DEFINE_PER_CPU(struct fpu *, in_kernel_fpstate); + +static int fpu__init_kernel_fpstate(unsigned int cpu) +{ + struct fpu *fpu; + + fpu = kzalloc(sizeof(*fpu) + fpu_kernel_xstate_size, GFP_KERNEL); + if (fpu == NULL) + return -ENOMEM; + + this_cpu_write(in_kernel_fpstate, fpu); + fpstate_init(&fpu->state); + + return 0; +} + +static int fpu__drop_kernel_fpstate(unsigned int cpu) +{ + struct fpu *fpu = this_cpu_read(in_kernel_fpstate); + + kfree(fpu); + + return 0; +} + +void fpu__suspend_inband(void) +{ + struct fpu *kfpu = this_cpu_read(in_kernel_fpstate); + struct task_struct *tsk = current; + + if (kernel_fpu_disabled()) { + save_fpregs_to_fpstate(kfpu); + __cpu_invalidate_fpregs_state(); + oob_fpu_set_preempt(&tsk->thread.fpu); + } +} + +void fpu__resume_inband(void) +{ + struct fpu *kfpu = this_cpu_read(in_kernel_fpstate); + struct task_struct *tsk = current; + + if (oob_fpu_preempted(&tsk->thread.fpu)) { + restore_fpregs_from_fpstate(&kfpu->state); + __cpu_invalidate_fpregs_state(); + oob_fpu_clear_preempt(&tsk->thread.fpu); + } else if (!(tsk->flags & PF_KTHREAD) && + test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); +} + +static int __init fpu__init_dovetail(void) +{ + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "platform/x86/dovetail:online", + fpu__init_kernel_fpstate, fpu__drop_kernel_fpstate); + return 0; +} +core_initcall(fpu__init_dovetail); + +#endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/fpu/signal.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/fpu/signal.c --- linux-5.15.26/arch/x86/kernel/fpu/signal.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/fpu/signal.c 2022-03-10 09:47:50.000000000 +0100 @@ -66,15 +66,17 @@ setfx: */ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) { + unsigned long flags; + if (use_fxsr()) { struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; struct user_i387_ia32_struct env; struct _fpstate_32 __user *fp = buf; - fpregs_lock(); + flags = fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) fxsave(&tsk->thread.fpu.state.fxsave); - fpregs_unlock(); + fpregs_unlock(flags); convert_from_fxsr(&env, tsk); @@ -174,6 +176,7 @@ int copy_fpstate_to_sigframe(void __user { struct task_struct *tsk 
= current; int ia32_fxstate = (buf != buf_fx); + unsigned long flags; int ret; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || @@ -195,14 +198,14 @@ retry: * userland's stack frame which will likely succeed. If it does not, * resolve the fault in the user memory and try again. */ - fpregs_lock(); + flags = fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); pagefault_disable(); ret = copy_fpregs_to_sigframe(buf_fx); pagefault_enable(); - fpregs_unlock(); + fpregs_unlock(flags); if (ret) { if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size)) @@ -250,10 +253,11 @@ static int restore_fpregs_from_user(void bool fx_only, unsigned int size) { struct fpu *fpu = ¤t->thread.fpu; + unsigned long flags; int ret; retry: - fpregs_lock(); + flags = fpregs_lock(); pagefault_disable(); ret = __restore_fpregs_from_user(buf, xrestore, fx_only); pagefault_enable(); @@ -272,7 +276,7 @@ retry: */ if (test_thread_flag(TIF_NEED_FPU_LOAD)) __cpu_invalidate_fpregs_state(); - fpregs_unlock(); + fpregs_unlock(flags); /* Try to handle #PF, but anything else is fatal. */ if (ret != -EFAULT) @@ -297,7 +301,7 @@ retry: os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); fpregs_mark_activate(); - fpregs_unlock(); + fpregs_unlock(flags); return 0; } @@ -310,6 +314,7 @@ static int __fpu_restore_sig(void __user struct user_i387_ia32_struct env; u64 user_xfeatures = 0; bool fx_only = false; + unsigned long flags; int ret; if (use_xsave()) { @@ -352,7 +357,7 @@ static int __fpu_restore_sig(void __user * to be loaded again on return to userland (overriding last_cpu avoids * the optimisation). */ - fpregs_lock(); + flags = fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { /* * If supervisor states are available then save the @@ -368,7 +373,7 @@ static int __fpu_restore_sig(void __user } __fpu_invalidate_fpregs_state(fpu); __cpu_invalidate_fpregs_state(); - fpregs_unlock(); + fpregs_unlock(flags); if (use_xsave() && !fx_only) { ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx); @@ -396,7 +401,7 @@ static int __fpu_restore_sig(void __user /* Fold the legacy FP storage */ convert_to_fxsr(&fpu->state.fxsave, &env); - fpregs_lock(); + flags = fpregs_lock(); if (use_xsave()) { /* * Remove all UABI feature bits not set in user_xfeatures @@ -418,7 +423,7 @@ static int __fpu_restore_sig(void __user if (likely(!ret)) fpregs_mark_activate(); - fpregs_unlock(); + fpregs_unlock(flags); return ret; } static inline int xstate_sigframe_size(void) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/fpu/xstate.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/fpu/xstate.c --- linux-5.15.26/arch/x86/kernel/fpu/xstate.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/fpu/xstate.c 2022-03-10 09:47:50.000000000 +0100 @@ -916,6 +916,7 @@ int arch_set_user_pkey_access(struct tas unsigned long init_val) { u32 old_pkru, new_pkru_bits = 0; + unsigned long flags; int pkey_shift; /* @@ -943,6 +944,8 @@ int arch_set_user_pkey_access(struct tas pkey_shift = pkey * PKRU_BITS_PER_PKEY; new_pkru_bits <<= pkey_shift; + flags = hard_cond_local_irq_save(); + /* Get old PKRU and mask off any old bits in place: */ old_pkru = read_pkru(); old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); @@ -950,6 +953,8 @@ int arch_set_user_pkey_access(struct tas /* Write old part along with new part: */ write_pkru(old_pkru | new_pkru_bits); + hard_cond_local_irq_restore(flags); + return 0; } #endif /* ! 
CONFIG_ARCH_HAS_PKEYS */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/hpet.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/hpet.c --- linux-5.15.26/arch/x86/kernel/hpet.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/hpet.c 2022-03-10 09:47:50.000000000 +0100 @@ -407,7 +407,7 @@ static void hpet_init_clockevent(struct evt->set_next_event = hpet_clkevt_set_next_event; evt->set_state_shutdown = hpet_clkevt_set_state_shutdown; - evt->features = CLOCK_EVT_FEAT_ONESHOT; + evt->features = CLOCK_EVT_FEAT_ONESHOT|CLOCK_EVT_FEAT_PIPELINE; if (hc->boot_cfg & HPET_TN_PERIODIC) { evt->features |= CLOCK_EVT_FEAT_PERIODIC; evt->set_state_periodic = hpet_clkevt_set_state_periodic; @@ -509,7 +509,9 @@ static struct irq_chip hpet_msi_controll .irq_set_affinity = msi_domain_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_write_msi_msg = hpet_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP | + IRQCHIP_PIPELINE_SAFE, }; static int hpet_msi_init(struct irq_domain *domain, @@ -629,7 +631,7 @@ static irqreturn_t hpet_msi_interrupt_ha return IRQ_HANDLED; } - evt->event_handler(evt); + clockevents_handle_event(evt); return IRQ_HANDLED; } @@ -812,7 +814,7 @@ static u64 read_hpet(struct clocksource if (arch_spin_is_locked(&old.lock)) goto contended; - local_irq_save(flags); + flags = hard_local_irq_save(); if (arch_spin_trylock(&hpet.lock)) { new.value = hpet_readl(HPET_COUNTER); /* @@ -820,10 +822,10 @@ static u64 read_hpet(struct clocksource */ WRITE_ONCE(hpet.value, new.value); arch_spin_unlock(&hpet.lock); - local_irq_restore(flags); + hard_local_irq_restore(flags); return (u64)new.value; } - local_irq_restore(flags); + hard_local_irq_restore(flags); contended: /* diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/i8259.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/i8259.c --- linux-5.15.26/arch/x86/kernel/i8259.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/i8259.c 2022-03-10 09:47:50.000000000 +0100 @@ -33,7 +33,7 @@ static void init_8259A(int auto_eoi); static int i8259A_auto_eoi; -DEFINE_RAW_SPINLOCK(i8259A_lock); +DEFINE_HARD_SPINLOCK(i8259A_lock); /* * 8259A PIC functions to handle ISA devices: @@ -227,6 +227,7 @@ struct irq_chip i8259A_chip = { .irq_disable = disable_8259A_irq, .irq_unmask = enable_8259A_irq, .irq_mask_ack = mask_and_ack_8259A, + .flags = IRQCHIP_PIPELINE_SAFE, }; static char irq_trigger[2]; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/idt.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/idt.c --- linux-5.15.26/arch/x86/kernel/idt.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/idt.c 2022-03-10 09:47:50.000000000 +0100 @@ -125,6 +125,10 @@ static const __initconst struct idt_data INTG(CALL_FUNCTION_SINGLE_VECTOR, asm_sysvec_call_function_single), INTG(IRQ_MOVE_CLEANUP_VECTOR, asm_sysvec_irq_move_cleanup), INTG(REBOOT_VECTOR, asm_sysvec_reboot), +#ifdef CONFIG_IRQ_PIPELINE + INTG(RESCHEDULE_OOB_VECTOR, asm_sysvec_reschedule_oob_ipi), + INTG(TIMER_OOB_VECTOR, asm_sysvec_timer_oob_ipi), +#endif #endif #ifdef CONFIG_X86_THERMAL_VECTOR diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/irq.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/irq.c --- linux-5.15.26/arch/x86/kernel/irq.c 2022-03-02 11:48:10.000000000 +0100 
+++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/irq.c 2022-03-10 09:47:50.000000000 +0100 @@ -4,6 +4,7 @@ */ #include #include +#include #include #include #include @@ -49,7 +50,7 @@ void ack_bad_irq(unsigned int irq) * completely. * But only ack when the APIC is enabled -AK */ - ack_APIC_irq(); + __ack_APIC_irq(); } #define irq_stats(x) (&per_cpu(irq_stat, x)) @@ -236,8 +237,11 @@ static __always_inline void handle_irq(s /* * common_interrupt() handles all normal device IRQ's (the special SMP * cross-CPU interrupts have their own entry points). + * + * Compiled out if CONFIG_IRQ_PIPELINE is enabled, replaced by + * arch_handle_irq(). */ -DEFINE_IDTENTRY_IRQ(common_interrupt) +DEFINE_IDTENTRY_IRQ_PIPELINED(common_interrupt) { struct pt_regs *old_regs = set_irq_regs(regs); struct irq_desc *desc; @@ -269,7 +273,8 @@ void (*x86_platform_ipi_callback)(void) /* * Handler for X86_PLATFORM_IPI_VECTOR. */ -DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(X86_PLATFORM_IPI_VECTOR, + sysvec_x86_platform_ipi) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -301,7 +306,8 @@ EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wa /* * Handler for POSTED_INTERRUPT_VECTOR. */ -DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) +DEFINE_IDTENTRY_SYSVEC_SIMPLE_PIPELINED(POSTED_INTR_VECTOR, + sysvec_kvm_posted_intr_ipi) { ack_APIC_irq(); inc_irq_stat(kvm_posted_intr_ipis); @@ -310,7 +316,8 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm /* * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. */ -DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(POSTED_INTR_WAKEUP_VECTOR, + sysvec_kvm_posted_intr_wakeup_ipi) { ack_APIC_irq(); inc_irq_stat(kvm_posted_intr_wakeup_ipis); @@ -320,7 +327,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted /* * Handler for POSTED_INTERRUPT_NESTED_VECTOR. */ -DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) +DEFINE_IDTENTRY_SYSVEC_SIMPLE_PIPELINED(POSTED_INTR_NESTED_VECTOR, + sysvec_kvm_posted_intr_nested_ipi) { ack_APIC_irq(); inc_irq_stat(kvm_posted_intr_nested_ipis); @@ -394,6 +402,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) inc_irq_stat(irq_thermal_count); smp_thermal_vector(); trace_thermal_apic_exit(THERMAL_APIC_VECTOR); - ack_APIC_irq(); + __ack_APIC_irq(); } #endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/irq_pipeline.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/irq_pipeline.c --- linux-5.15.26/arch/x86/kernel/irq_pipeline.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/irq_pipeline.c 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,422 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2019 Philippe Gerum . + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct irq_domain *sipic_domain; + +static void sipic_irq_noop(struct irq_data *data) { } + +static unsigned int sipic_irq_noop_ret(struct irq_data *data) +{ + return 0; +} + +static struct irq_chip sipic_chip = { + .name = "SIPIC", + .irq_startup = sipic_irq_noop_ret, + .irq_shutdown = sipic_irq_noop, + .irq_enable = sipic_irq_noop, + .irq_disable = sipic_irq_noop, + .flags = IRQCHIP_PIPELINE_SAFE | IRQCHIP_SKIP_SET_WAKE, +}; + +void handle_apic_irq(struct irq_desc *desc) +{ + if (WARN_ON_ONCE(irq_pipeline_debug() && !on_pipeline_entry())) + return; + + /* + * MCE events are non-maskable therefore their in-band + * handlers have to be oob-compatible by construction. 
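Several irq_chip descriptors in this patch gain IRQCHIP_PIPELINE_SAFE, advertising that their callbacks may be invoked from pipeline entry context; the sipic_chip above is already the minimal template. A hypothetical driver chip marked the same way might look like the following sketch, with all my_chip_* names invented:

static void my_chip_mask(struct irq_data *data)
{
	/* device-specific masking, using only oob-safe accessors */
}

static void my_chip_unmask(struct irq_data *data)
{
	/* device-specific unmasking */
}

static struct irq_chip my_pipelined_chip = {
	.name		= "MY-CHIP",
	.irq_mask	= my_chip_mask,
	.irq_unmask	= my_chip_unmask,
	.flags		= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE,
};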
Those + * handlers run immediately out of the IDT for this reason as + * well. We won't see them here since they are not routed via + * arch_handle_irq() -> generic_pipeline_irq_desc(). + * + * All we need to do at this stage is to acknowledge other + * APIC events, then pipeline the corresponding interrupt from + * our synthetic controller chip (SIPIC). + */ + __ack_APIC_irq(); + + handle_oob_irq(desc); +} + +void irq_send_oob_ipi(unsigned int ipi, + const struct cpumask *cpumask) +{ + apic->send_IPI_mask_allbutself(cpumask, apicm_irq_vector(ipi)); +} +EXPORT_SYMBOL_GPL(irq_send_oob_ipi); + +static irqentry_state_t pipeline_enter_rcu(void) +{ + irqentry_state_t state = { + .exit_rcu = false, + .stage_info = IRQENTRY_INBAND_UNSTALLED, + }; + + if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { + rcu_irq_enter(); + state.exit_rcu = true; + } else { + rcu_irq_enter_check_tick(); + } + + return state; +} + +static void pipeline_exit_rcu(irqentry_state_t state) +{ + if (state.exit_rcu) + rcu_irq_exit(); +} + +static void do_sysvec_inband(struct irq_desc *desc, struct pt_regs *regs) +{ + unsigned int irq = irq_desc_get_irq(desc); + int vector = apicm_irq_vector(irq); + + /* + * This code only sees pipelined sysvec events tagged with + * DEFINE_IDTENTRY_SYSVEC_PIPELINED: + * + * arch_handle_irq(irq) + * generic_pipeline_irq_desc(irq) + * handle_apic_irq(irq) + * handle_oob_irq(irq) + * [...irq_post_inband...] + * + * arch_do_IRQ_pipelined(desc) + * do_sysvec_inband(desc) + * + * | + * v + * sysvec_handler(regs) + * + * System vectors which are still tagged as + * DEFINE_IDTENTRY_SYSVEC/DEFINE_IDTENTRY_SYSVEC_SIMPLE are + * directly dispatched out of the IDT, assuming their handler + * is oob-safe (like NMI handlers) therefore never reach this + * in-band stage handler. + * + * NOTE: we expand run_sysvec_on_irqstack_cond() each time, + * which is ugly. But the irqstack code makes assumptions we + * don't want to break. 
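irq_send_oob_ipi() above resolves an out-of-band IRQ back to its APIC vector and sends it to every CPU in the mask except the caller. A small usage sketch, assuming a client resolves the IRQ through the same apicm_vector_irq() mapping used in this file, which is an assumption about client code rather than something this patch mandates; kick_remote_oob() is illustrative:

static void kick_remote_oob(const struct cpumask *cpus)
{
	/* RESCHEDULE_OOB_VECTOR comes from the idt.c hunk of this patch. */
	irq_send_oob_ipi(apicm_vector_irq(RESCHEDULE_OOB_VECTOR), cpus);
}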
+ */ + + switch (vector) { +#ifdef CONFIG_SMP + case RESCHEDULE_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_reschedule_ipi, + regs); + break; + case CALL_FUNCTION_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_call_function, + regs); + break; + case CALL_FUNCTION_SINGLE_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_call_function_single, + regs); + break; + case REBOOT_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_reboot, regs); + break; +#endif + case X86_PLATFORM_IPI_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_x86_platform_ipi, + regs); + break; + case IRQ_WORK_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_irq_work, regs); + break; +#ifdef CONFIG_HAVE_KVM + case POSTED_INTR_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_kvm_posted_intr_ipi, + regs); + break; + case POSTED_INTR_WAKEUP_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_kvm_posted_intr_wakeup_ipi, + regs); + break; + case POSTED_INTR_NESTED_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_kvm_posted_intr_nested_ipi, + regs); + break; +#endif +#ifdef CONFIG_HYPERV + case HYPERVISOR_CALLBACK_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_hyperv_callback, + regs); + break; + case HYPERV_REENLIGHTENMENT_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_hyperv_reenlightenment, + regs); + break; + case HYPERV_STIMER0_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_hyperv_stimer0, + regs); + break; +#endif +#ifdef CONFIG_ACRN_GUEST + case HYPERVISOR_CALLBACK_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_acrn_hv_callback, + regs); + break; +#endif +#ifdef CONFIG_XEN_PVHVM + case HYPERVISOR_CALLBACK_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_xen_hvm_callback, + regs); + break; +#endif + case LOCAL_TIMER_VECTOR: + run_sysvec_on_irqstack_cond(__sysvec_apic_timer_interrupt, + regs); + break; + default: + printk_once(KERN_ERR "irq_pipeline: unexpected event" + " on vector #%.2x (irq=%u)", vector, irq); + return; + } +} + +static void do_irq_inband(struct pt_regs *regs, u32 irq) +{ + + struct irq_desc *desc = irq_to_desc(irq); + + desc->handle_irq(desc); +} + +void arch_do_IRQ_pipelined(struct irq_desc *desc) +{ + struct pt_regs *regs = raw_cpu_ptr(&irq_pipeline.tick_regs), *old_regs; + irqentry_state_t state; + + /* Emulate a kernel entry. */ + state = pipeline_enter_rcu(); + + if (desc->irq_data.domain == sipic_domain) { + do_sysvec_inband(desc, regs); + } else { + /* + * XXX: the following is ugly, but the irqstack + * switching code is not that flexible. However, we + * don't want to provide a separate implementation of + * the latter in the pipelined case, so let's get + * along with it. 
+ */ + u32 vector = irq_desc_get_irq(desc); /* irq carried as 'vector' */ + old_regs = set_irq_regs(regs); + run_irq_on_irqstack_cond(do_irq_inband, regs, vector); + set_irq_regs(old_regs); + } + + pipeline_exit_rcu(state); +} + +void arch_handle_irq(struct pt_regs *regs, u8 vector, bool irq_movable) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; + unsigned int irq; + + if (vector >= FIRST_SYSTEM_VECTOR) { + irq = apicm_vector_irq(vector); + desc = irq_to_desc(irq); + } else { + desc = __this_cpu_read(vector_irq[vector]); + if (unlikely(IS_ERR_OR_NULL(desc))) { + __ack_APIC_irq(); + + if (desc == VECTOR_UNUSED) { + pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n", + __func__, smp_processor_id(), + vector); + } else { + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + } + return; + } + if (irqd_is_setaffinity_pending(&desc->irq_data)) { + raw_spin_lock(&desc->lock); + if (irq_movable) + irqd_clr_move_blocked(&desc->irq_data); + else + irqd_set_move_blocked(&desc->irq_data); + raw_spin_unlock(&desc->lock); + } + } + + generic_pipeline_irq_desc(desc, regs); + + set_irq_regs(old_regs); +} + +noinstr void arch_pipeline_entry(struct pt_regs *regs, u8 vector) +{ + struct irq_stage_data *prevd; + irqentry_state_t state; + + /* + * The tricky one: we distinguish the following cases: + * + * [1] entry from oob context, either kernel or user code was + * preempted by the IRQ, the in-band (virtual) interrupt state + * is 'undefined' (could be either stalled/unstalled, it is + * not relevant). + * + * [2] entry from in-band context while the stage is stalled, + * which means that some kernel code was preempted by the IRQ + * since in-band user code cannot run with interrupts + * (virtually) disabled. + * + * [3] entry from in-band context while the stage is + * unstalled: the common case for IRQ entry. Kernel or user + * code may have been preempted, we handle the event + * identically. + * + * [1] and [2] are processed almost the same way, except for + * one key aspect: the potential stage demotion of the + * preempted task which originally entered [1] on the oob + * stage, then left it for the in-band stage as a result of + * handling the IRQ (such demotion normally happens during + * handle_irq_pipelined_finish() if required). In this + * particular case, we want to run the common IRQ epilogue + * code before returning to user mode, so that all pending + * in-band work (_TIF_WORK_*) is carried out for the task + * which is about to exit kernel mode. + * + * If the task runs in-band at the exit point and a user mode + * context was preempted, then case [2] is excluded by + * definition so we know for sure that we just observed a + * stage demotion, therefore we have to run the work loop by + * calling irqentry_exit_to_user_mode(). + */ + if (unlikely(running_oob() || irqs_disabled())) { + instrumentation_begin(); + prevd = handle_irq_pipelined_prepare(regs); + arch_handle_irq(regs, vector, false); + kvm_set_cpu_l1tf_flush_l1d(); + handle_irq_pipelined_finish(prevd, regs); + if (running_inband() && user_mode(regs)) { + stall_inband_nocheck(); + irqentry_exit_to_user_mode(regs); + } + instrumentation_end(); + return; + } + + /* In-band on entry, accepting interrupts. */ + state = irqentry_enter(regs); + instrumentation_begin(); + /* Prep for handling, switching oob. */ + prevd = handle_irq_pipelined_prepare(regs); + arch_handle_irq(regs, vector, true); + kvm_set_cpu_l1tf_flush_l1d(); + /* irqentry_enter() stalled the in-band stage. 
*/ + trace_hardirqs_on(); + unstall_inband_nocheck(); + handle_irq_pipelined_finish(prevd, regs); + stall_inband_nocheck(); + trace_hardirqs_off(); + instrumentation_end(); + irqentry_exit(regs, state); +} + +static int sipic_irq_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_percpu_devid(irq); + irq_set_chip_and_handler(irq, &sipic_chip, handle_apic_irq); + + return 0; +} + +static struct irq_domain_ops sipic_domain_ops = { + .map = sipic_irq_map, +}; + +static void create_x86_apic_domain(void) +{ + sipic_domain = irq_domain_add_simple(NULL, NR_APIC_VECTORS, + FIRST_SYSTEM_IRQ, + &sipic_domain_ops, NULL); +} + +#ifdef CONFIG_SMP + +DEFINE_IDTENTRY_SYSVEC_PIPELINED(RESCHEDULE_OOB_VECTOR, + sysvec_reschedule_oob_ipi) +{ /* In-band handler is unused. */ } + +DEFINE_IDTENTRY_SYSVEC_PIPELINED(TIMER_OOB_VECTOR, + sysvec_timer_oob_ipi) +{ /* In-band handler is unused. */ } + +void handle_irq_move_cleanup(struct irq_desc *desc) +{ + if (on_pipeline_entry()) { + /* 1. on receipt from hardware. */ + __ack_APIC_irq(); + handle_oob_irq(desc); + } else { + /* 2. in-band delivery. */ + __sysvec_irq_move_cleanup(NULL); + } +} + +static void smp_setup(void) +{ + int irq; + + /* + * The IRQ cleanup event must be pipelined to the inband + * stage, so we need a valid IRQ descriptor for it. Since we + * still are in the early boot stage on CPU0, we ask for a 1:1 + * mapping between the vector number and IRQ number, to make + * things easier for us later on. + */ + irq = irq_alloc_desc_at(IRQ_MOVE_CLEANUP_VECTOR, 0); + WARN_ON(IRQ_MOVE_CLEANUP_VECTOR != irq); + /* + * Set up the vector_irq[] mapping array for the boot CPU, + * other CPUs will copy this entry when their APIC is going + * online (see lapic_online()). + */ + per_cpu(vector_irq, 0)[irq] = irq_to_desc(irq); + + irq_set_chip_and_handler(irq, &dummy_irq_chip, + handle_irq_move_cleanup); +} + +#else + +static void smp_setup(void) { } + +#endif + +void __init arch_irq_pipeline_init(void) +{ + /* + * Create an IRQ domain for mapping APIC system interrupts + * (in-band and out-of-band), with fixed sirq numbers starting + * from FIRST_SYSTEM_IRQ. Upon receipt of a system interrupt, + * the corresponding sirq is injected into the pipeline. 
+ */ + create_x86_apic_domain(); + + smp_setup(); +} diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/irq_work.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/irq_work.c --- linux-5.15.26/arch/x86/kernel/irq_work.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/irq_work.c 2022-03-10 09:47:50.000000000 +0100 @@ -14,7 +14,8 @@ #include #ifdef CONFIG_X86_LOCAL_APIC -DEFINE_IDTENTRY_SYSVEC(sysvec_irq_work) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(IRQ_WORK_VECTOR, + sysvec_irq_work) { ack_APIC_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/kvm.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/kvm.c --- linux-5.15.26/arch/x86/kernel/kvm.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/kvm.c 2022-03-10 09:47:50.000000000 +0100 @@ -241,12 +241,15 @@ noinstr bool __kvm_handle_async_pf(struc { u32 flags = kvm_read_and_reset_apf_flags(); irqentry_state_t state; + unsigned long irqflags; if (!flags) return false; state = irqentry_enter(regs); + oob_trap_notify(X86_TRAP_PF, regs); instrumentation_begin(); + irqflags = hard_cond_local_irq_save(); /* * If the host managed to inject an async #PF into an interrupt @@ -265,7 +268,9 @@ noinstr bool __kvm_handle_async_pf(struc WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags); } + hard_cond_local_irq_restore(irqflags); instrumentation_end(); + oob_trap_unwind(X86_TRAP_PF, regs); irqentry_exit(regs, state); return true; } @@ -430,6 +435,9 @@ static void __init sev_map_percpu_data(v static void kvm_guest_cpu_offline(bool shutdown) { + unsigned long flags; + + flags = hard_cond_local_irq_save(); kvm_disable_steal_time(); if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) wrmsrl(MSR_KVM_PV_EOI_EN, 0); @@ -437,15 +445,16 @@ static void kvm_guest_cpu_offline(bool s if (!shutdown) apf_task_wake_all(); kvmclock_disable(); + hard_cond_local_irq_restore(flags); } static int kvm_cpu_online(unsigned int cpu) { unsigned long flags; - local_irq_save(flags); + local_irq_save_full(flags); kvm_guest_cpu_init(); - local_irq_restore(flags); + local_irq_restore_full(flags); return 0; } @@ -879,16 +888,20 @@ static void kvm_wait(u8 *ptr, u8 val) * in irq spinlock slowpath and no spurious interrupt occur to save us. */ if (irqs_disabled()) { + hard_local_irq_disable(); + if (READ_ONCE(*ptr) == val) halt(); + + hard_local_irq_enable(); } else { - local_irq_disable(); + local_irq_disable_full(); /* safe_halt() will enable IRQ */ if (READ_ONCE(*ptr) == val) safe_halt(); - else - local_irq_enable(); + + local_irq_enable_full(); } } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/Makefile linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/Makefile --- linux-5.15.26/arch/x86/kernel/Makefile 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/Makefile 2022-03-10 09:47:50.000000000 +0100 @@ -130,6 +130,7 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock. 
obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o obj-$(CONFIG_JAILHOUSE_GUEST) += jailhouse.o +obj-$(CONFIG_IRQ_PIPELINE) += irq_pipeline.o obj-$(CONFIG_EISA) += eisa.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/nmi.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/nmi.c --- linux-5.15.26/arch/x86/kernel/nmi.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/nmi.c 2022-03-10 09:47:50.000000000 +0100 @@ -473,6 +473,10 @@ static DEFINE_PER_CPU(enum nmi_states, n static DEFINE_PER_CPU(unsigned long, nmi_cr2); static DEFINE_PER_CPU(unsigned long, nmi_dr7); +/* + * IRQ pipeline: fixing up the virtual IRQ state makes no sense on + * NMI. + */ DEFINE_IDTENTRY_RAW(exc_nmi) { irqentry_state_t irq_state; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/process_64.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/process_64.c --- linux-5.15.26/arch/x86/kernel/process_64.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/process_64.c 2022-03-10 09:47:50.000000000 +0100 @@ -273,9 +273,9 @@ void current_save_fsgs(void) unsigned long flags; /* Interrupts need to be off for FSGSBASE */ - local_irq_save(flags); + local_irq_save_full(flags); save_fsgs(current); - local_irq_restore(flags); + local_irq_restore_full(flags); } #if IS_ENABLED(CONFIG_KVM) EXPORT_SYMBOL_GPL(current_save_fsgs); @@ -434,9 +434,9 @@ unsigned long x86_gsbase_read_cpu_inacti if (boot_cpu_has(X86_FEATURE_FSGSBASE)) { unsigned long flags; - local_irq_save(flags); + local_irq_save_full(flags); gsbase = __rdgsbase_inactive(); - local_irq_restore(flags); + local_irq_restore_full(flags); } else { rdmsrl(MSR_KERNEL_GS_BASE, gsbase); } @@ -449,9 +449,9 @@ void x86_gsbase_write_cpu_inactive(unsig if (boot_cpu_has(X86_FEATURE_FSGSBASE)) { unsigned long flags; - local_irq_save(flags); + local_irq_save_full(flags); __wrgsbase_inactive(gsbase); - local_irq_restore(flags); + local_irq_restore_full(flags); } else { wrmsrl(MSR_KERNEL_GS_BASE, gsbase); } @@ -562,8 +562,17 @@ __switch_to(struct task_struct *prev_p, struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); + /* + * Dovetail: Switching context on the out-of-band stage is + * legit, and we may have preempted an in-band (soft)irq + * handler earlier. Since oob handlers never switch stack, + * make sure to restrict the following test to in-band + * callers. + */ WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && - this_cpu_read(hardirq_stack_inuse)); + running_inband() && this_cpu_read(hardirq_stack_inuse)); + + WARN_ON_ONCE(dovetail_debug() && !hard_irqs_disabled()); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) switch_fpu_prepare(prev_fpu, cpu); @@ -745,6 +754,7 @@ static long prctl_map_vdso(const struct long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) { + unsigned long flags; int ret = 0; switch (option) { @@ -752,7 +762,7 @@ long do_arch_prctl_64(struct task_struct if (unlikely(arg2 >= TASK_SIZE_MAX)) return -EPERM; - preempt_disable(); + flags = hard_preempt_disable(); /* * ARCH_SET_GS has always overwritten the index * and the base. 
Zero is the most sensible value @@ -773,7 +783,7 @@ long do_arch_prctl_64(struct task_struct task->thread.gsindex = 0; x86_gsbase_write_task(task, arg2); } - preempt_enable(); + hard_preempt_enable(flags); break; } case ARCH_SET_FS: { @@ -784,7 +794,7 @@ long do_arch_prctl_64(struct task_struct if (unlikely(arg2 >= TASK_SIZE_MAX)) return -EPERM; - preempt_disable(); + flags = hard_preempt_disable(); /* * Set the selector to 0 for the same reason * as %gs above. @@ -802,7 +812,7 @@ long do_arch_prctl_64(struct task_struct task->thread.fsindex = 0; x86_fsbase_write_task(task, arg2); } - preempt_enable(); + hard_preempt_enable(flags); break; } case ARCH_GET_FS: { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/process.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/process.c --- linux-5.15.26/arch/x86/kernel/process.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/process.c 2022-03-10 09:47:50.000000000 +0100 @@ -609,9 +609,9 @@ void speculation_ctrl_update(unsigned lo unsigned long flags; /* Forced update. Make sure all relevant TIF flags are different */ - local_irq_save(flags); + flags = hard_local_irq_save(); __speculation_ctrl_update(~tif, tif); - local_irq_restore(flags); + hard_local_irq_restore(flags); } /* Called from seccomp/prctl update */ @@ -711,6 +711,9 @@ void arch_cpu_idle(void) /* * We use this if we don't have any better idle routine.. + * + * IRQ pipeline: safe_halt() returns with hard irqs on, caller does + * not need to force enable. */ void __cpuidle default_idle(void) { @@ -733,7 +736,7 @@ bool xen_set_default_idle(void) void stop_this_cpu(void *dummy) { - local_irq_disable(); + hard_local_irq_disable(); /* * Remove this CPU: */ @@ -829,11 +832,14 @@ static __cpuidle void mwait_idle(void) } __monitor((void *)&current_thread_info()->flags, 0, 0); - if (!need_resched()) + if (!need_resched()) { __sti_mwait(0, 0); - else + } else { + hard_cond_local_irq_enable(); raw_local_irq_enable(); + } } else { + hard_cond_local_irq_enable(); raw_local_irq_enable(); } __current_clr_polling(); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/smpboot.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/smpboot.c --- linux-5.15.26/arch/x86/kernel/smpboot.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/smpboot.c 2022-03-10 09:47:50.000000000 +0100 @@ -262,7 +262,7 @@ static void notrace start_secondary(void x86_platform.nmi_init(); /* enable local interrupts */ - local_irq_enable(); + local_irq_enable_full(); x86_cpuinit.setup_percpu_clockev(); @@ -1138,7 +1138,6 @@ int native_cpu_up(unsigned int cpu, stru { int apicid = apic->cpu_present_to_apicid(cpu); int cpu0_nmi_registered = 0; - unsigned long flags; int err, ret = 0; lockdep_assert_irqs_enabled(); @@ -1189,9 +1188,9 @@ int native_cpu_up(unsigned int cpu, stru * Check TSC synchronization with the AP (keep irqs disabled * while doing so): */ - local_irq_save(flags); + local_irq_disable_full(); check_tsc_sync_source(cpu); - local_irq_restore(flags); + local_irq_enable_full(); while (!cpu_online(cpu)) { cpu_relax(); @@ -1664,7 +1663,7 @@ void play_dead_common(void) /* * With physical CPU hotplug, we should halt the cpu */ - local_irq_disable(); + local_irq_disable_full(); } /** diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/smp.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/smp.c --- linux-5.15.26/arch/x86/kernel/smp.c 2022-03-02 11:48:10.000000000 +0100 +++ 
linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/smp.c 2022-03-10 09:47:50.000000000 +0100 @@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigne /* * this function calls the 'stop' function on all other CPUs in the system. */ -DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(REBOOT_VECTOR, sysvec_reboot) { ack_APIC_irq(); cpu_emergency_vmxoff(); @@ -212,17 +212,18 @@ static void native_stop_other_cpus(int w udelay(1); } - local_irq_save(flags); + flags = hard_local_irq_save(); disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); - local_irq_restore(flags); + hard_local_irq_restore(flags); } /* * Reschedule call back. KVM uses this interrupt to force a cpu out of * guest mode. */ -DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_reschedule_ipi) +DEFINE_IDTENTRY_SYSVEC_SIMPLE_PIPELINED(RESCHEDULE_VECTOR, + sysvec_reschedule_ipi) { ack_APIC_irq(); trace_reschedule_entry(RESCHEDULE_VECTOR); @@ -231,7 +232,8 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_res trace_reschedule_exit(RESCHEDULE_VECTOR); } -DEFINE_IDTENTRY_SYSVEC(sysvec_call_function) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(CALL_FUNCTION_VECTOR, + sysvec_call_function) { ack_APIC_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); @@ -240,7 +242,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_call_funct trace_call_function_exit(CALL_FUNCTION_VECTOR); } -DEFINE_IDTENTRY_SYSVEC(sysvec_call_function_single) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(CALL_FUNCTION_SINGLE_VECTOR, + sysvec_call_function_single) { ack_APIC_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/time.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/time.c --- linux-5.15.26/arch/x86/kernel/time.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/time.c 2022-03-10 09:47:50.000000000 +0100 @@ -54,7 +54,7 @@ EXPORT_SYMBOL(profile_pc); */ static irqreturn_t timer_interrupt(int irq, void *dev_id) { - global_clock_event->event_handler(global_clock_event); + clockevents_handle_event(global_clock_event); return IRQ_HANDLED; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/traps.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/traps.c --- linux-5.15.26/arch/x86/kernel/traps.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/traps.c 2022-03-10 09:47:50.000000000 +0100 @@ -75,14 +75,22 @@ DECLARE_BITMAP(system_vectors, NR_VECTOR static inline void cond_local_irq_enable(struct pt_regs *regs) { - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); + if (regs->flags & X86_EFLAGS_IF) { + if (running_inband()) + local_irq_enable_full(); + else + hard_local_irq_enable(); + } } static inline void cond_local_irq_disable(struct pt_regs *regs) { - if (regs->flags & X86_EFLAGS_IF) - local_irq_disable(); + if (regs->flags & X86_EFLAGS_IF) { + if (running_inband()) + local_irq_disable_full(); + else + hard_local_irq_disable(); + } } __always_inline int is_valid_bugaddr(unsigned long addr) @@ -152,6 +160,32 @@ static void show_signal(struct task_stru } } +static __always_inline +void mark_trap_entry(int trapnr, struct pt_regs *regs) +{ + oob_trap_notify(trapnr, regs); + hard_cond_local_irq_enable(); +} + +static __always_inline +void mark_trap_exit(int trapnr, struct pt_regs *regs) +{ + oob_trap_unwind(trapnr, regs); + hard_cond_local_irq_disable(); +} + +static __always_inline +void mark_trap_entry_raw(int trapnr, struct pt_regs *regs) +{ + oob_trap_notify(trapnr, regs); +} + 
+static __always_inline +void mark_trap_exit_raw(int trapnr, struct pt_regs *regs) +{ + oob_trap_unwind(trapnr, regs); +} + static void do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, int sicode, void __user *addr) @@ -175,12 +209,16 @@ static void do_error_trap(struct pt_regs { RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + mark_trap_entry(trapnr, regs); + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); do_trap(trapnr, signr, str, regs, error_code, sicode, addr); cond_local_irq_disable(regs); } + + mark_trap_exit(trapnr, regs); } /* @@ -235,13 +273,13 @@ static noinstr bool handle_bug(struct pt * state to what it was at the exception site. */ if (regs->flags & X86_EFLAGS_IF) - raw_local_irq_enable(); + local_irq_enable_full(); if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { regs->ip += LEN_UD2; handled = true; } if (regs->flags & X86_EFLAGS_IF) - raw_local_irq_disable(); + local_irq_disable_full(); instrumentation_end(); return handled; @@ -260,9 +298,11 @@ DEFINE_IDTENTRY_RAW(exc_invalid_op) return; state = irqentry_enter(regs); + mark_trap_entry(X86_TRAP_UD, regs); instrumentation_begin(); handle_invalid_op(regs); instrumentation_end(); + mark_trap_exit(X86_TRAP_UD, regs); irqentry_exit(regs, state); } @@ -294,8 +334,10 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_ { char *str = "alignment check"; + mark_trap_entry(X86_TRAP_AC, regs); + if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP) - return; + goto mark_exit; if (!user_mode(regs)) die("Split lock detected\n", regs, error_code); @@ -310,6 +352,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_ out: local_irq_disable(); + +mark_exit: + mark_trap_exit(X86_TRAP_AC, regs); } #ifdef CONFIG_VMAP_STACK @@ -347,6 +392,9 @@ __visible void __noreturn handle_stack_o * * The 32bit #DF shim provides CR2 already as an argument. On 64bit it needs * to be read before doing anything else. + * + * Dovetail: do not even ask the companion core to try restoring the + * in-band stage on double-fault, this would be a lost cause. 
*/ DEFINE_IDTENTRY_DF(exc_double_fault) { @@ -470,9 +518,11 @@ DEFINE_IDTENTRY_DF(exc_double_fault) DEFINE_IDTENTRY(exc_bounds) { + mark_trap_entry(X86_TRAP_BR, regs); + if (notify_die(DIE_TRAP, "bounds", regs, 0, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) - return; + goto out; cond_local_irq_enable(regs); if (!user_mode(regs)) @@ -481,6 +531,8 @@ DEFINE_IDTENTRY(exc_bounds) do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, 0, 0, NULL); cond_local_irq_disable(regs); +out: + mark_trap_exit(X86_TRAP_BR, regs); } enum kernel_gp_hint { @@ -575,9 +627,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_pr } if (v8086_mode(regs)) { - local_irq_enable(); + local_irq_enable_full(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - local_irq_disable(); + local_irq_disable_full(); return; } @@ -587,6 +639,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_pr if (fixup_iopl_exception(regs)) goto exit; + mark_trap_entry(X86_TRAP_GP, regs); tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; @@ -595,7 +648,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_pr show_signal(tsk, SIGSEGV, "", desc, regs, error_code); force_sig(SIGSEGV); - goto exit; + goto mark_exit; } if (fixup_exception(regs, X86_TRAP_GP, error_code, 0)) @@ -613,9 +666,11 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_pr kprobe_fault_handler(regs, X86_TRAP_GP)) goto exit; + mark_trap_entry(X86_TRAP_GP, regs); + ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV); if (ret == NOTIFY_STOP) - goto exit; + goto mark_exit; if (error_code) snprintf(desc, sizeof(desc), "segment-related " GPFSTR); @@ -637,6 +692,8 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_pr die_addr(desc, regs, error_code, gp_addr); +mark_exit: + mark_trap_exit(X86_TRAP_GP, regs); exit: cond_local_irq_disable(regs); } @@ -680,6 +737,8 @@ DEFINE_IDTENTRY_RAW(exc_int3) if (poke_int3_handler(regs)) return; + mark_trap_entry_raw(X86_TRAP_BP, regs); + /* * irqentry_enter_from_user_mode() uses static_branch_{,un}likely() * and therefore can trigger INT3, hence poke_int3_handler() must @@ -702,6 +761,8 @@ DEFINE_IDTENTRY_RAW(exc_int3) instrumentation_end(); irqentry_nmi_exit(regs, irq_state); } + + mark_trap_exit_raw(X86_TRAP_BP, regs); } #ifdef CONFIG_X86_64 @@ -1003,7 +1064,7 @@ static __always_inline void exc_debug_us goto out; /* It's safe to allow irq's after DR6 has been saved */ - local_irq_enable(); + local_irq_enable_full(); if (v8086_mode(regs)) { handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB); @@ -1020,7 +1081,7 @@ static __always_inline void exc_debug_us send_sigtrap(regs, 0, get_si_code(dr6)); out_irq: - local_irq_disable(); + local_irq_disable_full(); out: instrumentation_end(); irqentry_exit_to_user_mode(regs); @@ -1030,13 +1091,17 @@ out: /* IST stack entry */ DEFINE_IDTENTRY_DEBUG(exc_debug) { + mark_trap_entry_raw(X86_TRAP_DB, regs); exc_debug_kernel(regs, debug_read_clear_dr6()); + mark_trap_exit_raw(X86_TRAP_DB, regs); } /* User entry, runs on regular task stack */ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { + mark_trap_entry_raw(X86_TRAP_DB, regs); exc_debug_user(regs, debug_read_clear_dr6()); + mark_trap_exit_raw(X86_TRAP_DB, regs); } #else /* 32 bit does not have separate entry points. 
*/ @@ -1070,13 +1135,14 @@ static void math_error(struct pt_regs *r if (fixup_exception(regs, trapnr, 0, 0)) goto exit; + mark_trap_entry(trapnr, regs); task->thread.error_code = 0; task->thread.trap_nr = trapnr; if (notify_die(DIE_TRAP, str, regs, 0, trapnr, SIGFPE) != NOTIFY_STOP) die(str, regs, 0); - goto exit; + goto mark_exit; } /* @@ -1096,8 +1162,12 @@ static void math_error(struct pt_regs *r if (fixup_vdso_exception(regs, trapnr, 0, 0)) goto exit; + mark_trap_entry(trapnr, regs); + force_sig_fault(SIGFPE, si_code, (void __user *)uprobe_get_trap_addr(regs)); +mark_exit: + mark_trap_exit(trapnr, regs); exit: cond_local_irq_disable(regs); } @@ -1170,7 +1240,9 @@ DEFINE_IDTENTRY(exc_device_not_available * to kill the task than getting stuck in a never-ending * loop of #NM faults. */ + mark_trap_entry(X86_TRAP_NM, regs); die("unexpected #NM exception", regs, 0); + mark_trap_exit(X86_TRAP_NM, regs); } } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/tsc.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/tsc.c --- linux-5.15.26/arch/x86/kernel/tsc.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/tsc.c 2022-03-10 09:47:50.000000000 +0100 @@ -132,8 +132,11 @@ static void __set_cyc2ns_scale(unsigned { unsigned long long ns_now; struct cyc2ns_data data; + unsigned long flags; struct cyc2ns *c2n; + flags = hard_cond_local_irq_save(); + ns_now = cycles_2_ns(tsc_now); /* @@ -164,6 +167,8 @@ static void __set_cyc2ns_scale(unsigned c2n->data[0] = data; raw_write_seqcount_latch(&c2n->seq); c2n->data[1] = data; + + hard_cond_local_irq_restore(flags); } static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) @@ -760,11 +765,11 @@ static unsigned long pit_hpet_ptimer_cal * calibration, which will take at least 50ms, and * read the end value. */ - local_irq_save(flags); + flags = hard_local_irq_save(); tsc1 = tsc_read_refs(&ref1, hpet); tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); tsc2 = tsc_read_refs(&ref2, hpet); - local_irq_restore(flags); + hard_local_irq_restore(flags); /* Pick the lowest PIT TSC calibration so far */ tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); @@ -873,9 +878,9 @@ unsigned long native_calibrate_cpu_early if (!fast_calibrate) fast_calibrate = cpu_khz_from_msr(); if (!fast_calibrate) { - local_irq_save(flags); + flags = hard_local_irq_save(); fast_calibrate = quick_pit_calibrate(); - local_irq_restore(flags); + hard_local_irq_restore(flags); } return fast_calibrate; } @@ -943,7 +948,7 @@ void tsc_restore_sched_clock_state(void) if (!sched_clock_stable()) return; - local_irq_save(flags); + flags = hard_local_irq_save(); /* * We're coming out of suspend, there's no concurrency yet; don't @@ -961,7 +966,7 @@ void tsc_restore_sched_clock_state(void) per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset; } - local_irq_restore(flags); + hard_local_irq_restore(flags); } #ifdef CONFIG_CPU_FREQ @@ -1413,6 +1418,8 @@ static int __init init_tsc_clocksource(v if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; + clocksource_tsc.vdso_type = CLOCKSOURCE_VDSO_ARCHITECTED; + /* * When TSC frequency is known (retrieved via MSR or CPUID), we skip * the refined calibration and directly register it as a clocksource. 
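The tsc.c hunks above show the conversion this patch applies to timing-critical sections throughout the tree: local_irq_save()/local_irq_restore() become hard_local_irq_save()/hard_local_irq_restore(), because with CONFIG_IRQ_PIPELINE the former only virtually mask interrupts for the in-band stage and no longer prevent out-of-band activity from stretching a calibration window. The fragment below is a minimal sketch of that idiom, not part of the patch; calibrate_window() and the chosen headers are illustrative assumptions, only the hard_*_irq helpers are taken from the diff.

/*
 * Sketch only: hard-disable IRQs around a measurement window so that
 * neither in-band nor out-of-band interrupts can lengthen it.
 */
#include <linux/irqflags.h>
#include <linux/timekeeping.h>
#include <linux/types.h>

static u64 calibrate_window(void)
{
        unsigned long flags;
        u64 t0, t1;

        flags = hard_local_irq_save();  /* masks the CPU, not just the in-band stage */
        t0 = ktime_get_mono_fast_ns();
        /* ... poll the hardware being calibrated ... */
        t1 = ktime_get_mono_fast_ns();
        hard_local_irq_restore(flags);

        return t1 - t0;
}

The hard_cond_local_irq_save()/hard_cond_local_irq_restore() pair used in __set_cyc2ns_scale() above follows the same pattern; judging from its uses across this patch, the conditional form only hard-disables when IRQ pipelining is built in and stays out of the way otherwise.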
diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kernel/tsc_sync.c linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/tsc_sync.c --- linux-5.15.26/arch/x86/kernel/tsc_sync.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kernel/tsc_sync.c 2022-03-10 09:47:50.000000000 +0100 @@ -367,6 +367,8 @@ void check_tsc_sync_source(int cpu) atomic_set(&test_runs, 1); else atomic_set(&test_runs, 3); + + hard_cond_local_irq_disable(); retry: /* * Wait for the target to start or to skip the test: @@ -448,6 +450,8 @@ void check_tsc_sync_target(void) if (unsynchronized_tsc()) return; + hard_cond_local_irq_disable(); + /* * Store, verify and sanitize the TSC adjust register. If * successful skip the test. diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kvm/emulate.c linux-dovetail-v5.15.y-dovetail/arch/x86/kvm/emulate.c --- linux-5.15.26/arch/x86/kvm/emulate.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kvm/emulate.c 2022-03-10 09:47:50.000000000 +0100 @@ -1081,25 +1081,28 @@ static void fetch_register_operand(struc static int em_fninit(struct x86_emulate_ctxt *ctxt) { + unsigned long flags; + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - kvm_fpu_get(); + flags = kvm_fpu_get(); asm volatile("fninit"); - kvm_fpu_put(); + kvm_fpu_put(flags); return X86EMUL_CONTINUE; } static int em_fnstcw(struct x86_emulate_ctxt *ctxt) { + unsigned long flags; u16 fcw; if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - kvm_fpu_get(); + flags = kvm_fpu_get(); asm volatile("fnstcw %0": "+m"(fcw)); - kvm_fpu_put(); + kvm_fpu_put(flags); ctxt->dst.val = fcw; @@ -1108,14 +1111,15 @@ static int em_fnstcw(struct x86_emulate_ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) { + unsigned long flags; u16 fsw; if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - kvm_fpu_get(); + flags = kvm_fpu_get(); asm volatile("fnstsw %0": "+m"(fsw)); - kvm_fpu_put(); + kvm_fpu_put(flags); ctxt->dst.val = fsw; @@ -4021,17 +4025,18 @@ static inline size_t fxstate_size(struct static int em_fxsave(struct x86_emulate_ctxt *ctxt) { struct fxregs_state fx_state; + unsigned long flags; int rc; rc = check_fxsr(ctxt); if (rc != X86EMUL_CONTINUE) return rc; - kvm_fpu_get(); + flags = kvm_fpu_get(); rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); - kvm_fpu_put(); + kvm_fpu_put(flags); if (rc != X86EMUL_CONTINUE) return rc; @@ -4065,6 +4070,7 @@ static int em_fxrstor(struct x86_emulate struct fxregs_state fx_state; int rc; size_t size; + unsigned long flags; rc = check_fxsr(ctxt); if (rc != X86EMUL_CONTINUE) @@ -4075,7 +4081,7 @@ static int em_fxrstor(struct x86_emulate if (rc != X86EMUL_CONTINUE) return rc; - kvm_fpu_get(); + flags = kvm_fpu_get(); if (size < __fxstate_size(16)) { rc = fxregs_fixup(&fx_state, size); @@ -4092,7 +4098,7 @@ static int em_fxrstor(struct x86_emulate rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); out: - kvm_fpu_put(); + kvm_fpu_put(flags); return rc; } @@ -5338,11 +5344,12 @@ static bool string_insn_completed(struct static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { + unsigned long flags; int rc; - kvm_fpu_get(); + flags = kvm_fpu_get(); rc = asm_safe("fwait"); - kvm_fpu_put(); + kvm_fpu_put(flags); if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kvm/fpu.h 
linux-dovetail-v5.15.y-dovetail/arch/x86/kvm/fpu.h --- linux-5.15.26/arch/x86/kvm/fpu.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kvm/fpu.h 2022-03-10 09:47:50.000000000 +0100 @@ -95,46 +95,48 @@ static inline void _kvm_write_mmx_reg(in } } -static inline void kvm_fpu_get(void) +static inline unsigned long kvm_fpu_get(void) { - fpregs_lock(); + unsigned long flags = fpregs_lock(); fpregs_assert_state_consistent(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) switch_fpu_return(); + + return flags; } -static inline void kvm_fpu_put(void) +static inline void kvm_fpu_put(unsigned long flags) { - fpregs_unlock(); + fpregs_unlock(flags); } static inline void kvm_read_sse_reg(int reg, sse128_t *data) { - kvm_fpu_get(); + unsigned long flags = kvm_fpu_get(); _kvm_read_sse_reg(reg, data); - kvm_fpu_put(); + kvm_fpu_put(flags); } static inline void kvm_write_sse_reg(int reg, const sse128_t *data) { - kvm_fpu_get(); + unsigned long flags = kvm_fpu_get(); _kvm_write_sse_reg(reg, data); - kvm_fpu_put(); + kvm_fpu_put(flags); } static inline void kvm_read_mmx_reg(int reg, u64 *data) { - kvm_fpu_get(); + unsigned long flags = kvm_fpu_get(); _kvm_read_mmx_reg(reg, data); - kvm_fpu_put(); + kvm_fpu_put(flags); } static inline void kvm_write_mmx_reg(int reg, const u64 *data) { - kvm_fpu_get(); + unsigned long flags = kvm_fpu_get(); _kvm_write_mmx_reg(reg, data); - kvm_fpu_put(); + kvm_fpu_put(flags); } #endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kvm/hyperv.c linux-dovetail-v5.15.y-dovetail/arch/x86/kvm/hyperv.c --- linux-5.15.26/arch/x86/kvm/hyperv.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kvm/hyperv.c 2022-03-10 09:47:50.000000000 +0100 @@ -2103,12 +2103,13 @@ static bool is_xmm_fast_hypercall(struct static void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc) { + unsigned long flags; int reg; - kvm_fpu_get(); + flags = kvm_fpu_get(); for (reg = 0; reg < HV_HYPERCALL_MAX_XMM_REGISTERS; reg++) _kvm_read_sse_reg(reg, &hc->xmm[reg]); - kvm_fpu_put(); + kvm_fpu_put(flags); } static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kvm/vmx/vmx.c linux-dovetail-v5.15.y-dovetail/arch/x86/kvm/vmx/vmx.c --- linux-5.15.26/arch/x86/kvm/vmx/vmx.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kvm/vmx/vmx.c 2022-03-10 09:47:50.000000000 +0100 @@ -600,14 +600,15 @@ static int vmx_set_guest_uret_msr(struct struct vmx_uret_msr *msr, u64 data) { unsigned int slot = msr - vmx->guest_uret_msrs; + unsigned long flags; int ret = 0; u64 old_msr_data = msr->data; msr->data = data; if (msr->load_into_hardware) { - preempt_disable(); + flags = hard_preempt_disable(); ret = kvm_set_user_return_msr(slot, msr->data, msr->mask); - preempt_enable(); + hard_preempt_enable(flags); if (ret) msr->data = old_msr_data; } @@ -1210,19 +1211,23 @@ static void vmx_prepare_switch_to_host(s #ifdef CONFIG_X86_64 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) { - preempt_disable(); + unsigned long flags; + + flags = hard_preempt_disable(); if (vmx->guest_state_loaded) rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); - preempt_enable(); + hard_preempt_enable(flags); return vmx->msr_guest_kernel_gs_base; } static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) { - preempt_disable(); + unsigned long flags; + + flags = hard_preempt_disable(); if 
(vmx->guest_state_loaded) wrmsrl(MSR_KERNEL_GS_BASE, data); - preempt_enable(); + hard_preempt_enable(flags); vmx->msr_guest_kernel_gs_base = data; } #endif @@ -1655,6 +1660,7 @@ static void vmx_setup_uret_msrs(struct v * The SYSCALL MSRs are only needed on long mode guests, and only * when EFER.SCE is set. */ + hard_cond_local_irq_disable(); load_syscall_msrs = is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE); @@ -1676,6 +1682,8 @@ static void vmx_setup_uret_msrs(struct v */ vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM)); + hard_cond_local_irq_enable(); + /* * The set of MSRs to load may have changed, reload MSRs before the * next VM-Enter. @@ -1940,6 +1948,7 @@ static int vmx_set_msr(struct kvm_vcpu * u32 msr_index = msr_info->index; u64 data = msr_info->data; u32 index; + unsigned long flags; switch (msr_index) { case MSR_EFER: @@ -2221,11 +2230,22 @@ static int vmx_set_msr(struct kvm_vcpu * default: find_uret_msr: + /* + * Dovetail: guest MSRs may be activated independently + * from vcpu_run(): rely on the notifier for restoring + * them upon preemption by the companion core, right + * before the current CPU switches to out-of-band + * scheduling (see dovetail_context_switch()). + */ msr = vmx_find_uret_msr(vmx, msr_index); - if (msr) + if (msr) { + flags = hard_cond_local_irq_save(); + inband_enter_guest(vcpu); ret = vmx_set_guest_uret_msr(vmx, msr, data); - else + hard_cond_local_irq_restore(flags); + } else { ret = kvm_set_msr_common(vcpu, msr_info); + } } return ret; @@ -6886,7 +6906,9 @@ static int vmx_create_vcpu(struct kvm_vc vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; init_vmcs(vmx); + hard_cond_local_irq_disable(); vmx_vcpu_put(vcpu); + hard_cond_local_irq_enable(); put_cpu(); if (cpu_need_virtualize_apic_accesses(vcpu)) { err = alloc_apic_access_page(vcpu->kvm); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/kvm/x86.c linux-dovetail-v5.15.y-dovetail/arch/x86/kvm/x86.c --- linux-5.15.26/arch/x86/kvm/x86.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/kvm/x86.c 2022-03-10 09:47:50.000000000 +0100 @@ -194,6 +194,7 @@ module_param(pi_inject_timer, bint, S_IR struct kvm_user_return_msrs { struct user_return_notifier urn; bool registered; + bool dirty; struct kvm_user_return_msr_values { u64 host; u64 curr; @@ -338,12 +339,29 @@ static inline void kvm_async_pf_hash_res vcpu->arch.apf.gfns[i] = ~0; } -static void kvm_on_user_return(struct user_return_notifier *urn) +static void __kvm_on_user_return(struct kvm_user_return_msrs *msrs) { + struct kvm_user_return_msr_values *values; unsigned slot; + + if (!msrs->dirty) + return; + + for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) { + values = &msrs->values[slot]; + if (values->host != values->curr) { + wrmsrl(kvm_uret_msrs_list[slot], values->host); + values->curr = values->host; + } + } + + msrs->dirty = false; +} + +static void kvm_on_user_return(struct user_return_notifier *urn) +{ struct kvm_user_return_msrs *msrs = container_of(urn, struct kvm_user_return_msrs, urn); - struct kvm_user_return_msr_values *values; unsigned long flags; /* @@ -356,27 +374,25 @@ static void kvm_on_user_return(struct us user_return_notifier_unregister(urn); } local_irq_restore(flags); - for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) { - values = &msrs->values[slot]; - if (values->host != values->curr) { - wrmsrl(kvm_uret_msrs_list[slot], values->host); - values->curr = values->host; - } - } + flags = hard_cond_local_irq_save(); + 
__kvm_on_user_return(msrs); + hard_cond_local_irq_restore(flags); + inband_exit_guest(); } static int kvm_probe_user_return_msr(u32 msr) { + unsigned long flags; u64 val; int ret; - preempt_disable(); + flags = hard_preempt_disable(); ret = rdmsrl_safe(msr, &val); if (ret) goto out; ret = wrmsrl_safe(msr, val); out: - preempt_enable(); + hard_preempt_enable(flags); return ret; } @@ -431,6 +447,7 @@ int kvm_set_user_return_msr(unsigned slo if (err) return 1; + msrs->dirty = true; msrs->values[slot].curr = value; if (!msrs->registered) { msrs->urn.on_user_return = kvm_on_user_return; @@ -4395,11 +4412,22 @@ static void kvm_steal_time_set_preempted void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); + unsigned long flags; int idx; if (vcpu->preempted && !vcpu->arch.guest_state_protected) vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu); + flags = hard_cond_local_irq_save(); + /* + * Skip steal time accounting from the out-of-band stage since + * this is oob-unsafe. We leave it to the next call from the + * inband stage. + */ + if (running_oob()) + goto skip_steal_time_update; + /* * Take the srcu lock as memslots will be accessed to check the gfn * cache generation against the memslots generation. @@ -4411,9 +4439,43 @@ void kvm_arch_vcpu_put(struct kvm_vcpu * kvm_steal_time_set_preempted(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, idx); +skip_steal_time_update: static_call(kvm_x86_vcpu_put)(vcpu); vcpu->arch.last_host_tsc = rdtsc(); + + inband_set_vcpu_release_state(vcpu, false); + if (!msrs->dirty) + inband_exit_guest(); + + hard_cond_local_irq_restore(flags); +} + +#ifdef CONFIG_DOVETAIL +/* hard irqs off. */ +void kvm_handle_oob_switch(struct kvm_oob_notifier *nfy) +{ + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); + struct kvm_vcpu *vcpu; + + vcpu = container_of(nfy, struct kvm_vcpu, oob_notifier); + /* + * If user_return MSRs were still active when leaving + * kvm_arch_vcpu_put(), inband_exit_guest() was not invoked, + * so we might get called later on before kvm_on_user_return() + * had a chance to run, if a switch to out-of-band scheduling + * sneaks in in the meantime. Prevent kvm_arch_vcpu_put() + * from running twice in such a case by checking ->put_vcpu + * from the notifier block. + */ + if (nfy->put_vcpu) + kvm_arch_vcpu_put(vcpu); + + __kvm_on_user_return(msrs); + inband_exit_guest(); } +#else +#define kvm_handle_oob_switch NULL +#endif static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) @@ -9683,6 +9745,10 @@ static int vcpu_enter_guest(struct kvm_v } preempt_disable(); + local_irq_disable_full(); + + inband_enter_guest(vcpu); + inband_set_vcpu_release_state(vcpu, true); static_call(kvm_x86_prepare_guest_switch)(vcpu); @@ -9691,7 +9757,6 @@ static int vcpu_enter_guest(struct kvm_v * IPI are then delayed after guest entry, which ensures that they * result in virtual interrupt delivery. */ - local_irq_disable(); vcpu->mode = IN_GUEST_MODE; srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); @@ -9722,7 +9787,7 @@ static int vcpu_enter_guest(struct kvm_v if (kvm_vcpu_exit_request(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - local_irq_enable(); + local_irq_enable_full(); preempt_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = 1; @@ -9801,9 +9866,9 @@ static int vcpu_enter_guest(struct kvm_v * stat.exits increment will do nicely. 
*/ kvm_before_interrupt(vcpu); - local_irq_enable(); + local_irq_enable_full(); ++vcpu->stat.exits; - local_irq_disable(); + local_irq_disable_full(); kvm_after_interrupt(vcpu); /* @@ -9823,7 +9888,7 @@ static int vcpu_enter_guest(struct kvm_v } } - local_irq_enable(); + local_irq_enable_full(); preempt_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -10039,7 +10104,9 @@ static void kvm_save_current_fpu(struct /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - fpregs_lock(); + unsigned long flags; + + flags = fpregs_lock(); kvm_save_current_fpu(vcpu->arch.user_fpu); @@ -10053,7 +10120,7 @@ static void kvm_load_guest_fpu(struct kv ~XFEATURE_MASK_PKRU); fpregs_mark_activate(); - fpregs_unlock(); + fpregs_unlock(flags); trace_kvm_fpu(1); } @@ -10061,7 +10128,9 @@ static void kvm_load_guest_fpu(struct kv /* When vcpu_run ends, restore user space FPU context. */ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - fpregs_lock(); + unsigned long flags; + + flags = fpregs_lock(); /* * Guests with protected state can't have it read by the hypervisor, @@ -10073,7 +10142,7 @@ static void kvm_put_guest_fpu(struct kvm restore_fpregs_from_fpstate(&vcpu->arch.user_fpu->state); fpregs_mark_activate(); - fpregs_unlock(); + fpregs_unlock(flags); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); @@ -10851,6 +10920,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu if (r) goto free_guest_fpu; + inband_init_vcpu(vcpu, kvm_handle_oob_switch); vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; kvm_vcpu_mtrr_init(vcpu); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/lib/usercopy.c linux-dovetail-v5.15.y-dovetail/arch/x86/lib/usercopy.c --- linux-5.15.26/arch/x86/lib/usercopy.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/lib/usercopy.c 2022-03-10 09:47:50.000000000 +0100 @@ -32,7 +32,7 @@ copy_from_user_nmi(void *to, const void { unsigned long ret; - if (__range_not_ok(from, n, TASK_SIZE)) + if (running_oob() || __range_not_ok(from, n, TASK_SIZE)) return n; if (!nmi_uaccess_okay()) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/mm/fault.c linux-dovetail-v5.15.y-dovetail/arch/x86/mm/fault.c --- linux-5.15.26/arch/x86/mm/fault.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/mm/fault.c 2022-03-10 09:47:50.000000000 +0100 @@ -646,6 +646,12 @@ page_fault_oops(struct pt_regs *regs, un goto oops; } + /* + * Do not bother unwinding the notification context on + * CPU/firmware/kernel bug. + */ + oob_trap_notify(X86_TRAP_PF, regs); + #ifdef CONFIG_VMAP_STACK /* * Stack overflow? During boot, we can fault near the initial @@ -722,7 +728,7 @@ kernelmode_fixup_or_oops(struct pt_regs * the below recursive fault logic only apply to a faults from * task context. 
*/ - if (in_interrupt()) + if (running_oob() || in_interrupt()) return; /* @@ -796,6 +802,55 @@ static bool is_vsyscall_vaddr(unsigned l return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); } +#ifdef CONFIG_IRQ_PIPELINE + +static inline void cond_reenable_irqs_user(void) +{ + hard_local_irq_enable(); + + if (running_inband()) + local_irq_enable(); +} + +static inline void cond_reenable_irqs_kernel(irqentry_state_t state, + struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) { + hard_local_irq_enable(); + if (state.stage_info == IRQENTRY_INBAND_UNSTALLED) + local_irq_enable(); + } +} + +static inline void cond_disable_irqs(void) +{ + hard_local_irq_disable(); + + if (running_inband()) + local_irq_disable(); +} + +#else /* !CONFIG_IRQ_PIPELINE */ + +static inline void cond_reenable_irqs_user(void) +{ + local_irq_enable(); +} + +static inline void cond_reenable_irqs_kernel(irqentry_state_t state, + struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void cond_disable_irqs(void) +{ + local_irq_disable(); +} + +#endif /* !CONFIG_IRQ_PIPELINE */ + static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) @@ -818,7 +873,7 @@ __bad_area_nosemaphore(struct pt_regs *r * User mode accesses just cause a SIGSEGV. * It's possible to have interrupts off here: */ - local_irq_enable(); + cond_reenable_irqs_user(); /* * Valid to do another page fault here because this one came @@ -835,6 +890,8 @@ __bad_area_nosemaphore(struct pt_regs *r if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) return; + oob_trap_notify(X86_TRAP_PF, regs); + if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); @@ -845,7 +902,8 @@ __bad_area_nosemaphore(struct pt_regs *r else force_sig_fault(SIGSEGV, si_code, (void __user *)address); - local_irq_disable(); + local_irq_disable_full(); + oob_trap_unwind(X86_TRAP_PF, regs); } static noinline void @@ -1219,7 +1277,8 @@ NOKPROBE_SYMBOL(do_kern_addr_fault); static inline void do_user_addr_fault(struct pt_regs *regs, unsigned long error_code, - unsigned long address) + unsigned long address, + irqentry_state_t state) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -1278,7 +1337,7 @@ void do_user_addr_fault(struct pt_regs * * If we're in an interrupt, have no user context or are running * in a region with pagefaults disabled then we must not take the fault */ - if (unlikely(faulthandler_disabled() || !mm)) { + if (unlikely(running_inband() && (faulthandler_disabled() || !mm))) { bad_area_nosemaphore(regs, error_code, address); return; } @@ -1291,13 +1350,21 @@ void do_user_addr_fault(struct pt_regs * * potential system fault or CPU buglet: */ if (user_mode(regs)) { - local_irq_enable(); + cond_reenable_irqs_user(); flags |= FAULT_FLAG_USER; } else { if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); + cond_reenable_irqs_kernel(state, regs); } + /* + * At this point, we would have to stop running + * out-of-band. Tell the companion core about the page fault + * event, so that it might switch current to in-band mode if + * need be. 
+ */ + oob_trap_notify(X86_TRAP_PF, regs); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); if (error_code & X86_PF_WRITE) @@ -1319,7 +1386,7 @@ void do_user_addr_fault(struct pt_regs * */ if (is_vsyscall_vaddr(address)) { if (emulate_vsyscall(error_code, regs, address)) - return; + goto out; } #endif @@ -1342,7 +1409,7 @@ void do_user_addr_fault(struct pt_regs * * which we do not expect faults. */ bad_area_nosemaphore(regs, error_code, address); - return; + goto out; } retry: mmap_read_lock(mm); @@ -1358,17 +1425,17 @@ retry: vma = find_vma(mm, address); if (unlikely(!vma)) { bad_area(regs, error_code, address); - return; + goto out; } if (likely(vma->vm_start <= address)) goto good_area; if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { bad_area(regs, error_code, address); - return; + goto out; } if (unlikely(expand_stack(vma, address))) { bad_area(regs, error_code, address); - return; + goto out; } /* @@ -1378,7 +1445,7 @@ retry: good_area: if (unlikely(access_error(error_code, vma))) { bad_area_access_error(regs, error_code, address, vma); - return; + goto out; } /* @@ -1405,7 +1472,7 @@ good_area: kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY); - return; + goto out; } /* @@ -1421,12 +1488,12 @@ good_area: mmap_read_unlock(mm); if (likely(!(fault & VM_FAULT_ERROR))) - return; + goto out; if (fatal_signal_pending(current) && !user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, 0, 0, ARCH_DEFAULT_PKEY); - return; + goto out; } if (fault & VM_FAULT_OOM) { @@ -1435,7 +1502,7 @@ good_area: kernelmode_fixup_or_oops(regs, error_code, address, SIGSEGV, SEGV_MAPERR, ARCH_DEFAULT_PKEY); - return; + goto out; } /* @@ -1453,6 +1520,8 @@ good_area: else BUG(); } +out: + oob_trap_unwind(X86_TRAP_PF, regs); } NOKPROBE_SYMBOL(do_user_addr_fault); @@ -1471,7 +1540,8 @@ trace_page_fault_entries(struct pt_regs static __always_inline void handle_page_fault(struct pt_regs *regs, unsigned long error_code, - unsigned long address) + unsigned long address, + irqentry_state_t state) { trace_page_fault_entries(regs, error_code, address); @@ -1482,7 +1552,7 @@ handle_page_fault(struct pt_regs *regs, if (unlikely(fault_in_kernel_space(address))) { do_kern_addr_fault(regs, error_code, address); } else { - do_user_addr_fault(regs, error_code, address); + do_user_addr_fault(regs, error_code, address, state); /* * User address page fault handling might have reenabled * interrupts. Fixing up all potential exit points of @@ -1490,7 +1560,7 @@ handle_page_fault(struct pt_regs *regs, * doable w/o creating an unholy mess or turning the code * upside down. */ - local_irq_disable(); + cond_disable_irqs(); } } @@ -1538,8 +1608,46 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_f state = irqentry_enter(regs); instrumentation_begin(); - handle_page_fault(regs, error_code, address); + handle_page_fault(regs, error_code, address, state); instrumentation_end(); irqentry_exit(regs, state); } + +#ifdef CONFIG_DOVETAIL + +void arch_advertise_page_mapping(unsigned long start, unsigned long end) +{ + unsigned long next, addr = start; + pgd_t *pgd, *pgd_ref; + struct page *page; + + /* + * APEI may create temporary mappings in interrupt context - + * nothing we can and need to propagate globally. 
+ */ + if (in_interrupt()) + return; + + if (!(start >= VMALLOC_START && start < VMALLOC_END)) + return; + + do { + next = pgd_addr_end(addr, end); + pgd_ref = pgd_offset_k(addr); + if (pgd_none(*pgd_ref)) + continue; + spin_lock(&pgd_lock); + list_for_each_entry(page, &pgd_list, lru) { + pgd = page_address(page) + pgd_index(addr); + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + } + spin_unlock(&pgd_lock); + addr = next; + } while (addr != end); + + arch_flush_lazy_mmu_mode(); +} + +#endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/mm/tlb.c linux-dovetail-v5.15.y-dovetail/arch/x86/mm/tlb.c --- linux-5.15.26/arch/x86/mm/tlb.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/mm/tlb.c 2022-03-10 09:47:50.000000000 +0100 @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -317,10 +318,12 @@ EXPORT_SYMBOL_GPL(leave_mm); void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { - unsigned long flags; + unsigned long flags, _flags; local_irq_save(flags); + protect_inband_mm(_flags); switch_mm_irqs_off(prev, next, tsk); + unprotect_inband_mm(_flags); local_irq_restore(flags); } @@ -506,7 +509,9 @@ void switch_mm_irqs_off(struct mm_struct */ /* We don't want flush_tlb_func() to run concurrently with us. */ - if (IS_ENABLED(CONFIG_PROVE_LOCKING)) + if (IS_ENABLED(CONFIG_DOVETAIL)) + WARN_ON_ONCE(!hard_irqs_disabled()); + else if (IS_ENABLED(CONFIG_PROVE_LOCKING)) WARN_ON_ONCE(!irqs_disabled()); /* @@ -732,11 +737,11 @@ static void flush_tlb_func(void *info) */ const struct flush_tlb_info *f = info; struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); - u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); - u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen); - u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); + u32 loaded_mm_asid; + u64 mm_tlb_gen; + u64 local_tlb_gen; bool local = smp_processor_id() == f->initiating_cpu; - unsigned long nr_invalidate = 0; + unsigned long nr_invalidate = 0, flags; /* This code cannot presently handle being reentered. */ VM_WARN_ON(!irqs_disabled()); @@ -750,8 +755,16 @@ static void flush_tlb_func(void *info) return; } - if (unlikely(loaded_mm == &init_mm)) + protect_inband_mm(flags); + + loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen); + local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); + + if (unlikely(loaded_mm == &init_mm)) { + unprotect_inband_mm(flags); return; + } VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != loaded_mm->context.ctx_id); @@ -767,6 +780,7 @@ static void flush_tlb_func(void *info) * IPIs to lazy TLB mode CPUs. */ switch_mm_irqs_off(NULL, &init_mm, NULL); + unprotect_inband_mm(flags); return; } @@ -777,12 +791,15 @@ static void flush_tlb_func(void *info) * be handled can catch us all the way up, leaving no work for * the second flush. */ + unprotect_inband_mm(flags); goto done; } WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen); WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen); + unprotect_inband_mm(flags); + /* * If we get to this point, we know that our TLB is out of date. * This does not strictly imply that we need to flush (it's @@ -1166,7 +1183,7 @@ STATIC_NOPV void native_flush_tlb_global * from interrupts. (Use the raw variant because this code can * be called from deep inside debugging code.) 
*/ - raw_local_irq_save(flags); + flags = hard_local_irq_save(); cr4 = this_cpu_read(cpu_tlbstate.cr4); /* toggle PGE */ @@ -1174,7 +1191,7 @@ STATIC_NOPV void native_flush_tlb_global /* write old PGE again and flush TLBs */ native_write_cr4(cr4); - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); } /* @@ -1182,6 +1199,8 @@ STATIC_NOPV void native_flush_tlb_global */ STATIC_NOPV void native_flush_tlb_local(void) { + unsigned long flags; + /* * Preemption or interrupts must be disabled to protect the access * to the per CPU variable and to prevent being preempted between @@ -1189,10 +1208,14 @@ STATIC_NOPV void native_flush_tlb_local( */ WARN_ON_ONCE(preemptible()); + flags = hard_cond_local_irq_save(); + invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* If current->mm == NULL then the read_cr3() "borrows" an mm */ native_write_cr3(__native_read_cr3()); + + hard_cond_local_irq_restore(flags); } void flush_tlb_local(void) @@ -1264,6 +1287,16 @@ bool nmi_uaccess_okay(void) VM_WARN_ON_ONCE(!loaded_mm); /* + * There would be no way for the companion core to switch an + * out-of-band task back in-band in order to handle an access + * fault over NMI safely. Tell the caller that uaccess from + * NMI is NOT ok if the preempted task was running + * out-of-band. + */ + if (running_oob()) + return false; + + /* * The condition we want to check is * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, * if we're running in a VM with shadow paging, and nmi_uaccess_okay() diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/platform/efi/efi_64.c linux-dovetail-v5.15.y-dovetail/arch/x86/platform/efi/efi_64.c --- linux-5.15.26/arch/x86/platform/efi/efi_64.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/platform/efi/efi_64.c 2022-03-10 09:47:50.000000000 +0100 @@ -461,15 +461,23 @@ void __init efi_dump_pagetable(void) */ void efi_enter_mm(void) { + unsigned long flags; + + protect_inband_mm(flags); efi_prev_mm = current->active_mm; current->active_mm = &efi_mm; switch_mm(efi_prev_mm, &efi_mm, NULL); + unprotect_inband_mm(flags); } void efi_leave_mm(void) { + unsigned long flags; + + protect_inband_mm(flags); current->active_mm = efi_prev_mm; switch_mm(&efi_mm, efi_prev_mm, NULL); + unprotect_inband_mm(flags); } static DEFINE_SPINLOCK(efi_runtime_lock); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/xen/enlighten_hvm.c linux-dovetail-v5.15.y-dovetail/arch/x86/xen/enlighten_hvm.c --- linux-5.15.26/arch/x86/xen/enlighten_hvm.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/xen/enlighten_hvm.c 2022-03-10 09:47:50.000000000 +0100 @@ -120,7 +120,8 @@ static void __init init_hvm_pv_info(void this_cpu_write(xen_vcpu_id, smp_processor_id()); } -DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_callback) +DEFINE_IDTENTRY_SYSVEC_PIPELINED(HYPERVISOR_CALLBACK_VECTOR, + sysvec_xen_hvm_callback) { struct pt_regs *old_regs = set_irq_regs(regs); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/arch/x86/xen/Kconfig linux-dovetail-v5.15.y-dovetail/arch/x86/xen/Kconfig --- linux-5.15.26/arch/x86/xen/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/arch/x86/xen/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -5,7 +5,7 @@ config XEN bool "Xen guest support" - depends on PARAVIRT + depends on PARAVIRT && !IRQ_PIPELINE select PARAVIRT_CLOCK select X86_HV_CALLBACK_VECTOR depends on X86_64 || (X86_32 && X86_PAE) diff -uprN -X 
linux-5.15.26/Documentation/dontdiff linux-5.15.26/Documentation/dontdiff linux-dovetail-v5.15.y-dovetail/Documentation/dontdiff diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/Documentation/dovetail.rst linux-dovetail-v5.15.y-dovetail/Documentation/dovetail.rst --- linux-5.15.26/Documentation/dovetail.rst 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/Documentation/dovetail.rst 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,30 @@ +======================== +Introduction to Dovetail +======================== + +:Author: Philippe Gerum +:Date: 08.04.2020 + +Using Linux as a host for lightweight software cores specialized in +delivering very short and bounded response times has been a popular +way of supporting real-time applications in the embedded space over +the years. + +In this so-called *dual kernel* design, the time-critical work is +immediately delegated to a small companion core running out-of-band +with respect to the regular, in-band kernel activities. Applications +run in user space, obtaining real-time services from the +core. Alternatively, when there is no real-time requirement, threads +can still use the rich GPOS feature set Linux provides such as +networking, data storage or GUIs. + +*Dovetail* introduces a high-priority execution stage into the main +kernel logic reserved for such a companion core to run on. At any +time, out-of-band activities from this stage can preempt the common, +in-band work. A companion core can be implemented as a driver, +which connects to the main kernel via the Dovetail interface for +delivering ultra-low latency scheduling capabilities to applications. + +Dovetail is fully described at https://evlproject.org/dovetail/. +The reference implementation of a Dovetail-based companion core is +maintained at https://evlproject.org/core/. 
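As a concrete counterpart to the description above, the fragment below sketches what stage-aware code can look like; it is an illustration, not part of dovetail.rst or of the patch. running_oob(), running_inband() and the hard_*_irq helpers all appear elsewhere in this diff, while oob_safe_work() and the chosen headers are assumptions made for the example.

/* Illustration only: branch on the current execution stage. */
#include <linux/irqflags.h>
#include <linux/irq_pipeline.h>

static void oob_safe_work(void)
{
        /* Hypothetical payload: must not sleep or take in-band locks. */
}

static void do_work_on_any_stage(void)
{
        unsigned long flags;

        if (running_oob()) {
                /* Out-of-band stage: only hard masking is meaningful here. */
                flags = hard_local_irq_save();
                oob_safe_work();
                hard_local_irq_restore(flags);
        } else {
                /* In-band stage: the regular, virtualized masking applies. */
                local_irq_save(flags);
                oob_safe_work();
                local_irq_restore(flags);
        }
}

A real companion core, such as the EVL core referenced above, attaches to these mechanisms through the interfaces the rest of this patch introduces: the out-of-band IRQ stage, the hard_*_irq primitives, and the oob_trap_notify()/oob_trap_unwind() hooks visible throughout the x86 changes.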
diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/base/regmap/internal.h linux-dovetail-v5.15.y-dovetail/drivers/base/regmap/internal.h --- linux-5.15.26/drivers/base/regmap/internal.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/base/regmap/internal.h 2022-03-10 09:47:50.000000000 +0100 @@ -50,7 +50,10 @@ struct regmap { union { struct mutex mutex; struct { - spinlock_t spinlock; + union { + spinlock_t spinlock; + hard_spinlock_t oob_lock; + }; unsigned long spinlock_flags; }; struct { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/base/regmap/regmap.c linux-dovetail-v5.15.y-dovetail/drivers/base/regmap/regmap.c --- linux-5.15.26/drivers/base/regmap/regmap.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/base/regmap/regmap.c 2022-03-10 09:47:50.000000000 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -550,6 +551,23 @@ __releases(&map->raw_spinlock) raw_spin_unlock_irqrestore(&map->raw_spinlock, map->raw_spinlock_flags); } +static void regmap_lock_oob(void *__map) +__acquires(&map->oob_lock) +{ + struct regmap *map = __map; + unsigned long flags; + + raw_spin_lock_irqsave(&map->oob_lock, flags); + map->spinlock_flags = flags; +} + +static void regmap_unlock_oob(void *__map) +__releases(&map->oob_lock) +{ + struct regmap *map = __map; + raw_spin_unlock_irqrestore(&map->oob_lock, map->spinlock_flags); +} + static void dev_get_regmap_release(struct device *dev, void *res) { /* @@ -788,7 +806,13 @@ struct regmap *__regmap_init(struct devi } else { if ((bus && bus->fast_io) || config->fast_io) { - if (config->use_raw_spinlock) { + if (dovetailing() && config->oob_io) { + raw_spin_lock_init(&map->oob_lock); + map->lock = regmap_lock_oob; + map->unlock = regmap_unlock_oob; + lockdep_set_class_and_name(&map->oob_lock, + lock_key, lock_name); + } else if (config->use_raw_spinlock) { raw_spin_lock_init(&map->raw_spinlock); map->lock = regmap_lock_raw_spinlock; map->unlock = regmap_unlock_raw_spinlock; @@ -801,14 +825,17 @@ struct regmap *__regmap_init(struct devi lockdep_set_class_and_name(&map->spinlock, lock_key, lock_name); } - } else { + } else if (!config->oob_io) { /* Catch configuration issue: oob && !fast_io */ mutex_init(&map->mutex); map->lock = regmap_lock_mutex; map->unlock = regmap_unlock_mutex; map->can_sleep = true; lockdep_set_class_and_name(&map->mutex, lock_key, lock_name); - } + } else { + ret = -ENXIO; + goto err_name; + } map->lock_arg = map; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/base/regmap/regmap-irq.c linux-dovetail-v5.15.y-dovetail/drivers/base/regmap/regmap-irq.c --- linux-5.15.26/drivers/base/regmap/regmap-irq.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/base/regmap/regmap-irq.c 2022-03-10 09:47:50.000000000 +0100 @@ -367,6 +367,7 @@ static const struct irq_chip regmap_irq_ .irq_enable = regmap_irq_enable, .irq_set_type = regmap_irq_set_type, .irq_set_wake = regmap_irq_set_wake, + .flags = IRQCHIP_PIPELINE_SAFE, }; static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/arm_arch_timer.c linux-dovetail-v5.15.y-dovetail/drivers/clocksource/arm_arch_timer.c --- linux-5.15.26/drivers/clocksource/arm_arch_timer.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/arm_arch_timer.c 2022-03-10 
09:47:50.000000000 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -656,7 +657,7 @@ static __always_inline irqreturn_t timer if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { ctrl |= ARCH_TIMER_CTRL_IT_MASK; arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt); - evt->event_handler(evt); + clockevents_handle_event(evt); return IRQ_HANDLED; } @@ -765,7 +766,7 @@ static int arch_timer_set_next_event_phy static void __arch_timer_setup(unsigned type, struct clock_event_device *clk) { - clk->features = CLOCK_EVT_FEAT_ONESHOT; + clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PIPELINE; if (type == ARCH_TIMER_TYPE_CP15) { typeof(clk->set_next_event) sne; @@ -876,6 +877,9 @@ static void arch_counter_set_user_access else cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; + if (IS_ENABLED(CONFIG_GENERIC_CLOCKSOURCE_VDSO)) + cntkctl |= ARCH_TIMER_USR_PT_ACCESS_EN; + arch_timer_set_cntkctl(cntkctl); } @@ -909,6 +913,7 @@ static int arch_timer_starting_cpu(unsig enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags); if (arch_timer_has_nonsecure_ppi()) { + clk->irq = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]; flags = check_ppi_trigger(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]); enable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI], flags); @@ -1027,6 +1032,8 @@ static void __init arch_counter_register arch_timer_read_counter = rd; clocksource_counter.vdso_clock_mode = vdso_default; + if (vdso_default != VDSO_CLOCKMODE_NONE) + clocksource_counter.vdso_type = CLOCKSOURCE_VDSO_ARCHITECTED; } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/arm_global_timer.c linux-dovetail-v5.15.y-dovetail/drivers/clocksource/arm_global_timer.c --- linux-5.15.26/drivers/clocksource/arm_global_timer.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/arm_global_timer.c 2022-03-10 09:47:50.000000000 +0100 @@ -162,11 +162,11 @@ static irqreturn_t gt_clockevent_interru * the Global Timer flag _after_ having incremented * the Comparator register value to a higher value. 
*/ - if (clockevent_state_oneshot(evt)) + if (clockevent_is_oob(evt) || clockevent_state_oneshot(evt)) gt_compare_set(ULONG_MAX, 0); writel_relaxed(GT_INT_STATUS_EVENT_FLAG, gt_base + GT_INT_STATUS); - evt->event_handler(evt); + clockevents_handle_event(evt); return IRQ_HANDLED; } @@ -177,7 +177,7 @@ static int gt_starting_cpu(unsigned int clk->name = "arm_global_timer"; clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | - CLOCK_EVT_FEAT_PERCPU; + CLOCK_EVT_FEAT_PERCPU | CLOCK_EVT_FEAT_PIPELINE; clk->set_state_shutdown = gt_clockevent_shutdown; clk->set_state_periodic = gt_clockevent_set_periodic; clk->set_state_oneshot = gt_clockevent_shutdown; @@ -201,11 +201,6 @@ static int gt_dying_cpu(unsigned int cpu return 0; } -static u64 gt_clocksource_read(struct clocksource *cs) -{ - return gt_counter_read(); -} - static void gt_resume(struct clocksource *cs) { unsigned long ctrl; @@ -216,13 +211,15 @@ static void gt_resume(struct clocksource writel(GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL); } -static struct clocksource gt_clocksource = { - .name = "arm_global_timer", - .rating = 300, - .read = gt_clocksource_read, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = gt_resume, +static struct clocksource_user_mmio gt_clocksource = { + .mmio.clksrc = { + .name = "arm_global_timer", + .rating = 300, + .read = clocksource_dual_mmio_readl_up, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .resume = gt_resume, + }, }; #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK @@ -268,6 +265,8 @@ static void __init gt_delay_timer_init(v static int __init gt_clocksource_init(void) { + struct clocksource_mmio_regs mmr; + writel(0, gt_base + GT_CONTROL); writel(0, gt_base + GT_COUNTER0); writel(0, gt_base + GT_COUNTER1); @@ -279,7 +278,13 @@ static int __init gt_clocksource_init(vo #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK sched_clock_register(gt_sched_clock_read, 64, gt_target_rate); #endif - return clocksource_register_hz(&gt_clocksource, gt_target_rate); + mmr.reg_upper = gt_base + GT_COUNTER1; + mmr.reg_lower = gt_base + GT_COUNTER0; + mmr.bits_upper = 32; + mmr.bits_lower = 32; + mmr.revmap = NULL; + + return clocksource_user_mmio_init(&gt_clocksource, &mmr, gt_target_rate); } static int gt_clk_rate_change_cb(struct notifier_block *nb, @@ -399,8 +404,8 @@ static int __init global_timer_of_regist goto out_clk_nb; } - err = request_percpu_irq(gt_ppi, gt_clockevent_interrupt, - "gt", gt_evt); + err = __request_percpu_irq(gt_ppi, gt_clockevent_interrupt, + IRQF_TIMER, "gt", gt_evt); if (err) { pr_warn("global-timer: can't register interrupt %d (%d)\n", gt_ppi, err); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/bcm2835_timer.c linux-dovetail-v5.15.y-dovetail/drivers/clocksource/bcm2835_timer.c --- linux-5.15.26/drivers/clocksource/bcm2835_timer.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/bcm2835_timer.c 2022-03-10 09:47:50.000000000 +0100 @@ -53,25 +53,33 @@ static int bcm2835_time_set_next_event(u static irqreturn_t bcm2835_time_interrupt(int irq, void *dev_id) { struct bcm2835_timer *timer = dev_id; - void (*event_handler)(struct clock_event_device *); + if (readl_relaxed(timer->control) & timer->match_mask) { writel_relaxed(timer->match_mask, timer->control); - event_handler = READ_ONCE(timer->evt.event_handler); - if (event_handler) - event_handler(&timer->evt); + clockevents_handle_event(&timer->evt); return IRQ_HANDLED; } else { return
IRQ_NONE; } } +static struct clocksource_user_mmio clocksource_bcm2835 = { + .mmio.clksrc = { + .rating = 300, + .read = clocksource_mmio_readl_up, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + }, +}; + static int __init bcm2835_timer_init(struct device_node *node) { void __iomem *base; u32 freq; int irq, ret; struct bcm2835_timer *timer; + struct clocksource_mmio_regs mmr; base = of_iomap(node, 0); if (!base) { @@ -88,8 +96,13 @@ static int __init bcm2835_timer_init(str system_clock = base + REG_COUNTER_LO; sched_clock_register(bcm2835_sched_read, 32, freq); - clocksource_mmio_init(base + REG_COUNTER_LO, node->name, - freq, 300, 32, clocksource_mmio_readl_up); + mmr.reg_lower = base + REG_COUNTER_LO; + mmr.bits_lower = 32; + mmr.reg_upper = 0; + mmr.bits_upper = 0; + mmr.revmap = NULL; + clocksource_bcm2835.mmio.clksrc.name = node->name; + clocksource_user_mmio_init(&clocksource_bcm2835, &mmr, freq); irq = irq_of_parse_and_map(node, DEFAULT_TIMER); if (irq <= 0) { @@ -109,7 +122,7 @@ static int __init bcm2835_timer_init(str timer->match_mask = BIT(DEFAULT_TIMER); timer->evt.name = node->name; timer->evt.rating = 300; - timer->evt.features = CLOCK_EVT_FEAT_ONESHOT; + timer->evt.features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PIPELINE; timer->evt.set_next_event = bcm2835_time_set_next_event; timer->evt.cpumask = cpumask_of(0); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/clksrc_st_lpc.c linux-dovetail-v5.15.y-dovetail/drivers/clocksource/clksrc_st_lpc.c --- linux-5.15.26/drivers/clocksource/clksrc_st_lpc.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/clksrc_st_lpc.c 2022-03-10 09:47:50.000000000 +0100 @@ -51,7 +51,7 @@ static int __init st_clksrc_init(void) sched_clock_register(st_clksrc_sched_clock_read, 32, rate); - ret = clocksource_mmio_init(ddata.base + LPC_LPT_LSB_OFF, + ret = clocksource_user_single_mmio_init(ddata.base + LPC_LPT_LSB_OFF, "clksrc-st-lpc", rate, 300, 32, clocksource_mmio_readl_up); if (ret) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/dw_apb_timer.c linux-dovetail-v5.15.y-dovetail/drivers/clocksource/dw_apb_timer.c --- linux-5.15.26/drivers/clocksource/dw_apb_timer.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/dw_apb_timer.c 2022-03-10 09:47:50.000000000 +0100 @@ -43,7 +43,7 @@ ced_to_dw_apb_ced(struct clock_event_dev static inline struct dw_apb_clocksource * clocksource_to_dw_apb_clocksource(struct clocksource *cs) { - return container_of(cs, struct dw_apb_clocksource, cs); + return container_of(cs, struct dw_apb_clocksource, ummio.mmio.clksrc); } static inline u32 apbt_readl(struct dw_apb_timer *timer, unsigned long offs) @@ -343,18 +343,6 @@ void dw_apb_clocksource_start(struct dw_ dw_apb_clocksource_read(dw_cs); } -static u64 __apbt_read_clocksource(struct clocksource *cs) -{ - u32 current_count; - struct dw_apb_clocksource *dw_cs = - clocksource_to_dw_apb_clocksource(cs); - - current_count = apbt_readl_relaxed(&dw_cs->timer, - APBTMR_N_CURRENT_VALUE); - - return (u64)~current_count; -} - static void apbt_restart_clocksource(struct clocksource *cs) { struct dw_apb_clocksource *dw_cs = @@ -376,7 +364,7 @@ static void apbt_restart_clocksource(str * dw_apb_clocksource_register() as the next step. 
*/ struct dw_apb_clocksource * -dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base, +__init dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base, unsigned long freq) { struct dw_apb_clocksource *dw_cs = kzalloc(sizeof(*dw_cs), GFP_KERNEL); @@ -386,12 +374,12 @@ dw_apb_clocksource_init(unsigned rating, dw_cs->timer.base = base; dw_cs->timer.freq = freq; - dw_cs->cs.name = name; - dw_cs->cs.rating = rating; - dw_cs->cs.read = __apbt_read_clocksource; - dw_cs->cs.mask = CLOCKSOURCE_MASK(32); - dw_cs->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; - dw_cs->cs.resume = apbt_restart_clocksource; + dw_cs->ummio.mmio.clksrc.name = name; + dw_cs->ummio.mmio.clksrc.rating = rating; + dw_cs->ummio.mmio.clksrc.read = clocksource_mmio_readl_down; + dw_cs->ummio.mmio.clksrc.mask = CLOCKSOURCE_MASK(32); + dw_cs->ummio.mmio.clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + dw_cs->ummio.mmio.clksrc.resume = apbt_restart_clocksource; return dw_cs; } @@ -401,9 +389,17 @@ dw_apb_clocksource_init(unsigned rating, * * @dw_cs: The clocksource to register. */ -void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs) +void __init dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs) { - clocksource_register_hz(&dw_cs->cs, dw_cs->timer.freq); + struct clocksource_mmio_regs mmr; + + mmr.reg_lower = dw_cs->timer.base + APBTMR_N_CURRENT_VALUE; + mmr.bits_lower = 32; + mmr.reg_upper = 0; + mmr.bits_upper = 0; + mmr.revmap = NULL; + + clocksource_user_mmio_init(&dw_cs->ummio, &mmr, dw_cs->timer.freq); } /** diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/exynos_mct.c linux-dovetail-v5.15.y-dovetail/drivers/clocksource/exynos_mct.c --- linux-5.15.26/drivers/clocksource/exynos_mct.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/exynos_mct.c 2022-03-10 09:47:50.000000000 +0100 @@ -203,23 +203,20 @@ static u32 notrace exynos4_read_count_32 return readl_relaxed(reg_base + EXYNOS4_MCT_G_CNT_L); } -static u64 exynos4_frc_read(struct clocksource *cs) -{ - return exynos4_read_count_32(); -} - static void exynos4_frc_resume(struct clocksource *cs) { exynos4_mct_frc_start(); } -static struct clocksource mct_frc = { - .name = "mct-frc", - .rating = MCT_CLKSOURCE_RATING, - .read = exynos4_frc_read, - .mask = CLOCKSOURCE_MASK(32), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = exynos4_frc_resume, +static struct clocksource_user_mmio mct_frc = { + .mmio.clksrc = { + .name = "mct-frc", + .rating = MCT_CLKSOURCE_RATING, + .read = clocksource_mmio_readl_up, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .resume = exynos4_frc_resume, + }, }; static u64 notrace exynos4_read_sched_clock(void) @@ -240,6 +237,8 @@ static cycles_t exynos4_read_current_tim static int __init exynos4_clocksource_init(void) { + struct clocksource_mmio_regs mmr; + exynos4_mct_frc_start(); #if defined(CONFIG_ARM) @@ -248,8 +247,13 @@ static int __init exynos4_clocksource_in register_current_timer_delay(&exynos4_delay_timer); #endif - if (clocksource_register_hz(&mct_frc, clk_rate)) - panic("%s: can't register clocksource\n", mct_frc.name); + mmr.reg_upper = NULL; + mmr.reg_lower = reg_base + EXYNOS4_MCT_G_CNT_L; + mmr.bits_upper = 0; + mmr.bits_lower = 32; + mmr.revmap = NULL; + if (clocksource_user_mmio_init(&mct_frc, &mmr, clk_rate)) + panic("%s: can't register clocksource\n", mct_frc.mmio.clksrc.name); sched_clock_register(exynos4_read_sched_clock, 32, clk_rate); @@ -317,7 +321,8 @@ static int 
mct_set_state_periodic(struct static struct clock_event_device mct_comp_device = { .name = "mct-comp", .features = CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_ONESHOT, + CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_PIPELINE, .rating = 250, .set_next_event = exynos4_comp_set_next_event, .set_state_periodic = mct_set_state_periodic, @@ -333,7 +338,7 @@ static irqreturn_t exynos4_mct_comp_isr( exynos4_mct_write(0x1, EXYNOS4_MCT_G_INT_CSTAT); - evt->event_handler(evt); + clockevents_handle_event(evt); return IRQ_HANDLED; } @@ -344,7 +349,7 @@ static int exynos4_clockevent_init(void) clockevents_config_and_register(&mct_comp_device, clk_rate, 0xf, 0xffffffff); if (request_irq(mct_irqs[MCT_G0_IRQ], exynos4_mct_comp_isr, - IRQF_TIMER | IRQF_IRQPOLL, "mct_comp_irq", + IRQF_TIMER | IRQF_IRQPOLL | IRQF_OOB, "mct_comp_irq", &mct_comp_device)) pr_err("%s: request_irq() failed\n", "mct_comp_irq"); @@ -443,7 +448,7 @@ static irqreturn_t exynos4_mct_tick_isr( exynos4_mct_tick_clear(mevt); - evt->event_handler(evt); + clockevents_handle_event(evt); return IRQ_HANDLED; } @@ -466,7 +471,7 @@ static int exynos4_mct_starting_cpu(unsi evt->set_state_oneshot_stopped = set_state_shutdown; evt->tick_resume = set_state_shutdown; evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | - CLOCK_EVT_FEAT_PERCPU; + CLOCK_EVT_FEAT_PERCPU | CLOCK_EVT_FEAT_PIPELINE; evt->rating = MCT_CLKEVENTS_RATING, exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET); @@ -525,9 +530,9 @@ static int __init exynos4_timer_resource if (mct_int_type == MCT_INT_PPI) { - err = request_percpu_irq(mct_irqs[MCT_L0_IRQ], - exynos4_mct_tick_isr, "MCT", - &percpu_mct_tick); + err = __request_percpu_irq(mct_irqs[MCT_L0_IRQ], + exynos4_mct_tick_isr, IRQF_TIMER, + "MCT", &percpu_mct_tick); WARN(err, "MCT: can't request IRQ %d (%d)\n", mct_irqs[MCT_L0_IRQ], err); } else { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/Kconfig linux-dovetail-v5.15.y-dovetail/drivers/clocksource/Kconfig --- linux-5.15.26/drivers/clocksource/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -25,6 +25,7 @@ config I8253_LOCK config OMAP_DM_TIMER bool select TIMER_OF + select GENERIC_CLOCKSOURCE_VDSO config CLKBLD_I8253 def_bool y if CLKSRC_I8253 || CLKEVT_I8253 || I8253_LOCK @@ -58,6 +59,8 @@ config DIGICOLOR_TIMER config DW_APB_TIMER bool "DW APB timer driver" if COMPILE_TEST + select CLKSRC_MMIO + select GENERIC_CLOCKSOURCE_VDSO if ARM help Enables the support for the dw_apb timer. @@ -356,6 +359,7 @@ config SUN50I_ERRATUM_UNKNOWN1 config ARM_GLOBAL_TIMER bool "Support for the ARM global timer" if COMPILE_TEST select TIMER_OF if OF + select GENERIC_CLOCKSOURCE_VDSO depends on ARM help This option enables support for the ARM global timer unit. @@ -419,6 +423,7 @@ config ATMEL_TCB_CLKSRC config CLKSRC_EXYNOS_MCT bool "Exynos multi core timer driver" if COMPILE_TEST depends on ARM || ARM64 + select GENERIC_CLOCKSOURCE_VDSO help Support for Multi Core Timer controller on Exynos SoCs. 
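The clockevent hunks above (arch timer, ARM global timer, bcm2835, exynos-mct) all apply the same conversion. A condensed, hedged sketch for a fictional foo timer (the foo_* names and IRQ number are illustrative; the flags and helpers are the ones introduced by this patch, header placement assumed):

	#include <linux/clockchips.h>
	#include <linux/interrupt.h>

	#define FOO_TIMER_IRQ 27	/* hypothetical */

	static int foo_set_next_event(unsigned long delta, struct clock_event_device *evt)
	{
		/* program the hardware comparator here */
		return 0;
	}

	static struct clock_event_device foo_clockevent = {
		.name		= "foo-timer",
		/* CLOCK_EVT_FEAT_PIPELINE marks the device as usable from the oob stage. */
		.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PIPELINE,
		.rating		= 300,
		.set_next_event	= foo_set_next_event,
	};

	static irqreturn_t foo_timer_isr(int irq, void *dev_id)
	{
		struct clock_event_device *evt = dev_id;

		/* acknowledge the interrupt in the device here, then: */
		clockevents_handle_event(evt);	/* replaces evt->event_handler(evt) */
		return IRQ_HANDLED;
	}

	static int __init foo_timer_init(void)
	{
		/* IRQF_TIMER, plus IRQF_OOB where the tick may be taken out-of-band. */
		return request_irq(FOO_TIMER_IRQ, foo_timer_isr, IRQF_TIMER,
				   "foo-timer", &foo_clockevent);
	}

Switching the handlers to clockevents_handle_event() routes the expired event through the pipeline rather than invoking evt->event_handler() directly.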
@@ -587,7 +592,7 @@ config H8300_TPU config CLKSRC_IMX_GPT bool "Clocksource using i.MX GPT" if COMPILE_TEST depends on (ARM || ARM64) && HAVE_CLK - select CLKSRC_MMIO + select GENERIC_CLOCKSOURCE_VDSO config CLKSRC_IMX_TPM bool "Clocksource using i.MX TPM" if COMPILE_TEST @@ -609,7 +614,7 @@ config CLKSRC_ST_LPC bool "Low power clocksource found in the LPC" if COMPILE_TEST select TIMER_OF if OF depends on HAS_IOMEM - select CLKSRC_MMIO + select GENERIC_CLOCKSOURCE_VDSO help Enable this option to use the Low Power controller timer as clocksource. diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/mmio.c linux-dovetail-v5.15.y-dovetail/drivers/clocksource/mmio.c --- linux-5.15.26/drivers/clocksource/mmio.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/mmio.c 2022-03-10 09:47:50.000000000 +0100 @@ -6,12 +6,31 @@ #include #include #include - -struct clocksource_mmio { - void __iomem *reg; - struct clocksource clksrc; +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct clocksource_user_mapping { + struct mm_struct *mm; + struct clocksource_user_mmio *ucs; + void *regs; + struct hlist_node link; + atomic_t refs; }; +static struct class *user_mmio_class; +static dev_t user_mmio_devt; + +static DEFINE_SPINLOCK(user_clksrcs_lock); +static unsigned int user_clksrcs_count; +static LIST_HEAD(user_clksrcs); + static inline struct clocksource_mmio *to_mmio_clksrc(struct clocksource *c) { return container_of(c, struct clocksource_mmio, clksrc); @@ -37,6 +56,53 @@ u64 clocksource_mmio_readw_down(struct c return ~(u64)readw_relaxed(to_mmio_clksrc(c)->reg) & c->mask; } +static inline struct clocksource_user_mmio * +to_mmio_ucs(struct clocksource *c) +{ + return container_of(c, struct clocksource_user_mmio, mmio.clksrc); +} + +u64 clocksource_dual_mmio_readl_up(struct clocksource *c) +{ + struct clocksource_user_mmio *ucs = to_mmio_ucs(c); + u32 upper, old_upper, lower; + + upper = readl_relaxed(ucs->reg_upper); + do { + old_upper = upper; + lower = readl_relaxed(ucs->mmio.reg); + upper = readl_relaxed(ucs->reg_upper); + } while (upper != old_upper); + + return (((u64)upper) << ucs->bits_lower) | lower; +} + +u64 clocksource_dual_mmio_readw_up(struct clocksource *c) +{ + struct clocksource_user_mmio *ucs = to_mmio_ucs(c); + u16 upper, old_upper, lower; + + upper = readw_relaxed(ucs->reg_upper); + do { + old_upper = upper; + lower = readw_relaxed(ucs->mmio.reg); + upper = readw_relaxed(ucs->reg_upper); + } while (upper != old_upper); + + return (((u64)upper) << ucs->bits_lower) | lower; +} + +static void mmio_base_init(const char *name,int rating, unsigned int bits, + u64 (*read)(struct clocksource *), + struct clocksource *cs) +{ + cs->name = name; + cs->rating = rating; + cs->read = read; + cs->mask = CLOCKSOURCE_MASK(bits); + cs->flags = CLOCK_SOURCE_IS_CONTINUOUS; +} + /** * clocksource_mmio_init - Initialize a simple mmio based clocksource * @base: Virtual address of the clock readout register @@ -51,6 +117,7 @@ int __init clocksource_mmio_init(void __ u64 (*read)(struct clocksource *)) { struct clocksource_mmio *cs; + int err; if (bits > 64 || bits < 16) return -EINVAL; @@ -60,11 +127,428 @@ int __init clocksource_mmio_init(void __ return -ENOMEM; cs->reg = base; - cs->clksrc.name = name; - cs->clksrc.rating = rating; - cs->clksrc.read = read; - cs->clksrc.mask = CLOCKSOURCE_MASK(bits); - cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + mmio_base_init(name, rating, bits, read, 
&cs->clksrc); + + err = clocksource_register_hz(&cs->clksrc, hz); + if (err < 0) { + kfree(cs); + return err; + } + + return err; +} + +static void mmio_ucs_vmopen(struct vm_area_struct *vma) +{ + struct clocksource_user_mapping *mapping, *clone; + struct clocksource_user_mmio *ucs; + unsigned long h_key; + + mapping = vma->vm_private_data; + + if (mapping->mm == vma->vm_mm) { + atomic_inc(&mapping->refs); + } else if (mapping->mm) { + /* + * We must be duplicating the original mm upon fork(), + * clone the parent ucs mapping struct then rehash it + * on the child mm key. If we cannot get memory for + * this, mitigate the issue for users by preventing a + * stale parent mm from being matched later on by a + * process which reused its mm_struct (h_key is based + * on this struct address). + */ + clone = kmalloc(sizeof(*mapping), GFP_KERNEL); + if (clone == NULL) { + pr_alert("out-of-memory for UCS mapping!\n"); + atomic_inc(&mapping->refs); + mapping->mm = NULL; + return; + } + ucs = mapping->ucs; + clone->mm = vma->vm_mm; + clone->ucs = ucs; + clone->regs = mapping->regs; + atomic_set(&clone->refs, 1); + vma->vm_private_data = clone; + h_key = (unsigned long)vma->vm_mm / sizeof(*vma->vm_mm); + spin_lock(&ucs->lock); + hash_add(ucs->mappings, &clone->link, h_key); + spin_unlock(&ucs->lock); + } +} + +static void mmio_ucs_vmclose(struct vm_area_struct *vma) +{ + struct clocksource_user_mapping *mapping; + + mapping = vma->vm_private_data; + + if (atomic_dec_and_test(&mapping->refs)) { + spin_lock(&mapping->ucs->lock); + hash_del(&mapping->link); + spin_unlock(&mapping->ucs->lock); + kfree(mapping); + } +} + +static const struct vm_operations_struct mmio_ucs_vmops = { + .open = mmio_ucs_vmopen, + .close = mmio_ucs_vmclose, +}; + +static int mmio_ucs_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned long addr, upper_pfn, lower_pfn; + struct clocksource_user_mapping *mapping, *tmp; + struct clocksource_user_mmio *ucs; + unsigned int bits_upper; + unsigned long h_key; + pgprot_t prot; + size_t pages; + int err; + + pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + if (pages > 2) + return -EINVAL; + + vma->vm_private_data = NULL; + + ucs = file->private_data; + upper_pfn = ucs->phys_upper >> PAGE_SHIFT; + lower_pfn = ucs->phys_lower >> PAGE_SHIFT; + bits_upper = fls(ucs->mmio.clksrc.mask) - ucs->bits_lower; + if (pages == 2 && (!bits_upper || upper_pfn == lower_pfn)) + return -EINVAL; + + mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) + return -ENOSPC; + + mapping->mm = vma->vm_mm; + mapping->ucs = ucs; + mapping->regs = (void *)vma->vm_start; + atomic_set(&mapping->refs, 1); + + vma->vm_private_data = mapping; + vma->vm_ops = &mmio_ucs_vmops; + prot = pgprot_noncached(vma->vm_page_prot); + addr = vma->vm_start; + + err = remap_pfn_range(vma, addr, lower_pfn, PAGE_SIZE, prot); + if (err < 0) + goto fail; + + if (pages > 1) { + addr += PAGE_SIZE; + err = remap_pfn_range(vma, addr, upper_pfn, PAGE_SIZE, prot); + if (err < 0) + goto fail; + } + + h_key = (unsigned long)vma->vm_mm / sizeof(*vma->vm_mm); + + spin_lock(&ucs->lock); + hash_for_each_possible(ucs->mappings, tmp, link, h_key) { + if (tmp->mm == vma->vm_mm) { + spin_unlock(&ucs->lock); + err = -EBUSY; + goto fail; + } + } + hash_add(ucs->mappings, &mapping->link, h_key); + spin_unlock(&ucs->lock); + + return 0; +fail: + kfree(mapping); + + return err; +} + +static long +mmio_ucs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct clocksource_user_mapping *mapping; + struct 
clksrc_user_mmio_info __user *u; + unsigned long upper_pfn, lower_pfn; + struct clksrc_user_mmio_info info; + struct clocksource_user_mmio *ucs; + unsigned int bits_upper; + void __user *map_base; + unsigned long h_key; + size_t size; + + u = (struct clksrc_user_mmio_info __user *)arg; + + switch (cmd) { + case CLKSRC_USER_MMIO_MAP: + break; + default: + return -ENOTTY; + } + + h_key = (unsigned long)current->mm / sizeof(*current->mm); + + ucs = file->private_data; + upper_pfn = ucs->phys_upper >> PAGE_SHIFT; + lower_pfn = ucs->phys_lower >> PAGE_SHIFT; + bits_upper = fls(ucs->mmio.clksrc.mask) - ucs->bits_lower; + size = PAGE_SIZE; + if (bits_upper && upper_pfn != lower_pfn) + size += PAGE_SIZE; + + do { + spin_lock(&ucs->lock); + hash_for_each_possible(ucs->mappings, mapping, link, h_key) { + if (mapping->mm == current->mm) { + spin_unlock(&ucs->lock); + map_base = mapping->regs; + goto found; + } + } + spin_unlock(&ucs->lock); + + map_base = (void *) + vm_mmap(file, 0, size, PROT_READ, MAP_SHARED, 0); + } while (IS_ERR(map_base) && PTR_ERR(map_base) == -EBUSY); + + if (IS_ERR(map_base)) + return PTR_ERR(map_base); + +found: + info.type = ucs->type; + info.reg_lower = map_base + offset_in_page(ucs->phys_lower); + info.mask_lower = ucs->mmio.clksrc.mask; + info.bits_lower = ucs->bits_lower; + info.reg_upper = NULL; + if (ucs->phys_upper) + info.reg_upper = map_base + (size - PAGE_SIZE) + + offset_in_page(ucs->phys_upper); + info.mask_upper = ucs->mask_upper; + + return copy_to_user(u, &info, sizeof(*u)); +} + +static int mmio_ucs_open(struct inode *inode, struct file *file) +{ + struct clocksource_user_mmio *ucs; + + if (file->f_mode & FMODE_WRITE) + return -EINVAL; + + ucs = container_of(inode->i_cdev, typeof(*ucs), cdev); + file->private_data = ucs; + + return 0; +} + +static const struct file_operations mmio_ucs_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = mmio_ucs_ioctl, + .open = mmio_ucs_open, + .mmap = mmio_ucs_mmap, +}; + +static int __init +ucs_create_cdev(struct class *class, struct clocksource_user_mmio *ucs) +{ + int err; + + ucs->dev = device_create(class, NULL, + MKDEV(MAJOR(user_mmio_devt), ucs->id), + ucs, "ucs/%d", ucs->id); + if (IS_ERR(ucs->dev)) + return PTR_ERR(ucs->dev); + + spin_lock_init(&ucs->lock); + hash_init(ucs->mappings); + + cdev_init(&ucs->cdev, &mmio_ucs_fops); + ucs->cdev.kobj.parent = &ucs->dev->kobj; + + err = cdev_add(&ucs->cdev, ucs->dev->devt, 1); + if (err < 0) + goto err_device_destroy; + + return 0; + +err_device_destroy: + device_destroy(class, MKDEV(MAJOR(user_mmio_devt), ucs->id)); + return err; +} + +static unsigned long default_revmap(void *virt) +{ + struct vm_struct *vm; + + vm = find_vm_area(virt); + if (!vm) + return 0; + + return vm->phys_addr + (virt - vm->addr); +} + +int __init clocksource_user_mmio_init(struct clocksource_user_mmio *ucs, + const struct clocksource_mmio_regs *regs, + unsigned long hz) +{ + static u64 (*user_types[CLKSRC_MMIO_TYPE_NR])(struct clocksource *) = { + [CLKSRC_MMIO_L_UP] = clocksource_mmio_readl_up, + [CLKSRC_MMIO_L_DOWN] = clocksource_mmio_readl_down, + [CLKSRC_DMMIO_L_UP] = clocksource_dual_mmio_readl_up, + [CLKSRC_MMIO_W_UP] = clocksource_mmio_readw_up, + [CLKSRC_MMIO_W_DOWN] = clocksource_mmio_readw_down, + [CLKSRC_DMMIO_W_UP] = clocksource_dual_mmio_readw_up, + }; + const char *name = ucs->mmio.clksrc.name; + unsigned long phys_upper = 0, phys_lower; + enum clksrc_user_mmio_type type; + unsigned long (*revmap)(void *); + int err; - return clocksource_register_hz(&cs->clksrc, hz); + if 
(regs->bits_lower > 32 || regs->bits_lower < 16 || + regs->bits_upper > 32) + return -EINVAL; + + for (type = 0; type < ARRAY_SIZE(user_types); type++) + if (ucs->mmio.clksrc.read == user_types[type]) + break; + + if (type == ARRAY_SIZE(user_types)) + return -EINVAL; + + if (!(ucs->mmio.clksrc.flags & CLOCK_SOURCE_IS_CONTINUOUS)) + return -EINVAL; + + revmap = regs->revmap; + if (!revmap) + revmap = default_revmap; + + phys_lower = revmap(regs->reg_lower); + if (!phys_lower) + return -EINVAL; + + if (regs->bits_upper) { + phys_upper = revmap(regs->reg_upper); + if (!phys_upper) + return -EINVAL; + } + + ucs->mmio.reg = regs->reg_lower; + ucs->type = type; + ucs->bits_lower = regs->bits_lower; + ucs->reg_upper = regs->reg_upper; + ucs->mask_lower = CLOCKSOURCE_MASK(regs->bits_lower); + ucs->mask_upper = CLOCKSOURCE_MASK(regs->bits_upper); + ucs->phys_lower = phys_lower; + ucs->phys_upper = phys_upper; + spin_lock_init(&ucs->lock); + + err = clocksource_register_hz(&ucs->mmio.clksrc, hz); + if (err < 0) + return err; + + spin_lock(&user_clksrcs_lock); + + ucs->id = user_clksrcs_count++; + if (ucs->id < CLKSRC_USER_MMIO_MAX) + list_add_tail(&ucs->link, &user_clksrcs); + + spin_unlock(&user_clksrcs_lock); + + if (ucs->id >= CLKSRC_USER_MMIO_MAX) { + pr_warn("%s: Too many clocksources\n", name); + err = -EAGAIN; + goto fail; + } + + ucs->mmio.clksrc.vdso_type = CLOCKSOURCE_VDSO_MMIO + ucs->id; + + if (user_mmio_class) { + err = ucs_create_cdev(user_mmio_class, ucs); + if (err < 0) { + pr_warn("%s: Failed to add character device\n", name); + goto fail; + } + } + + return 0; + +fail: + clocksource_unregister(&ucs->mmio.clksrc); + + return err; +} + +int __init clocksource_user_single_mmio_init( + void __iomem *base, const char *name, + unsigned long hz, int rating, unsigned int bits, + u64 (*read)(struct clocksource *)) +{ + struct clocksource_user_mmio *ucs; + struct clocksource_mmio_regs regs; + int ret; + + ucs = kzalloc(sizeof(*ucs), GFP_KERNEL); + if (!ucs) + return -ENOMEM; + + mmio_base_init(name, rating, bits, read, &ucs->mmio.clksrc); + regs.reg_lower = base; + regs.reg_upper = NULL; + regs.bits_lower = bits; + regs.bits_upper = 0; + regs.revmap = NULL; + + ret = clocksource_user_mmio_init(ucs, &regs, hz); + if (ret) + kfree(ucs); + + return ret; +} + +static int __init mmio_clksrc_chr_dev_init(void) +{ + struct clocksource_user_mmio *ucs; + struct class *class; + int err; + + class = class_create(THIS_MODULE, "mmio_ucs"); + if (IS_ERR(class)) { + pr_err("couldn't create user mmio clocksources class\n"); + return PTR_ERR(class); + } + + err = alloc_chrdev_region(&user_mmio_devt, 0, CLKSRC_USER_MMIO_MAX, + "mmio_ucs"); + if (err < 0) { + pr_err("failed to allocate user mmio clocksources character devices region\n"); + goto err_class_destroy; + } + + /* + * Calling list_for_each_entry is safe here: clocksources are always + * added to the list tail, never removed.
+ */ + spin_lock(&user_clksrcs_lock); + list_for_each_entry(ucs, &user_clksrcs, link) { + spin_unlock(&user_clksrcs_lock); + + err = ucs_create_cdev(class, ucs); + if (err < 0) + pr_err("%s: Failed to add character device\n", + ucs->mmio.clksrc.name); + + spin_lock(&user_clksrcs_lock); + } + user_mmio_class = class; + spin_unlock(&user_clksrcs_lock); + + return 0; + +err_class_destroy: + class_destroy(class); + return err; } +device_initcall(mmio_clksrc_chr_dev_init); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/timer-imx-gpt.c linux-dovetail-v5.15.y-dovetail/drivers/clocksource/timer-imx-gpt.c --- linux-5.15.26/drivers/clocksource/timer-imx-gpt.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/timer-imx-gpt.c 2022-03-10 09:47:50.000000000 +0100 @@ -163,8 +163,8 @@ static int __init mxc_clocksource_init(s sched_clock_reg = reg; sched_clock_register(mxc_read_sched_clock, 32, c); - return clocksource_mmio_init(reg, "mxc_timer1", c, 200, 32, - clocksource_mmio_readl_up); + return clocksource_user_single_mmio_init(reg, "mxc_timer1", c, 200, 32, + clocksource_mmio_readl_up); } /* clock event */ @@ -264,7 +264,7 @@ static irqreturn_t mxc_timer_interrupt(i imxtm->gpt->gpt_irq_acknowledge(imxtm); - ced->event_handler(ced); + clockevents_handle_event(ced); return IRQ_HANDLED; } @@ -274,7 +274,7 @@ static int __init mxc_clockevent_init(st struct clock_event_device *ced = &imxtm->ced; ced->name = "mxc_timer1"; - ced->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ; + ced->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ | CLOCK_EVT_FEAT_PIPELINE; ced->set_state_shutdown = mxc_shutdown; ced->set_state_oneshot = mxc_set_oneshot; ced->tick_resume = mxc_shutdown; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/timer-sun4i.c linux-dovetail-v5.15.y-dovetail/drivers/clocksource/timer-sun4i.c --- linux-5.15.26/drivers/clocksource/timer-sun4i.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/timer-sun4i.c 2022-03-10 09:47:50.000000000 +0100 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -135,7 +136,7 @@ static irqreturn_t sun4i_timer_interrupt struct timer_of *to = to_timer_of(evt); sun4i_timer_clear_interrupt(timer_of_base(to)); - evt->event_handler(evt); + clockevents_handle_event(evt); return IRQ_HANDLED; } @@ -146,7 +147,7 @@ static struct timer_of to = { .clkevt = { .name = "sun4i_tick", .rating = 350, - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PIPELINE, .set_state_shutdown = sun4i_clkevt_shutdown, .set_state_periodic = sun4i_clkevt_set_periodic, .set_state_oneshot = sun4i_clkevt_set_oneshot, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/clocksource/timer-ti-dm-systimer.c linux-dovetail-v5.15.y-dovetail/drivers/clocksource/timer-ti-dm-systimer.c --- linux-5.15.26/drivers/clocksource/timer-ti-dm-systimer.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/clocksource/timer-ti-dm-systimer.c 2022-03-10 09:47:50.000000000 +0100 @@ -57,7 +57,7 @@ struct dmtimer_clockevent { }; struct dmtimer_clocksource { - struct clocksource dev; + struct clocksource_user_mmio mmio; struct dmtimer_systimer t; unsigned int loadval; }; @@ -438,7 +438,7 @@ static irqreturn_t dmtimer_clockevent_in struct dmtimer_systimer *t = &clkevt->t; 
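The mmio.c changes above expose every registered clocksource_user_mmio through a character device named "ucs/<id>"; its CLKSRC_USER_MMIO_MAP ioctl maps the counter register(s) into the calling process and returns their user addresses in a struct clksrc_user_mmio_info. A hedged userspace sketch (the /dev path and the uapi header location are assumptions; the ioctl command and structure fields are the ones defined above; a 32-bit lower register is assumed):

	#include <stdio.h>
	#include <stdint.h>
	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/clocksource.h>	/* assumed uapi home of struct clksrc_user_mmio_info */

	int main(void)
	{
		struct clksrc_user_mmio_info info;
		volatile uint32_t *lower;
		int fd;

		fd = open("/dev/ucs/0", O_RDONLY);	/* device_create()d as "ucs/%d" above */
		if (fd < 0)
			return 1;

		/* Maps the counter page(s) read-only and fills @info. */
		if (ioctl(fd, CLKSRC_USER_MMIO_MAP, &info))
			return 1;

		lower = (volatile uint32_t *)info.reg_lower;
		printf("raw counter: %u\n", *lower & (uint32_t)info.mask_lower);
		return 0;
	}

This appears to be the plumbing behind the GENERIC_CLOCKSOURCE_VDSO selections in the Kconfig hunks: once mapped, the counter can be read from user space without entering the kernel.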
writel_relaxed(OMAP_TIMER_INT_OVERFLOW, t->base + t->irq_stat); - clkevt->dev.event_handler(&clkevt->dev); + clockevents_handle_event(&clkevt->dev); return IRQ_HANDLED; } @@ -549,7 +549,7 @@ static int __init dmtimer_clkevt_init_co * We mostly use cpuidle_coupled with ARM local timers for runtime, * so there's probably no use for CLOCK_EVT_FEAT_DYNIRQ here. */ - dev->features = features; + dev->features = features | CLOCK_EVT_FEAT_PIPELINE; dev->rating = rating; dev->set_next_event = dmtimer_set_next_event; dev->set_state_shutdown = dmtimer_clockevent_shutdown; @@ -707,15 +707,7 @@ static int __init dmtimer_percpu_quirk_i static struct dmtimer_clocksource * to_dmtimer_clocksource(struct clocksource *cs) { - return container_of(cs, struct dmtimer_clocksource, dev); -} - -static u64 dmtimer_clocksource_read_cycles(struct clocksource *cs) -{ - struct dmtimer_clocksource *clksrc = to_dmtimer_clocksource(cs); - struct dmtimer_systimer *t = &clksrc->t; - - return (u64)readl_relaxed(t->base + t->counter); + return container_of(cs, struct dmtimer_clocksource, mmio.mmio.clksrc); } static void __iomem *dmtimer_sched_clock_counter; @@ -754,6 +746,7 @@ static void dmtimer_clocksource_resume(s static int __init dmtimer_clocksource_init(struct device_node *np) { struct dmtimer_clocksource *clksrc; + struct clocksource_mmio_regs mmr; struct dmtimer_systimer *t; struct clocksource *dev; int error; @@ -762,7 +755,7 @@ static int __init dmtimer_clocksource_in if (!clksrc) return -ENOMEM; - dev = &clksrc->dev; + dev = &clksrc->mmio.mmio.clksrc; t = &clksrc->t; error = dmtimer_systimer_setup(np, t); @@ -771,7 +764,7 @@ static int __init dmtimer_clocksource_in dev->name = "dmtimer"; dev->rating = 300; - dev->read = dmtimer_clocksource_read_cycles; + dev->read = clocksource_mmio_readl_up, dev->mask = CLOCKSOURCE_MASK(32); dev->flags = CLOCK_SOURCE_IS_CONTINUOUS; @@ -794,7 +787,13 @@ static int __init dmtimer_clocksource_in sched_clock_register(dmtimer_read_sched_clock, 32, t->rate); } - if (clocksource_register_hz(dev, t->rate)) + mmr.reg_lower = t->base + t->counter; + mmr.bits_lower = 32; + mmr.reg_upper = 0; + mmr.bits_upper = 0; + mmr.revmap = NULL; + + if (clocksource_user_mmio_init(&clksrc->mmio, &mmr, t->rate)) pr_err("Could not register clocksource %pOF\n", np); return 0; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/cpuidle/cpuidle.c linux-dovetail-v5.15.y-dovetail/drivers/cpuidle/cpuidle.c --- linux-5.15.26/drivers/cpuidle/cpuidle.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/cpuidle/cpuidle.c 2022-03-10 09:47:50.000000000 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -207,6 +208,22 @@ int cpuidle_enter_state(struct cpuidle_d ktime_t time_start, time_end; /* + * A companion core running on the oob stage of the IRQ + * pipeline may deny switching to a deeper C-state. If so, + * call the default idle routine instead. If the core cannot + * bear with the latency induced by the default idling + * operation, then CPUIDLE is not usable and should be + * disabled at build time. The in-band stage is currently + * stalled, hard irqs are on. irq_cpuidle_enter() leaves us + * stalled but returns with hard irqs off so that no event may + * sneak in until we actually go idle. + */ + if (!irq_cpuidle_enter(dev, target_state)) { + default_idle_call(); + return -EBUSY; + } + + /* * Tell the time framework to switch to a broadcast timer because our * local timer will be shut down. 
If a local timer is used from another * CPU as a broadcast timer, this call may fail if it is not available. @@ -235,6 +252,7 @@ int cpuidle_enter_state(struct cpuidle_d if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) rcu_idle_enter(); entered_state = target_state->enter(dev, drv, index); + hard_cond_local_irq_enable(); if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) rcu_idle_exit(); start_critical_timings(); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/cpuidle/poll_state.c linux-dovetail-v5.15.y-dovetail/drivers/cpuidle/poll_state.c --- linux-5.15.26/drivers/cpuidle/poll_state.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/cpuidle/poll_state.c 2022-03-10 09:47:50.000000000 +0100 @@ -17,7 +17,7 @@ static int __cpuidle poll_idle(struct cp dev->poll_time_limit = false; - local_irq_enable(); + local_irq_enable_full(); if (!current_set_polling_and_test()) { unsigned int loop_count = 0; u64 limit; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/dma/bcm2835-dma.c linux-dovetail-v5.15.y-dovetail/drivers/dma/bcm2835-dma.c --- linux-5.15.26/drivers/dma/bcm2835-dma.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/dma/bcm2835-dma.c 2022-03-10 09:47:50.000000000 +0100 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -435,10 +436,20 @@ static void bcm2835_dma_abort(struct bcm writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS); } +static inline void bcm2835_dma_enable_channel(struct bcm2835_chan *c) +{ + writel(c->desc->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR); + writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS); +} + +static inline bool bcm2835_dma_oob_capable(void) +{ + return IS_ENABLED(CONFIG_DMA_BCM2835_OOB); +} + static void bcm2835_dma_start_desc(struct bcm2835_chan *c) { struct virt_dma_desc *vd = vchan_next_desc(&c->vc); - struct bcm2835_desc *d; if (!vd) { c->desc = NULL; @@ -447,10 +458,41 @@ static void bcm2835_dma_start_desc(struc list_del(&vd->node); - c->desc = d = to_bcm2835_dma_desc(&vd->tx); + c->desc = to_bcm2835_dma_desc(&vd->tx); + if (!bcm2835_dma_oob_capable() || !vchan_oob_pulsed(vd)) + bcm2835_dma_enable_channel(c); +} + +static bool do_channel(struct bcm2835_chan *c, struct bcm2835_desc *d) +{ + struct dmaengine_desc_callback cb; + + if (running_oob()) { + if (!vchan_oob_handled(&d->vd)) + return false; + dmaengine_desc_get_callback(&d->vd.tx, &cb); + if (dmaengine_desc_callback_valid(&cb)) { + vchan_unlock(&c->vc); + dmaengine_desc_callback_invoke(&cb, NULL); + vchan_lock(&c->vc); + } + return true; + } - writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR); - writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS); + if (d->cyclic) { + /* call the cyclic callback */ + vchan_cyclic_callback(&d->vd); + } else if (!readl(c->chan_base + BCM2835_DMA_ADDR)) { + vchan_cookie_complete(&c->desc->vd); + bcm2835_dma_start_desc(c); + } + + return true; +} + +static inline bool is_base_irq_handler(void) +{ + return !bcm2835_dma_oob_capable() || running_oob(); } static irqreturn_t bcm2835_dma_callback(int irq, void *data) @@ -460,7 +502,7 @@ static irqreturn_t bcm2835_dma_callback( unsigned long flags; /* check the shared interrupt */ - if (c->irq_flags & IRQF_SHARED) { + if (is_base_irq_handler() && c->irq_flags & IRQF_SHARED) { /* check if the interrupt is enabled */ flags = readl(c->chan_base + BCM2835_DMA_CS); /* if not set then we are not the reason for the irq */ @@ -468,7 +510,8 @@ static 
irqreturn_t bcm2835_dma_callback( return IRQ_NONE; } - spin_lock_irqsave(&c->vc.lock, flags); + /* CAUTION: If running in-band, hard irqs are on. */ + vchan_lock_irqsave(&c->vc, flags); /* * Clear the INT flag to receive further interrupts. Keep the channel @@ -477,22 +520,27 @@ static irqreturn_t bcm2835_dma_callback( * if this IRQ handler is threaded.) If the channel is finished, it * will remain idle despite the ACTIVE flag being set. */ - writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE, - c->chan_base + BCM2835_DMA_CS); + if (is_base_irq_handler()) + writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE, + c->chan_base + BCM2835_DMA_CS); d = c->desc; + if (!d) + goto out; - if (d) { - if (d->cyclic) { - /* call the cyclic callback */ - vchan_cyclic_callback(&d->vd); - } else if (!readl(c->chan_base + BCM2835_DMA_ADDR)) { - vchan_cookie_complete(&c->desc->vd); - bcm2835_dma_start_desc(c); - } + if (bcm2835_dma_oob_capable() && running_oob()) { + /* + * If we cannot process this from the out-of-band + * stage, schedule a callback from in-band context. + */ + if (!do_channel(c, d)) + irq_post_inband(irq); + } else { + do_channel(c, d); } - spin_unlock_irqrestore(&c->vc.lock, flags); +out: + vchan_unlock_irqrestore(&c->vc, flags); return IRQ_HANDLED; } @@ -571,7 +619,7 @@ static enum dma_status bcm2835_dma_tx_st if (ret == DMA_COMPLETE || !txstate) return ret; - spin_lock_irqsave(&c->vc.lock, flags); + vchan_lock_irqsave(&c->vc, flags); vd = vchan_find_desc(&c->vc, cookie); if (vd) { txstate->residue = @@ -592,7 +640,7 @@ static enum dma_status bcm2835_dma_tx_st txstate->residue = 0; } - spin_unlock_irqrestore(&c->vc.lock, flags); + vchan_unlock_irqrestore(&c->vc, flags); return ret; } @@ -602,13 +650,36 @@ static void bcm2835_dma_issue_pending(st struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); unsigned long flags; - spin_lock_irqsave(&c->vc.lock, flags); + vchan_lock_irqsave(&c->vc, flags); if (vchan_issue_pending(&c->vc) && !c->desc) bcm2835_dma_start_desc(c); - spin_unlock_irqrestore(&c->vc.lock, flags); + vchan_unlock_irqrestore(&c->vc, flags); } +#ifdef CONFIG_DMA_BCM2835_OOB +static int bcm2835_dma_pulse_oob(struct dma_chan *chan) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + unsigned long flags; + int ret = -EIO; + + vchan_lock_irqsave(&c->vc, flags); + if (c->desc && vchan_oob_pulsed(&c->desc->vd)) { + bcm2835_dma_enable_channel(c); + ret = 0; + } + vchan_unlock_irqrestore(&c->vc, flags); + + return ret; +} +#else +static int bcm2835_dma_pulse_oob(struct dma_chan *chan) +{ + return -ENOTSUPP; +} +#endif + static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_memcpy( struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len, unsigned long flags) @@ -650,6 +721,15 @@ static struct dma_async_tx_descriptor *b u32 extra = BCM2835_DMA_INT_EN; size_t frames; + if (!bcm2835_dma_oob_capable()) { + if (flags & (DMA_OOB_INTERRUPT|DMA_OOB_PULSE)) { + dev_err(chan->device->dev, + "%s: out-of-band slave transfers disabled\n", + __func__); + return NULL; + } + } + if (!is_slave_direction(direction)) { dev_err(chan->device->dev, "%s: bad direction?\n", __func__); @@ -715,7 +795,21 @@ static struct dma_async_tx_descriptor *b return NULL; } - if (flags & DMA_PREP_INTERRUPT) + if (!bcm2835_dma_oob_capable()) { + if (flags & DMA_OOB_INTERRUPT) { + dev_err(chan->device->dev, + "%s: out-of-band cyclic transfers disabled\n", + __func__); + return NULL; + } + } else if (flags & DMA_OOB_PULSE) { + dev_err(chan->device->dev, + "%s: no pulse mode with out-of-band cyclic transfers\n", + 
__func__); + return NULL; + } + + if (flags & (DMA_PREP_INTERRUPT|DMA_OOB_INTERRUPT)) extra |= BCM2835_DMA_INT_EN; else period_len = buf_len; @@ -791,7 +885,7 @@ static int bcm2835_dma_terminate_all(str unsigned long flags; LIST_HEAD(head); - spin_lock_irqsave(&c->vc.lock, flags); + vchan_lock_irqsave(&c->vc, flags); /* stop DMA activity */ if (c->desc) { @@ -801,7 +895,7 @@ static int bcm2835_dma_terminate_all(str } vchan_get_all_descriptors(&c->vc, &head); - spin_unlock_irqrestore(&c->vc.lock, flags); + vchan_unlock_irqrestore(&c->vc, flags); vchan_dma_desc_free_list(&c->vc, &head); return 0; @@ -912,11 +1006,13 @@ static int bcm2835_dma_probe(struct plat dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); dma_cap_set(DMA_PRIVATE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_OOB, od->ddev.cap_mask); dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources; od->ddev.device_tx_status = bcm2835_dma_tx_status; od->ddev.device_issue_pending = bcm2835_dma_issue_pending; + od->ddev.device_pulse_oob = bcm2835_dma_pulse_oob; od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic; od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg; od->ddev.device_prep_dma_memcpy = bcm2835_dma_prep_dma_memcpy; @@ -982,10 +1078,10 @@ static int bcm2835_dma_probe(struct plat continue; /* check if there are other channels that also use this irq */ - irq_flags = 0; + irq_flags = IS_ENABLED(CONFIG_DMA_BCM2835_OOB) ? IRQF_OOB : 0; for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++) if ((i != j) && (irq[j] == irq[i])) { - irq_flags = IRQF_SHARED; + irq_flags |= IRQF_SHARED; break; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/dma/dmaengine.c linux-dovetail-v5.15.y-dovetail/drivers/dma/dmaengine.c --- linux-5.15.26/drivers/dma/dmaengine.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/dma/dmaengine.c 2022-03-10 09:47:50.000000000 +0100 @@ -578,7 +578,8 @@ int dma_get_slave_caps(struct dma_chan * /* check if the channel supports slave transactions */ if (!(test_bit(DMA_SLAVE, device->cap_mask.bits) || - test_bit(DMA_CYCLIC, device->cap_mask.bits))) + test_bit(DMA_CYCLIC, device->cap_mask.bits) || + test_bit(DMA_OOB, device->cap_mask.bits))) return -ENXIO; /* @@ -1209,6 +1210,13 @@ int dma_async_device_register(struct dma return -EIO; } + if (dma_has_cap(DMA_OOB, device->cap_mask) && !device->device_pulse_oob) { + dev_err(device->dev, + "Device claims capability %s, but pulse handler is not defined\n", + "DMA_OOB"); + return -EIO; + } + if (dma_has_cap(DMA_INTERLEAVE, device->cap_mask) && !device->device_prep_interleaved_dma) { dev_err(device->dev, "Device claims capability %s, but op is not defined\n", diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/dma/imx-sdma.c linux-dovetail-v5.15.y-dovetail/drivers/dma/imx-sdma.c --- linux-5.15.26/drivers/dma/imx-sdma.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/dma/imx-sdma.c 2022-03-10 09:47:50.000000000 +0100 @@ -512,6 +512,10 @@ struct sdma_engine { /* clock ratio for AHB:SDMA core. 
1:1 is 1, 2:1 is 0*/ bool clk_ratio; bool fw_loaded; +#ifdef CONFIG_IMX_SDMA_OOB + hard_spinlock_t oob_lock; + u32 pending_stat; +#endif }; static int sdma_config_write(struct dma_chan *chan, @@ -793,6 +797,11 @@ static struct sdma_desc *to_sdma_desc(st return container_of(t, struct sdma_desc, vd.tx); } +static inline bool sdma_oob_capable(void) +{ + return IS_ENABLED(CONFIG_IMX_SDMA_OOB); +} + static void sdma_start_desc(struct sdma_channel *sdmac) { struct virt_dma_desc *vd = vchan_next_desc(&sdmac->vc); @@ -810,7 +819,8 @@ static void sdma_start_desc(struct sdma_ sdma->channel_control[channel].base_bd_ptr = desc->bd_phys; sdma->channel_control[channel].current_bd_ptr = desc->bd_phys; - sdma_enable_channel(sdma, sdmac->channel); + if (!sdma_oob_capable() || !vchan_oob_pulsed(vd)) + sdma_enable_channel(sdma, sdmac->channel); } static void sdma_update_channel_loop(struct sdma_channel *sdmac) @@ -854,9 +864,9 @@ static void sdma_update_channel_loop(str * SDMA transaction status by the time the client tasklet is * executed. */ - spin_unlock(&sdmac->vc.lock); + vchan_unlock(&sdmac->vc); dmaengine_desc_get_callback_invoke(&desc->vd.tx, NULL); - spin_lock(&sdmac->vc.lock); + vchan_lock(&sdmac->vc); if (error) sdmac->status = old_status; @@ -866,20 +876,21 @@ static void sdma_update_channel_loop(str static void mxc_sdma_handle_channel_normal(struct sdma_channel *data) { struct sdma_channel *sdmac = (struct sdma_channel *) data; + struct sdma_desc *desc = sdmac->desc; struct sdma_buffer_descriptor *bd; int i, error = 0; - sdmac->desc->chn_real_count = 0; + desc->chn_real_count = 0; /* * non loop mode. Iterate over all descriptors, collect * errors and call callback function */ - for (i = 0; i < sdmac->desc->num_bd; i++) { - bd = &sdmac->desc->bd[i]; + for (i = 0; i < desc->num_bd; i++) { + bd = &desc->bd[i]; if (bd->mode.status & (BD_DONE | BD_RROR)) error = -EIO; - sdmac->desc->chn_real_count += bd->mode.count; + desc->chn_real_count += bd->mode.count; } if (error) @@ -888,37 +899,84 @@ static void mxc_sdma_handle_channel_norm sdmac->status = DMA_COMPLETE; } -static irqreturn_t sdma_int_handler(int irq, void *dev_id) +static unsigned long sdma_do_channels(struct sdma_engine *sdma, + unsigned long stat) { - struct sdma_engine *sdma = dev_id; - unsigned long stat; + unsigned long mask = stat; - stat = readl_relaxed(sdma->regs + SDMA_H_INTR); - writel_relaxed(stat, sdma->regs + SDMA_H_INTR); - /* channel 0 is special and not handled here, see run_channel0() */ - stat &= ~1; - - while (stat) { - int channel = fls(stat) - 1; + while (mask) { + int channel = fls(mask) - 1; struct sdma_channel *sdmac = &sdma->channel[channel]; struct sdma_desc *desc; - spin_lock(&sdmac->vc.lock); + vchan_lock(&sdmac->vc); desc = sdmac->desc; if (desc) { + if (running_oob() && !vchan_oob_handled(&desc->vd)) + goto next; if (sdmac->flags & IMX_DMA_SG_LOOP) { sdma_update_channel_loop(sdmac); } else { mxc_sdma_handle_channel_normal(sdmac); + if (running_oob()) { + vchan_unlock(&sdmac->vc); + dmaengine_desc_get_callback_invoke(&desc->vd.tx, NULL); + __clear_bit(channel, &stat); + goto next_unlocked; + } vchan_cookie_complete(&desc->vd); sdma_start_desc(sdmac); } } - - spin_unlock(&sdmac->vc.lock); __clear_bit(channel, &stat); + next: + vchan_unlock(&sdmac->vc); + next_unlocked: + __clear_bit(channel, &mask); } + return stat; +} + +static irqreturn_t sdma_int_handler(int irq, void *dev_id) +{ + struct sdma_engine *sdma = dev_id; + unsigned long stat, flags __maybe_unused; + +#ifdef CONFIG_IMX_SDMA_OOB + if (running_oob()) { 
+ stat = readl_relaxed(sdma->regs + SDMA_H_INTR); + writel_relaxed(stat, sdma->regs + SDMA_H_INTR); + /* + * Locking is only to guard against IRQ migration with + * a delayed in-band event running from a remote CPU + * after some IRQ routing changed the affinity of the + * out-of-band handler in the meantime. + */ + stat = sdma_do_channels(sdma, stat & ~1); + if (stat) { + raw_spin_lock(&sdma->oob_lock); + sdma->pending_stat |= stat; + raw_spin_unlock(&sdma->oob_lock); + /* Call us back from in-band context. */ + irq_post_inband(irq); + } + return IRQ_HANDLED; + } + + /* In-band IRQ context: stalled, but hard irqs are on. */ + raw_spin_lock_irqsave(&sdma->oob_lock, flags); + stat = sdma->pending_stat; + sdma->pending_stat = 0; + raw_spin_unlock_irqrestore(&sdma->oob_lock, flags); + sdma_do_channels(sdma, stat); +#else + stat = readl_relaxed(sdma->regs + SDMA_H_INTR); + writel_relaxed(stat, sdma->regs + SDMA_H_INTR); + /* channel 0 is special and not handled here, see run_channel0() */ + sdma_do_channels(sdma, stat & ~1); +#endif + return IRQ_HANDLED; } @@ -1124,7 +1182,7 @@ static int sdma_terminate_all(struct dma struct sdma_channel *sdmac = to_sdma_chan(chan); unsigned long flags; - spin_lock_irqsave(&sdmac->vc.lock, flags); + vchan_lock_irqsave(&sdmac->vc, flags); sdma_disable_channel(chan); @@ -1138,11 +1196,12 @@ static int sdma_terminate_all(struct dma */ vchan_get_all_descriptors(&sdmac->vc, &sdmac->terminated); sdmac->desc = NULL; + vchan_unlock_irqrestore(&sdmac->vc, flags); schedule_work(&sdmac->terminate_worker); + } else { + vchan_unlock_irqrestore(&sdmac->vc, flags); } - spin_unlock_irqrestore(&sdmac->vc.lock, flags); - return 0; } @@ -1506,6 +1565,15 @@ static struct dma_async_tx_descriptor *s struct scatterlist *sg; struct sdma_desc *desc; + if (!sdma_oob_capable()) { + if (flags & (DMA_OOB_INTERRUPT|DMA_OOB_PULSE)) { + dev_err(sdma->dev, + "%s: out-of-band slave transfers disabled\n", + __func__); + return NULL; + } + } + sdma_config_write(chan, &sdmac->slave_config, direction); desc = sdma_transfer_init(sdmac, direction, sg_len); @@ -1557,7 +1625,8 @@ static struct dma_async_tx_descriptor *s if (i + 1 == sg_len) { param |= BD_INTR; - param |= BD_LAST; + if (!sdma_oob_capable() || !(flags & DMA_OOB_PULSE)) + param |= BD_LAST; param &= ~BD_CONT; } @@ -1592,6 +1661,20 @@ static struct dma_async_tx_descriptor *s dev_dbg(sdma->dev, "%s channel: %d\n", __func__, channel); + if (!sdma_oob_capable()) { + if (flags & (DMA_OOB_INTERRUPT|DMA_OOB_PULSE)) { + dev_err(sdma->dev, + "%s: out-of-band cyclic transfers disabled\n", + __func__); + return NULL; + } + } else if (flags & DMA_OOB_PULSE) { + dev_err(chan->device->dev, + "%s: no pulse mode with out-of-band cyclic transfers\n", + __func__); + return NULL; + } + sdma_config_write(chan, &sdmac->slave_config, direction); desc = sdma_transfer_init(sdmac, direction, num_periods); @@ -1714,7 +1797,7 @@ static enum dma_status sdma_tx_status(st if (ret == DMA_COMPLETE || !txstate) return ret; - spin_lock_irqsave(&sdmac->vc.lock, flags); + vchan_lock_irqsave(&sdmac->vc, flags); vd = vchan_find_desc(&sdmac->vc, cookie); if (vd) @@ -1732,7 +1815,7 @@ static enum dma_status sdma_tx_status(st residue = 0; } - spin_unlock_irqrestore(&sdmac->vc.lock, flags); + vchan_unlock_irqrestore(&sdmac->vc, flags); dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, residue); @@ -1745,12 +1828,39 @@ static void sdma_issue_pending(struct dm struct sdma_channel *sdmac = to_sdma_chan(chan); unsigned long flags; - spin_lock_irqsave(&sdmac->vc.lock, 
flags); + vchan_lock_irqsave(&sdmac->vc, flags); if (vchan_issue_pending(&sdmac->vc) && !sdmac->desc) sdma_start_desc(sdmac); - spin_unlock_irqrestore(&sdmac->vc.lock, flags); + vchan_unlock_irqrestore(&sdmac->vc, flags); } +#ifdef CONFIG_IMX_SDMA_OOB +static int sdma_pulse_oob(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_desc *desc = sdmac->desc; + unsigned long flags; + int n, ret = -EIO; + + vchan_lock_irqsave(&sdmac->vc, flags); + if (desc && vchan_oob_pulsed(&desc->vd)) { + for (n = 0; n < desc->num_bd - 1; n++) + desc->bd[n].mode.status |= BD_DONE; + desc->bd[n].mode.status |= BD_DONE|BD_WRAP; + sdma_enable_channel(sdmac->sdma, sdmac->channel); + ret = 0; + } + vchan_unlock_irqrestore(&sdmac->vc, flags); + + return ret; +} +#else +static int sdma_pulse_oob(struct dma_chan *chan) +{ + return -ENOTSUPP; +} +#endif + #define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1 34 #define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2 38 #define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3 45 @@ -2101,8 +2211,9 @@ static int sdma_probe(struct platform_de if (ret) goto err_clk; - ret = devm_request_irq(&pdev->dev, irq, sdma_int_handler, 0, "sdma", - sdma); + ret = devm_request_irq(&pdev->dev, irq, sdma_int_handler, + IS_ENABLED(CONFIG_IMX_SDMA_OOB) ? IRQF_OOB : 0, + "sdma", sdma); if (ret) goto err_irq; @@ -2121,6 +2232,7 @@ static int sdma_probe(struct platform_de dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask); dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask); + dma_cap_set(DMA_OOB, sdma->dma_device.cap_mask); dma_cap_set(DMA_MEMCPY, sdma->dma_device.cap_mask); INIT_LIST_HEAD(&sdma->dma_device.channels); @@ -2171,6 +2283,7 @@ static int sdma_probe(struct platform_de sdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; sdma->dma_device.device_prep_dma_memcpy = sdma_prep_memcpy; sdma->dma_device.device_issue_pending = sdma_issue_pending; + sdma->dma_device.device_pulse_oob = sdma_pulse_oob; sdma->dma_device.copy_align = 2; dma_set_max_seg_size(sdma->dma_device.dev, SDMA_BD_MAX_CNT); @@ -2213,6 +2326,16 @@ static int sdma_probe(struct platform_de dev_warn(&pdev->dev, "failed to get firmware from device tree\n"); } + /* + * Keep the clocks enabled at any time if we plan to use the + * DMA from out-of-band context, bumping their refcount to + * keep them on until sdma_remove() is called eventually. 
+ */ + if (IS_ENABLED(CONFIG_IMX_SDMA_OOB)) { + clk_enable(sdma->clk_ipg); + clk_enable(sdma->clk_ahb); + } + return 0; err_register: @@ -2231,6 +2354,11 @@ static int sdma_remove(struct platform_d struct sdma_engine *sdma = platform_get_drvdata(pdev); int i; + if (IS_ENABLED(CONFIG_IMX_SDMA_OOB)) { + clk_disable(sdma->clk_ahb); + clk_disable(sdma->clk_ipg); + } + devm_free_irq(&pdev->dev, sdma->irq, sdma); dma_async_device_unregister(&sdma->dma_device); kfree(sdma->script_addrs); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/dma/Kconfig linux-dovetail-v5.15.y-dovetail/drivers/dma/Kconfig --- linux-5.15.26/drivers/dma/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/dma/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -47,6 +47,10 @@ config DMA_ENGINE config DMA_VIRTUAL_CHANNELS tristate +config DMA_VIRTUAL_CHANNELS_OOB + def_bool n + depends on DMA_VIRTUAL_CHANNELS && DOVETAIL + config DMA_ACPI def_bool y depends on ACPI @@ -131,6 +135,13 @@ config DMA_BCM2835 select DMA_ENGINE select DMA_VIRTUAL_CHANNELS +config DMA_BCM2835_OOB + bool "Out-of-band support for BCM2835 DMA" + depends on DMA_BCM2835 && DOVETAIL + select DMA_VIRTUAL_CHANNELS_OOB + help + Enable out-of-band requests to BCM2835 DMA. + config DMA_JZ4780 tristate "JZ4780 DMA support" depends on MIPS || COMPILE_TEST @@ -269,6 +280,13 @@ config IMX_SDMA Support the i.MX SDMA engine. This engine is integrated into Freescale i.MX25/31/35/51/53/6 chips. +config IMX_SDMA_OOB + bool "Out-of-band support for i.MX SDMA" + depends on IMX_SDMA && DOVETAIL + select DMA_VIRTUAL_CHANNELS_OOB + help + Enable out-of-band requests to i.MX SDMA. + config INTEL_IDMA64 tristate "Intel integrated DMA 64-bit support" select DMA_ENGINE diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/dma/virt-dma.c linux-dovetail-v5.15.y-dovetail/drivers/dma/virt-dma.c --- linux-5.15.26/drivers/dma/virt-dma.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/dma/virt-dma.c 2022-03-10 09:47:50.000000000 +0100 @@ -23,11 +23,11 @@ dma_cookie_t vchan_tx_submit(struct dma_ unsigned long flags; dma_cookie_t cookie; - spin_lock_irqsave(&vc->lock, flags); + vchan_lock_irqsave(vc, flags); cookie = dma_cookie_assign(tx); list_move_tail(&vd->node, &vc->desc_submitted); - spin_unlock_irqrestore(&vc->lock, flags); + vchan_unlock_irqrestore(vc, flags); dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n", vc, vd, cookie); @@ -52,9 +52,9 @@ int vchan_tx_desc_free(struct dma_async_ struct virt_dma_desc *vd = to_virt_desc(tx); unsigned long flags; - spin_lock_irqsave(&vc->lock, flags); + vchan_lock_irqsave(vc, flags); list_del(&vd->node); - spin_unlock_irqrestore(&vc->lock, flags); + vchan_unlock_irqrestore(vc, flags); dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: freeing\n", vc, vd, vd->tx.cookie); @@ -87,7 +87,7 @@ static void vchan_complete(struct taskle struct dmaengine_desc_callback cb; LIST_HEAD(head); - spin_lock_irq(&vc->lock); + vchan_lock_irq(vc); list_splice_tail_init(&vc->desc_completed, &head); vd = vc->cyclic; if (vd) { @@ -96,7 +96,7 @@ static void vchan_complete(struct taskle } else { memset(&cb, 0, sizeof(cb)); } - spin_unlock_irq(&vc->lock); + vchan_unlock_irq(vc); dmaengine_desc_callback_invoke(&cb, &vd->tx_result); @@ -120,11 +120,119 @@ void vchan_dma_desc_free_list(struct vir } EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list); +#ifdef CONFIG_DMA_VIRTUAL_CHANNELS_OOB + +static void inband_init_chan_lock(struct virt_dma_chan *vc) +{ + 
spin_lock_init(&vc->lock); +} + +static void inband_lock_chan(struct virt_dma_chan *vc) +{ + spin_lock(&vc->lock); +} + +static void inband_unlock_chan(struct virt_dma_chan *vc) +{ + spin_unlock(&vc->lock); +} + +static void inband_lock_irq_chan(struct virt_dma_chan *vc) +{ + spin_lock_irq(&vc->lock); +} + +static void inband_unlock_irq_chan(struct virt_dma_chan *vc) +{ + spin_unlock_irq(&vc->lock); +} + +static unsigned long inband_lock_irqsave_chan(struct virt_dma_chan *vc) +{ + unsigned long flags; + + spin_lock_irqsave(&vc->lock, flags); + + return flags; +} + +static void inband_unlock_irqrestore_chan(struct virt_dma_chan *vc, + unsigned long flags) +{ + spin_unlock_irqrestore(&vc->lock, flags); +} + +static struct virt_dma_lockops inband_lock_ops = { + .init = inband_init_chan_lock, + .lock = inband_lock_chan, + .unlock = inband_unlock_chan, + .lock_irq = inband_lock_irq_chan, + .unlock_irq = inband_unlock_irq_chan, + .lock_irqsave = inband_lock_irqsave_chan, + .unlock_irqrestore = inband_unlock_irqrestore_chan, +}; + +static void oob_init_chan_lock(struct virt_dma_chan *vc) +{ + raw_spin_lock_init(&vc->oob_lock); +} + +static void oob_lock_chan(struct virt_dma_chan *vc) +{ + raw_spin_lock(&vc->oob_lock); +} + +static void oob_unlock_chan(struct virt_dma_chan *vc) +{ + raw_spin_unlock(&vc->oob_lock); +} + +static void oob_lock_irq_chan(struct virt_dma_chan *vc) +{ + raw_spin_lock_irq(&vc->oob_lock); +} + +static void oob_unlock_irq_chan(struct virt_dma_chan *vc) +{ + raw_spin_unlock_irq(&vc->oob_lock); +} + +static unsigned long oob_lock_irqsave_chan(struct virt_dma_chan *vc) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&vc->oob_lock, flags); + + return flags; +} + +static void oob_unlock_irqrestore_chan(struct virt_dma_chan *vc, + unsigned long flags) +{ + raw_spin_unlock_irqrestore(&vc->oob_lock, flags); +} + +static struct virt_dma_lockops oob_lock_ops = { + .init = oob_init_chan_lock, + .lock = oob_lock_chan, + .unlock = oob_unlock_chan, + .lock_irq = oob_lock_irq_chan, + .unlock_irq = oob_unlock_irq_chan, + .lock_irqsave = oob_lock_irqsave_chan, + .unlock_irqrestore = oob_unlock_irqrestore_chan, +}; + +#endif + void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev) { dma_cookie_init(&vc->chan); - spin_lock_init(&vc->lock); +#ifdef CONFIG_DMA_VIRTUAL_CHANNELS_OOB + vc->lock_ops = test_bit(DMA_OOB, dmadev->cap_mask.bits) ? 
+ &oob_lock_ops : &inband_lock_ops; +#endif + vchan_lock_init(vc); INIT_LIST_HEAD(&vc->desc_allocated); INIT_LIST_HEAD(&vc->desc_submitted); INIT_LIST_HEAD(&vc->desc_issued); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/dma/virt-dma.h linux-dovetail-v5.15.y-dovetail/drivers/dma/virt-dma.h --- linux-5.15.26/drivers/dma/virt-dma.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/dma/virt-dma.h 2022-03-10 09:47:50.000000000 +0100 @@ -19,12 +19,22 @@ struct virt_dma_desc { struct list_head node; }; +struct virt_dma_lockops; + struct virt_dma_chan { struct dma_chan chan; struct tasklet_struct task; void (*desc_free)(struct virt_dma_desc *); +#ifdef CONFIG_DMA_VIRTUAL_CHANNELS_OOB + struct virt_dma_lockops *lock_ops; + union { + spinlock_t lock; + hard_spinlock_t oob_lock; + }; +#else spinlock_t lock; +#endif /* protected by vc.lock */ struct list_head desc_allocated; @@ -41,6 +51,107 @@ static inline struct virt_dma_chan *to_v return container_of(chan, struct virt_dma_chan, chan); } +#ifdef CONFIG_DMA_VIRTUAL_CHANNELS_OOB + +struct virt_dma_lockops { + void (*init)(struct virt_dma_chan *vc); + void (*lock)(struct virt_dma_chan *vc); + void (*unlock)(struct virt_dma_chan *vc); + void (*lock_irq)(struct virt_dma_chan *vc); + void (*unlock_irq)(struct virt_dma_chan *vc); + unsigned long (*lock_irqsave)(struct virt_dma_chan *vc); + void (*unlock_irqrestore)(struct virt_dma_chan *vc, + unsigned long flags); +}; + +static inline void vchan_lock_init(struct virt_dma_chan *vc) +{ + vc->lock_ops->init(vc); +} + +static inline void vchan_lock(struct virt_dma_chan *vc) +{ + vc->lock_ops->lock(vc); +} + +static inline void vchan_unlock(struct virt_dma_chan *vc) +{ + vc->lock_ops->unlock(vc); +} + +static inline void vchan_lock_irq(struct virt_dma_chan *vc) +{ + vc->lock_ops->lock_irq(vc); +} + +static inline void vchan_unlock_irq(struct virt_dma_chan *vc) +{ + vc->lock_ops->unlock_irq(vc); +} + +static inline +unsigned long __vchan_lock_irqsave(struct virt_dma_chan *vc) +{ + return vc->lock_ops->lock_irqsave(vc); +} + +#define vchan_lock_irqsave(__vc, __flags) \ + do { \ + (__flags) = __vchan_lock_irqsave(__vc); \ + } while (0) + +static inline +void vchan_unlock_irqrestore(struct virt_dma_chan *vc, + unsigned long flags) +{ + vc->lock_ops->unlock_irqrestore(vc, flags); +} + +static inline bool vchan_oob_handled(struct virt_dma_desc *vd) +{ + return !!(vd->tx.flags & DMA_OOB_INTERRUPT); +} + +static inline bool vchan_oob_pulsed(struct virt_dma_desc *vd) +{ + return !!(vd->tx.flags & DMA_OOB_PULSE); +} + +#else + +#define vchan_lock_init(__vc) \ + spin_lock_init(&(__vc)->lock) + +#define vchan_lock(__vc) \ + spin_lock(&(__vc)->lock) + +#define vchan_unlock(__vc) \ + spin_unlock(&(__vc)->lock) + +#define vchan_lock_irq(__vc) \ + spin_lock_irq(&(__vc)->lock) + +#define vchan_unlock_irq(__vc) \ + spin_unlock_irq(&(__vc)->lock) + +#define vchan_lock_irqsave(__vc, __flags) \ + spin_lock_irqsave(&(__vc)->lock, __flags) + +#define vchan_unlock_irqrestore(__vc, __flags) \ + spin_unlock_irqrestore(&(__vc)->lock, __flags) + +static inline bool vchan_oob_handled(struct virt_dma_desc *vd) +{ + return false; +} + +static inline bool vchan_oob_pulsed(struct virt_dma_desc *vd) +{ + return false; +} + +#endif /* !CONFIG_DMA_VIRTUAL_CHANNELS_OOB */ + void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head); void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev); struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *, 
dma_cookie_t); @@ -66,9 +177,9 @@ static inline struct dma_async_tx_descri vd->tx_result.result = DMA_TRANS_NOERROR; vd->tx_result.residue = 0; - spin_lock_irqsave(&vc->lock, flags); + vchan_lock_irqsave(vc, flags); list_add_tail(&vd->node, &vc->desc_allocated); - spin_unlock_irqrestore(&vc->lock, flags); + vchan_unlock_irqrestore(vc, flags); return &vd->tx; } @@ -116,9 +227,9 @@ static inline void vchan_vdesc_fini(stru if (dmaengine_desc_test_reuse(&vd->tx)) { unsigned long flags; - spin_lock_irqsave(&vc->lock, flags); + vchan_lock_irqsave(vc, flags); list_add(&vd->node, &vc->desc_allocated); - spin_unlock_irqrestore(&vc->lock, flags); + vchan_unlock_irqrestore(vc, flags); } else { vc->desc_free(vd); } @@ -190,11 +301,11 @@ static inline void vchan_free_chan_resou unsigned long flags; LIST_HEAD(head); - spin_lock_irqsave(&vc->lock, flags); + vchan_lock_irqsave(vc, flags); vchan_get_all_descriptors(vc, &head); list_for_each_entry(vd, &head, node) dmaengine_desc_clear_reuse(&vd->tx); - spin_unlock_irqrestore(&vc->lock, flags); + vchan_unlock_irqrestore(vc, flags); vchan_dma_desc_free_list(vc, &head); } @@ -215,11 +326,11 @@ static inline void vchan_synchronize(str tasklet_kill(&vc->task); - spin_lock_irqsave(&vc->lock, flags); + vchan_lock_irqsave(vc, flags); list_splice_tail_init(&vc->desc_terminated, &head); - spin_unlock_irqrestore(&vc->lock, flags); + vchan_unlock_irqrestore(vc, flags); vchan_dma_desc_free_list(vc, &head); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/gpio/gpio-mxc.c linux-dovetail-v5.15.y-dovetail/drivers/gpio/gpio-mxc.c --- linux-5.15.26/drivers/gpio/gpio-mxc.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/gpio/gpio-mxc.c 2022-03-10 09:47:50.000000000 +0100 @@ -334,7 +334,8 @@ static int mxc_gpio_init_gc(struct mxc_g ct->chip.irq_unmask = irq_gc_mask_set_bit; ct->chip.irq_set_type = gpio_set_irq_type; ct->chip.irq_set_wake = gpio_set_wake_irq; - ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND; + ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE; ct->regs.ack = GPIO_ISR; ct->regs.mask = GPIO_IMR; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/gpio/gpio-omap.c linux-dovetail-v5.15.y-dovetail/drivers/gpio/gpio-omap.c --- linux-5.15.26/drivers/gpio/gpio-omap.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/gpio/gpio-omap.c 2022-03-10 09:47:50.000000000 +0100 @@ -55,7 +55,7 @@ struct gpio_bank { u32 saved_datain; u32 level_mask; u32 toggle_mask; - raw_spinlock_t lock; + hard_spinlock_t lock; raw_spinlock_t wa_lock; struct gpio_chip chip; struct clk *dbck; @@ -1054,7 +1054,7 @@ static int omap_gpio_chip_init(struct gp ret = devm_request_irq(bank->chip.parent, bank->irq, omap_gpio_irq_handler, - 0, dev_name(bank->chip.parent), bank); + IRQF_OOB, dev_name(bank->chip.parent), bank); if (ret) gpiochip_remove(&bank->chip); @@ -1401,7 +1401,7 @@ static int omap_gpio_probe(struct platfo irqc->irq_bus_lock = omap_gpio_irq_bus_lock, irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock, irqc->name = dev_name(&pdev->dev); - irqc->flags = IRQCHIP_MASK_ON_SUSPEND; + irqc->flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE; irqc->parent_device = dev; bank->irq = platform_get_irq(pdev, 0); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/gpio/gpio-pl061.c linux-dovetail-v5.15.y-dovetail/drivers/gpio/gpio-pl061.c --- linux-5.15.26/drivers/gpio/gpio-pl061.c 
2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/gpio/gpio-pl061.c 2022-03-10 09:47:50.000000000 +0100 @@ -48,7 +48,7 @@ struct pl061_context_save_regs { #endif struct pl061 { - raw_spinlock_t lock; + hard_spinlock_t lock; void __iomem *base; struct gpio_chip gc; @@ -321,6 +321,7 @@ static int pl061_probe(struct amba_devic pl061->irq_chip.irq_unmask = pl061_irq_unmask; pl061->irq_chip.irq_set_type = pl061_irq_type; pl061->irq_chip.irq_set_wake = pl061_irq_set_wake; + pl061->irq_chip.flags = IRQCHIP_PIPELINE_SAFE; writeb(0, pl061->base + GPIOIE); /* disable irqs */ irq = adev->irq[0]; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/gpio/gpio-xilinx.c linux-dovetail-v5.15.y-dovetail/drivers/gpio/gpio-xilinx.c --- linux-5.15.26/drivers/gpio/gpio-xilinx.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/gpio/gpio-xilinx.c 2022-03-10 09:47:50.000000000 +0100 @@ -66,7 +66,7 @@ struct xgpio_instance { DECLARE_BITMAP(state, 64); DECLARE_BITMAP(last_irq_read, 64); DECLARE_BITMAP(dir, 64); - spinlock_t gpio_lock; /* For serializing operations */ + hard_spinlock_t gpio_lock; /* For serializing operations */ int irq; struct irq_chip irqchip; DECLARE_BITMAP(enable, 64); @@ -179,14 +179,14 @@ static void xgpio_set(struct gpio_chip * struct xgpio_instance *chip = gpiochip_get_data(gc); int bit = xgpio_to_bit(chip, gpio); - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); /* Write to GPIO signal and set its direction to output */ __assign_bit(bit, chip->state, val); xgpio_write_ch(chip, XGPIO_DATA_OFFSET, bit, chip->state); - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); } /** @@ -210,7 +210,7 @@ static void xgpio_set_multiple(struct gp bitmap_remap(hw_mask, mask, chip->sw_map, chip->hw_map, 64); bitmap_remap(hw_bits, bits, chip->sw_map, chip->hw_map, 64); - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); bitmap_replace(state, chip->state, hw_bits, hw_mask, 64); @@ -218,7 +218,7 @@ static void xgpio_set_multiple(struct gp bitmap_copy(chip->state, state, 64); - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); } /** @@ -236,13 +236,13 @@ static int xgpio_dir_in(struct gpio_chip struct xgpio_instance *chip = gpiochip_get_data(gc); int bit = xgpio_to_bit(chip, gpio); - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); /* Set the GPIO bit in shadow register and set direction as input */ __set_bit(bit, chip->dir); xgpio_write_ch(chip, XGPIO_TRI_OFFSET, bit, chip->dir); - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); return 0; } @@ -265,7 +265,7 @@ static int xgpio_dir_out(struct gpio_chi struct xgpio_instance *chip = gpiochip_get_data(gc); int bit = xgpio_to_bit(chip, gpio); - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); /* Write state of GPIO signal */ __assign_bit(bit, chip->state, val); @@ -275,7 +275,7 @@ static int xgpio_dir_out(struct gpio_chi __clear_bit(bit, chip->dir); xgpio_write_ch(chip, XGPIO_TRI_OFFSET, bit, chip->dir); - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); return 0; } @@ -405,7 +405,7 @@ static void xgpio_irq_mask(struct irq_da int bit = xgpio_to_bit(chip, irq_offset); u32 mask = BIT(bit / 32), 
temp; - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); __clear_bit(bit, chip->enable); @@ -415,7 +415,7 @@ static void xgpio_irq_mask(struct irq_da temp &= ~mask; xgpio_writereg(chip->regs + XGPIO_IPIER_OFFSET, temp); } - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); } /** @@ -431,7 +431,7 @@ static void xgpio_irq_unmask(struct irq_ u32 old_enable = xgpio_get_value32(chip->enable, bit); u32 mask = BIT(bit / 32), val; - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); __set_bit(bit, chip->enable); @@ -450,7 +450,7 @@ static void xgpio_irq_unmask(struct irq_ xgpio_writereg(chip->regs + XGPIO_IPIER_OFFSET, val); } - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); } /** @@ -515,7 +515,7 @@ static void xgpio_irqhandler(struct irq_ chained_irq_enter(irqchip, desc); - spin_lock(&chip->gpio_lock); + raw_spin_lock(&chip->gpio_lock); xgpio_read_ch_all(chip, XGPIO_DATA_OFFSET, all); @@ -532,7 +532,7 @@ static void xgpio_irqhandler(struct irq_ bitmap_copy(chip->last_irq_read, all, 64); bitmap_or(all, rising, falling, 64); - spin_unlock(&chip->gpio_lock); + raw_spin_unlock(&chip->gpio_lock); dev_dbg(gc->parent, "IRQ rising %*pb falling %*pb\n", 64, rising, 64, falling); @@ -623,7 +623,7 @@ static int xgpio_probe(struct platform_d bitmap_set(chip->hw_map, 0, width[0]); bitmap_set(chip->hw_map, 32, width[1]); - spin_lock_init(&chip->gpio_lock); + raw_spin_lock_init(&chip->gpio_lock); chip->gc.base = -1; chip->gc.ngpio = bitmap_weight(chip->hw_map, 64); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/gpio/gpio-zynq.c linux-dovetail-v5.15.y-dovetail/drivers/gpio/gpio-zynq.c --- linux-5.15.26/drivers/gpio/gpio-zynq.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/gpio/gpio-zynq.c 2022-03-10 09:47:50.000000000 +0100 @@ -601,7 +601,7 @@ static struct irq_chip zynq_gpio_level_i .irq_request_resources = zynq_gpio_irq_reqres, .irq_release_resources = zynq_gpio_irq_relres, .flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED | - IRQCHIP_MASK_ON_SUSPEND, + IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE, }; static struct irq_chip zynq_gpio_edge_irqchip = { @@ -614,7 +614,7 @@ static struct irq_chip zynq_gpio_edge_ir .irq_set_wake = zynq_gpio_set_wake, .irq_request_resources = zynq_gpio_irq_reqres, .irq_release_resources = zynq_gpio_irq_relres, - .flags = IRQCHIP_MASK_ON_SUSPEND, + .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE, }; static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio,
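The GPIO and irqchip conversions above all follow the same recipe: the per-controller lock becomes a hard_spinlock_t (still driven through the raw_spin_* accessors), the irq_chip advertises IRQCHIP_PIPELINE_SAFE so the pipeline core will accept it for delivery from the out-of-band stage, and the demultiplexing parent interrupt is requested with IRQF_OOB. The sketch below condenses that recipe into one place; it is an illustration only, with hypothetical foo_* names and made-up register accesses, not code taken from this patch.

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>

/* Hypothetical pipelined GPIO bank, condensing the pattern used above. */
static DEFINE_HARD_SPINLOCK(foo_gpio_lock);	/* may be taken from the oob stage */

static void foo_gpio_irq_mask(struct irq_data *d)
{
	unsigned long flags;

	/* hard spinlocks keep the regular raw_spin_* API */
	raw_spin_lock_irqsave(&foo_gpio_lock, flags);
	/* ... clear the enable bit for irqd_to_hwirq(d) in the bank register ... */
	raw_spin_unlock_irqrestore(&foo_gpio_lock, flags);
}

static void foo_gpio_irq_unmask(struct irq_data *d)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&foo_gpio_lock, flags);
	/* ... set the enable bit for irqd_to_hwirq(d) ... */
	raw_spin_unlock_irqrestore(&foo_gpio_lock, flags);
}

static irqreturn_t foo_gpio_irq_handler(int irq, void *dev_id)
{
	/* demultiplex the bank status register here */
	return IRQ_HANDLED;
}

static struct irq_chip foo_gpio_irq_chip = {
	.name		= "foo-gpio",
	.irq_mask	= foo_gpio_irq_mask,
	.irq_unmask	= foo_gpio_irq_unmask,
	/* tell the IRQ core these handlers are safe in a pipelined model */
	.flags		= IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE,
};

static int foo_gpio_probe(struct platform_device *pdev)
{
	int irq = platform_get_irq(pdev, 0);

	if (irq < 0)
		return irq;

	/* the parent (chained) interrupt is requested as out-of-band capable */
	return devm_request_irq(&pdev->dev, irq, foo_gpio_irq_handler,
				IRQF_OOB, dev_name(&pdev->dev), NULL);
}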
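On the dmaengine side, the virt-dma changes earlier in this patch replace every spin_lock_irqsave(&vc->lock, ...) with vchan_lock_irqsave(vc, ...): with CONFIG_DMA_VIRTUAL_CHANNELS_OOB enabled, the wrapper resolves to the ordinary spinlock for in-band channels and to the hard (raw) oob_lock when the dma_device advertises the DMA_OOB capability. A driver built on virt-dma therefore only has to switch to the wrappers; the function below is a hedged sketch of a typical issue_pending method under that convention (foo_dma_issue_pending itself is hypothetical, the helpers it calls are the stock virt-dma ones).

#include <linux/dmaengine.h>

#include "virt-dma.h"

/* Hypothetical virt-dma based channel method: only the locking wrappers
 * differ from a conventional driver. */
static void foo_dma_issue_pending(struct dma_chan *chan)
{
	struct virt_dma_chan *vc = to_virt_chan(chan);
	unsigned long flags;

	/* picks vc->lock or vc->oob_lock depending on the DMA_OOB capability */
	vchan_lock_irqsave(vc, flags);
	if (vchan_issue_pending(vc)) {
		/* ... program the controller with the next issued descriptor ... */
	}
	vchan_unlock_irqrestore(vc, flags);
}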
diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c linux-dovetail-v5.15.y-dovetail/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c --- linux-5.15.26/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c 2022-03-10 09:47:50.000000000 +0100 @@ -87,6 +87,7 @@ static struct irq_chip dpu_mdss_irq_chip .name = "dpu_mdss", .irq_mask = dpu_mdss_irq_mask, .irq_unmask = dpu_mdss_irq_unmask, + .flags = IRQCHIP_PIPELINE_SAFE, }; static struct lock_class_key dpu_mdss_lock_key, dpu_mdss_request_key; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c linux-dovetail-v5.15.y-dovetail/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c --- linux-5.15.26/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c 2022-03-10 09:47:50.000000000 +0100 @@ -90,6 +90,7 @@ static struct irq_chip mdss_hw_irq_chip .name = "mdss", .irq_mask = mdss_hw_mask_irq, .irq_unmask = mdss_hw_unmask_irq, + .flags = IRQCHIP_PIPELINE_SAFE, }; static int mdss_hw_irqdomain_map(struct irq_domain *d, unsigned int irq, @@ -253,7 +254,7 @@ int mdp5_mdss_init(struct drm_device *de } ret = devm_request_irq(dev->dev, platform_get_irq(pdev, 0), - mdss_irq, 0, "mdss_isr", mdp5_mdss); + mdss_irq, IRQF_OOB, "mdss_isr", mdp5_mdss); if (ret) { DRM_DEV_ERROR(dev->dev, "failed to init irq: %d\n", ret); goto fail_irq; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/gpu/ipu-v3/ipu-common.c linux-dovetail-v5.15.y-dovetail/drivers/gpu/ipu-v3/ipu-common.c --- linux-5.15.26/drivers/gpu/ipu-v3/ipu-common.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/gpu/ipu-v3/ipu-common.c 2022-03-10 09:47:50.000000000 +0100 @@ -1235,6 +1235,7 @@ static int ipu_irq_init(struct ipu_soc * ct->chip.irq_ack = irq_gc_ack_set_bit; ct->chip.irq_mask = irq_gc_mask_clr_bit; ct->chip.irq_unmask = irq_gc_mask_set_bit; + ct->chip.flags = IRQCHIP_PIPELINE_SAFE; ct->regs.ack = IPU_INT_STAT(i / 32); ct->regs.mask = IPU_INT_CTRL(i / 32); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/iio/industrialio-trigger.c linux-dovetail-v5.15.y-dovetail/drivers/iio/industrialio-trigger.c --- linux-5.15.26/drivers/iio/industrialio-trigger.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/iio/industrialio-trigger.c 2022-03-10 09:47:50.000000000 +0100 @@ -584,6 +584,7 @@ struct iio_trigger *viio_trigger_alloc(s trig->subirq_chip.name = trig->name; trig->subirq_chip.irq_mask = &iio_trig_subirqmask; trig->subirq_chip.irq_unmask = &iio_trig_subirqunmask; + trig->subirq_chip.flags = IRQCHIP_PIPELINE_SAFE; for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) { irq_set_chip(trig->subirq_base + i, &trig->subirq_chip); irq_set_handler(trig->subirq_base + i, &handle_simple_irq); diff -uprN -X
linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/irqchip/exynos-combiner.c linux-dovetail-v5.15.y-dovetail/drivers/irqchip/exynos-combiner.c --- linux-5.15.26/drivers/irqchip/exynos-combiner.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/irqchip/exynos-combiner.c 2022-03-10 09:47:50.000000000 +0100 @@ -24,7 +24,7 @@ #define IRQ_IN_COMBINER 8 -static DEFINE_SPINLOCK(irq_controller_lock); +static DEFINE_HARD_SPINLOCK(irq_controller_lock); struct combiner_chip_data { unsigned int hwirq_offset; @@ -72,9 +72,9 @@ static void combiner_handle_cascade_irq( chained_irq_enter(chip, desc); - spin_lock(&irq_controller_lock); + raw_spin_lock(&irq_controller_lock); status = readl_relaxed(chip_data->base + COMBINER_INT_STATUS); - spin_unlock(&irq_controller_lock); + raw_spin_unlock(&irq_controller_lock); status &= chip_data->irq_mask; if (status == 0) @@ -111,6 +111,7 @@ static struct irq_chip combiner_chip = { #ifdef CONFIG_SMP .irq_set_affinity = combiner_set_affinity, #endif + .flags = IRQCHIP_PIPELINE_SAFE, }; static void __init combiner_cascade_irq(struct combiner_chip_data *combiner_data, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/irqchip/irq-bcm2835.c linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-bcm2835.c --- linux-5.15.26/drivers/irqchip/irq-bcm2835.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-bcm2835.c 2022-03-10 09:47:50.000000000 +0100 @@ -102,7 +102,8 @@ static void armctrl_unmask_irq(struct ir static struct irq_chip armctrl_chip = { .name = "ARMCTRL-level", .irq_mask = armctrl_mask_irq, - .irq_unmask = armctrl_unmask_irq + .irq_unmask = armctrl_unmask_irq, + .flags = IRQCHIP_PIPELINE_SAFE, }; static int armctrl_xlate(struct irq_domain *d, struct device_node *ctrlr, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/irqchip/irq-bcm2836.c linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-bcm2836.c --- linux-5.15.26/drivers/irqchip/irq-bcm2836.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-bcm2836.c 2022-03-10 09:47:50.000000000 +0100 @@ -58,6 +58,7 @@ static struct irq_chip bcm2836_arm_irqch .name = "bcm2836-timer", .irq_mask = bcm2836_arm_irqchip_mask_timer_irq, .irq_unmask = bcm2836_arm_irqchip_unmask_timer_irq, + .flags = IRQCHIP_PIPELINE_SAFE, }; static void bcm2836_arm_irqchip_mask_pmu_irq(struct irq_data *d) @@ -74,6 +75,7 @@ static struct irq_chip bcm2836_arm_irqch .name = "bcm2836-pmu", .irq_mask = bcm2836_arm_irqchip_mask_pmu_irq, .irq_unmask = bcm2836_arm_irqchip_unmask_pmu_irq, + .flags = IRQCHIP_PIPELINE_SAFE, }; static void bcm2836_arm_irqchip_mask_gpu_irq(struct irq_data *d) @@ -88,6 +90,7 @@ static struct irq_chip bcm2836_arm_irqch .name = "bcm2836-gpu", .irq_mask = bcm2836_arm_irqchip_mask_gpu_irq, .irq_unmask = bcm2836_arm_irqchip_unmask_gpu_irq, + .flags = IRQCHIP_PIPELINE_SAFE, }; static void bcm2836_arm_irqchip_dummy_op(struct irq_data *d) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/irqchip/irq-gic.c linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-gic.c --- linux-5.15.26/drivers/irqchip/irq-gic.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-gic.c 2022-03-10 09:47:50.000000000 +0100 @@ -87,7 +87,7 @@ struct gic_chip_data { #ifdef CONFIG_BL_SWITCHER -static DEFINE_RAW_SPINLOCK(cpu_map_lock); +static DEFINE_HARD_SPINLOCK(cpu_map_lock); #define gic_lock_irqsave(f) \ 
raw_spin_lock_irqsave(&cpu_map_lock, (f)) @@ -407,7 +407,8 @@ static const struct irq_chip gic_chip = .irq_set_irqchip_state = gic_irq_set_irqchip_state, .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_MASK_ON_SUSPEND, + IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_PIPELINE_SAFE, }; void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/irqchip/irq-gic-v2m.c linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-gic-v2m.c --- linux-5.15.26/drivers/irqchip/irq-gic-v2m.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-gic-v2m.c 2022-03-10 09:47:50.000000000 +0100 @@ -89,6 +89,7 @@ static struct irq_chip gicv2m_msi_irq_ch .irq_unmask = gicv2m_unmask_msi_irq, .irq_eoi = irq_chip_eoi_parent, .irq_write_msi_msg = pci_msi_domain_write_msg, + .flags = IRQCHIP_PIPELINE_SAFE, }; static struct msi_domain_info gicv2m_msi_domain_info = { @@ -130,6 +131,7 @@ static struct irq_chip gicv2m_irq_chip = .irq_eoi = irq_chip_eoi_parent, .irq_set_affinity = irq_chip_set_affinity_parent, .irq_compose_msi_msg = gicv2m_compose_msi_msg, + .flags = IRQCHIP_PIPELINE_SAFE, }; static int gicv2m_irq_gic_domain_alloc(struct irq_domain *domain, @@ -252,6 +254,7 @@ static bool is_msi_spi_valid(u32 base, u static struct irq_chip gicv2m_pmsi_irq_chip = { .name = "pMSI", + .flags = IRQCHIP_PIPELINE_SAFE, }; static struct msi_domain_ops gicv2m_pmsi_ops = { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/irqchip/irq-gic-v3.c linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-gic-v3.c --- linux-5.15.26/drivers/irqchip/irq-gic-v3.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-gic-v3.c 2022-03-10 09:47:50.000000000 +0100 @@ -1345,7 +1345,8 @@ static struct irq_chip gic_chip = { .ipi_send_mask = gic_ipi_send_mask, .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_MASK_ON_SUSPEND, + IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_PIPELINE_SAFE, }; static struct irq_chip gic_eoimode1_chip = { @@ -1364,7 +1365,8 @@ static struct irq_chip gic_eoimode1_chip .ipi_send_mask = gic_ipi_send_mask, .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_MASK_ON_SUSPEND, + IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_PIPELINE_SAFE, }; static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/irqchip/irq-imx-irqsteer.c linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-imx-irqsteer.c --- linux-5.15.26/drivers/irqchip/irq-imx-irqsteer.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-imx-irqsteer.c 2022-03-10 09:47:50.000000000 +0100 @@ -29,7 +29,7 @@ struct irqsteer_data { struct clk *ipg_clk; int irq[CHAN_MAX_OUTPUT_INT]; int irq_count; - raw_spinlock_t lock; + hard_spinlock_t lock; int reg_num; int channel; struct irq_domain *domain; @@ -74,6 +74,7 @@ static struct irq_chip imx_irqsteer_irq_ .name = "irqsteer", .irq_mask = imx_irqsteer_irq_mask, .irq_unmask = imx_irqsteer_irq_unmask, + .flags = IRQCHIP_PIPELINE_SAFE, }; static int imx_irqsteer_irq_map(struct irq_domain *h, unsigned int irq, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/irqchip/irq-omap-intc.c linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-omap-intc.c --- linux-5.15.26/drivers/irqchip/irq-omap-intc.c 2022-03-02 11:48:10.000000000 +0100 +++ 
linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-omap-intc.c 2022-03-10 09:47:50.000000000 +0100 @@ -211,7 +211,7 @@ static int __init omap_alloc_gc_of(struc ct->chip.irq_mask = irq_gc_mask_disable_reg; ct->chip.irq_unmask = irq_gc_unmask_enable_reg; - ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE; + ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE; ct->regs.enable = INTC_MIR_CLEAR0 + 32 * i; ct->regs.disable = INTC_MIR_SET0 + 32 * i; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/irqchip/irq-sun4i.c linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-sun4i.c --- linux-5.15.26/drivers/irqchip/irq-sun4i.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-sun4i.c 2022-03-10 09:47:50.000000000 +0100 @@ -87,7 +87,7 @@ static struct irq_chip sun4i_irq_chip = .irq_eoi = sun4i_irq_ack, .irq_mask = sun4i_irq_mask, .irq_unmask = sun4i_irq_unmask, - .flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED, + .flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED | IRQCHIP_PIPELINE_SAFE, }; static int sun4i_irq_map(struct irq_domain *d, unsigned int virq, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/irqchip/irq-sunxi-nmi.c linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-sunxi-nmi.c --- linux-5.15.26/drivers/irqchip/irq-sunxi-nmi.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/irqchip/irq-sunxi-nmi.c 2022-03-10 09:47:50.000000000 +0100 @@ -187,7 +187,9 @@ static int __init sunxi_sc_nmi_irq_init( gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; gc->chip_types[0].chip.irq_eoi = irq_gc_ack_set_bit; gc->chip_types[0].chip.irq_set_type = sunxi_sc_nmi_set_type; - gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED; + gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | + IRQCHIP_EOI_IF_HANDLED | + IRQCHIP_PIPELINE_SAFE; gc->chip_types[0].regs.ack = reg_offs->pend; gc->chip_types[0].regs.mask = reg_offs->enable; gc->chip_types[0].regs.type = reg_offs->ctrl; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/memory/omap-gpmc.c linux-dovetail-v5.15.y-dovetail/drivers/memory/omap-gpmc.c --- linux-5.15.26/drivers/memory/omap-gpmc.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/memory/omap-gpmc.c 2022-03-10 09:47:50.000000000 +0100 @@ -1409,6 +1409,7 @@ static int gpmc_setup_irq(struct gpmc_de gpmc->irq_chip.irq_mask = gpmc_irq_mask; gpmc->irq_chip.irq_unmask = gpmc_irq_unmask; gpmc->irq_chip.irq_set_type = gpmc_irq_set_type; + gpmc->irq_chip.flags = IRQCHIP_PIPELINE_SAFE; gpmc_irq_domain = irq_domain_add_linear(gpmc->dev->of_node, gpmc->nirqs, @@ -1419,7 +1420,7 @@ static int gpmc_setup_irq(struct gpmc_de return -ENODEV; } - rc = request_irq(gpmc->irq, gpmc_handle_irq, 0, "gpmc", gpmc); + rc = request_irq(gpmc->irq, gpmc_handle_irq, IRQF_OOB, "gpmc", gpmc); if (rc) { dev_err(gpmc->dev, "failed to request irq %d: %d\n", gpmc->irq, rc); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/mfd/tps65217.c linux-dovetail-v5.15.y-dovetail/drivers/mfd/tps65217.c --- linux-5.15.26/drivers/mfd/tps65217.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/mfd/tps65217.c 2022-03-10 09:47:50.000000000 +0100 @@ -84,6 +84,7 @@ static struct irq_chip tps65217_irq_chip .irq_bus_sync_unlock = tps65217_irq_sync_unlock, .irq_enable = tps65217_irq_enable, .irq_disable = tps65217_irq_disable, + .flags = IRQCHIP_PIPELINE_SAFE, }; static struct mfd_cell 
tps65217s[] = { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/pci/controller/dwc/pcie-designware-host.c linux-dovetail-v5.15.y-dovetail/drivers/pci/controller/dwc/pcie-designware-host.c --- linux-5.15.26/drivers/pci/controller/dwc/pcie-designware-host.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/pci/controller/dwc/pcie-designware-host.c 2022-03-10 09:47:50.000000000 +0100 @@ -44,6 +44,7 @@ static struct irq_chip dw_pcie_msi_irq_c .irq_ack = dw_msi_ack_irq, .irq_mask = dw_msi_mask_irq, .irq_unmask = dw_msi_unmask_irq, + .flags = IRQCHIP_PIPELINE_SAFE, }; static struct msi_domain_info dw_pcie_msi_domain_info = { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/pci/controller/pcie-brcmstb.c linux-dovetail-v5.15.y-dovetail/drivers/pci/controller/pcie-brcmstb.c --- linux-5.15.26/drivers/pci/controller/pcie-brcmstb.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/pci/controller/pcie-brcmstb.c 2022-03-10 09:47:50.000000000 +0100 @@ -465,6 +465,7 @@ static struct irq_chip brcm_msi_irq_chip .irq_ack = irq_chip_ack_parent, .irq_mask = pci_msi_mask_irq, .irq_unmask = pci_msi_unmask_irq, + .flags = IRQCHIP_PIPELINE_SAFE, }; static struct msi_domain_info brcm_msi_domain_info = { @@ -527,6 +528,7 @@ static struct irq_chip brcm_msi_bottom_i .irq_compose_msi_msg = brcm_msi_compose_msi_msg, .irq_set_affinity = brcm_msi_set_affinity, .irq_ack = brcm_msi_ack_irq, + .flags = IRQCHIP_PIPELINE_SAFE, }; static int brcm_msi_alloc(struct brcm_msi *msi) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/pinctrl/bcm/pinctrl-bcm2835.c linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/bcm/pinctrl-bcm2835.c --- linux-5.15.26/drivers/pinctrl/bcm/pinctrl-bcm2835.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/bcm/pinctrl-bcm2835.c 2022-03-10 09:47:50.000000000 +0100 @@ -88,7 +88,7 @@ struct bcm2835_pinctrl { struct pinctrl_desc pctl_desc; struct pinctrl_gpio_range gpio_range; - raw_spinlock_t irq_lock[BCM2835_NUM_BANKS]; + hard_spinlock_t irq_lock[BCM2835_NUM_BANKS]; }; /* pins are just named GPIO0..GPIO53 */ @@ -677,7 +677,7 @@ static struct irq_chip bcm2835_gpio_irq_ .irq_mask = bcm2835_gpio_irq_disable, .irq_unmask = bcm2835_gpio_irq_enable, .irq_set_wake = bcm2835_gpio_irq_set_wake, - .flags = IRQCHIP_MASK_ON_SUSPEND, + .flags = IRQCHIP_MASK_ON_SUSPEND|IRQCHIP_PIPELINE_SAFE, }; static int bcm2835_pctl_get_groups_count(struct pinctrl_dev *pctldev) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/pinctrl/intel/pinctrl-cherryview.c linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/intel/pinctrl-cherryview.c --- linux-5.15.26/drivers/pinctrl/intel/pinctrl-cherryview.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/intel/pinctrl-cherryview.c 2022-03-10 09:47:50.000000000 +0100 @@ -562,7 +562,7 @@ static const struct intel_pinctrl_soc_da * See Intel Atom Z8000 Processor Series Specification Update (Rev. 005), * errata #CHT34, for further information. 
*/ -static DEFINE_RAW_SPINLOCK(chv_lock); +static DEFINE_HARD_SPINLOCK(chv_lock); static u32 chv_pctrl_readl(struct intel_pinctrl *pctrl, unsigned int offset) { @@ -1553,7 +1553,8 @@ static int chv_gpio_probe(struct intel_p pctrl->irqchip.irq_mask = chv_gpio_irq_mask; pctrl->irqchip.irq_unmask = chv_gpio_irq_unmask; pctrl->irqchip.irq_set_type = chv_gpio_irq_type; - pctrl->irqchip.flags = IRQCHIP_SKIP_SET_WAKE; + pctrl->irqchip.flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_PIPELINE_SAFE; chip->irq.chip = &pctrl->irqchip; chip->irq.init_hw = chv_gpio_irq_init_hw; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/pinctrl/qcom/pinctrl-msm.c linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/qcom/pinctrl-msm.c --- linux-5.15.26/drivers/pinctrl/qcom/pinctrl-msm.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/qcom/pinctrl-msm.c 2022-03-10 09:47:50.000000000 +0100 @@ -68,7 +68,7 @@ struct msm_pinctrl { bool intr_target_use_scm; - raw_spinlock_t lock; + hard_spinlock_t lock; DECLARE_BITMAP(dual_edge_irqs, MAX_NR_GPIO); DECLARE_BITMAP(enabled_irqs, MAX_NR_GPIO); @@ -1271,7 +1271,8 @@ static int msm_gpio_init(struct msm_pinc pctrl->irq_chip.irq_set_vcpu_affinity = msm_gpio_irq_set_vcpu_affinity; pctrl->irq_chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SET_TYPE_MASKED | - IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND; + IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND | + IRQCHIP_PIPELINE_SAFE; np = of_parse_phandle(pctrl->dev->of_node, "wakeup-parent", 0); if (np) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/pinctrl/samsung/pinctrl-exynos.c linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/samsung/pinctrl-exynos.c --- linux-5.15.26/drivers/pinctrl/samsung/pinctrl-exynos.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/samsung/pinctrl-exynos.c 2022-03-10 09:47:50.000000000 +0100 @@ -216,6 +216,7 @@ static const struct exynos_irq_chip exyn .irq_set_type = exynos_irq_set_type, .irq_request_resources = exynos_irq_request_resources, .irq_release_resources = exynos_irq_release_resources, + .flags = IRQCHIP_PIPELINE_SAFE, }, .eint_con = EXYNOS_GPIO_ECON_OFFSET, .eint_mask = EXYNOS_GPIO_EMASK_OFFSET, @@ -288,7 +289,7 @@ __init int exynos_eint_gpio_init(struct } ret = devm_request_irq(dev, d->irq, exynos_eint_gpio_irq, - 0, dev_name(dev), d); + IRQF_OOB, dev_name(dev), d); if (ret) { dev_err(dev, "irq request failed\n"); return -ENXIO; @@ -306,6 +307,7 @@ __init int exynos_eint_gpio_init(struct goto err_domains; } bank->irq_chip->chip.name = bank->name; + bank->irq_chip->chip.flags |= IRQCHIP_PIPELINE_SAFE; bank->irq_domain = irq_domain_add_linear(bank->of_node, bank->nr_pins, &exynos_eint_irqd_ops, bank); @@ -409,6 +411,7 @@ static const struct exynos_irq_chip s5pv .irq_set_wake = exynos_wkup_irq_set_wake, .irq_request_resources = exynos_irq_request_resources, .irq_release_resources = exynos_irq_release_resources, + .flags = IRQCHIP_PIPELINE_SAFE, }, .eint_con = EXYNOS_WKUP_ECON_OFFSET, .eint_mask = EXYNOS_WKUP_EMASK_OFFSET, @@ -429,6 +432,7 @@ static const struct exynos_irq_chip exyn .irq_set_wake = exynos_wkup_irq_set_wake, .irq_request_resources = exynos_irq_request_resources, .irq_release_resources = exynos_irq_release_resources, + .flags = IRQCHIP_PIPELINE_SAFE, }, .eint_con = EXYNOS_WKUP_ECON_OFFSET, .eint_mask = EXYNOS_WKUP_EMASK_OFFSET, @@ -448,6 +452,7 @@ static const struct exynos_irq_chip exyn .irq_set_wake = exynos_wkup_irq_set_wake, .irq_request_resources = exynos_irq_request_resources, 
.irq_release_resources = exynos_irq_release_resources, + .flags = IRQCHIP_PIPELINE_SAFE, }, .eint_con = EXYNOS7_WKUP_ECON_OFFSET, .eint_mask = EXYNOS7_WKUP_EMASK_OFFSET, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/pinctrl/samsung/pinctrl-samsung.h linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/samsung/pinctrl-samsung.h --- linux-5.15.26/drivers/pinctrl/samsung/pinctrl-samsung.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/samsung/pinctrl-samsung.h 2022-03-10 09:47:50.000000000 +0100 @@ -171,7 +171,7 @@ struct samsung_pin_bank { struct gpio_chip gpio_chip; struct pinctrl_gpio_range grange; struct exynos_irq_chip *irq_chip; - raw_spinlock_t slock; + hard_spinlock_t slock; u32 pm_save[PINCFG_TYPE_NUM + 1]; /* +1 to handle double CON registers*/ }; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/pinctrl/sunxi/pinctrl-sunxi.c linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/sunxi/pinctrl-sunxi.c --- linux-5.15.26/drivers/pinctrl/sunxi/pinctrl-sunxi.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/sunxi/pinctrl-sunxi.c 2022-03-10 09:47:50.000000000 +0100 @@ -1076,7 +1076,7 @@ static struct irq_chip sunxi_pinctrl_edg .irq_release_resources = sunxi_pinctrl_irq_release_resources, .irq_set_type = sunxi_pinctrl_irq_set_type, .irq_set_wake = sunxi_pinctrl_irq_set_wake, - .flags = IRQCHIP_MASK_ON_SUSPEND, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, }; static struct irq_chip sunxi_pinctrl_level_irq_chip = { @@ -1094,7 +1094,8 @@ static struct irq_chip sunxi_pinctrl_lev .irq_set_wake = sunxi_pinctrl_irq_set_wake, .flags = IRQCHIP_EOI_THREADED | IRQCHIP_MASK_ON_SUSPEND | - IRQCHIP_EOI_IF_HANDLED, + IRQCHIP_EOI_IF_HANDLED | + IRQCHIP_PIPELINE_SAFE, }; static int sunxi_pinctrl_irq_of_xlate(struct irq_domain *d, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/pinctrl/sunxi/pinctrl-sunxi.h linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/sunxi/pinctrl-sunxi.h --- linux-5.15.26/drivers/pinctrl/sunxi/pinctrl-sunxi.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/pinctrl/sunxi/pinctrl-sunxi.h 2022-03-10 09:47:50.000000000 +0100 @@ -167,7 +167,7 @@ struct sunxi_pinctrl { unsigned ngroups; int *irq; unsigned *irq_array; - raw_spinlock_t lock; + hard_spinlock_t lock; struct pinctrl_dev *pctl_dev; unsigned long variant; }; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/soc/qcom/smp2p.c linux-dovetail-v5.15.y-dovetail/drivers/soc/qcom/smp2p.c --- linux-5.15.26/drivers/soc/qcom/smp2p.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/soc/qcom/smp2p.c 2022-03-10 09:47:50.000000000 +0100 @@ -282,6 +282,7 @@ static struct irq_chip smp2p_irq_chip = .irq_mask = smp2p_mask_irq, .irq_unmask = smp2p_unmask_irq, .irq_set_type = smp2p_set_irq_type, + .flags = IRQCHIP_PIPELINE_SAFE, }; static int smp2p_irq_map(struct irq_domain *d, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/spi/Kconfig linux-dovetail-v5.15.y-dovetail/drivers/spi/Kconfig --- linux-5.15.26/drivers/spi/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/spi/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -32,6 +32,10 @@ config SPI_DEBUG Say "yes" to enable debug messaging (like dev_dbg and pr_debug), sysfs, and debugfs support in SPI controller and protocol drivers. 
+config SPI_OOB + def_bool n + depends on HAS_DMA && DOVETAIL + # # MASTER side ... talking to discrete SPI slave chips including microcontrollers # @@ -154,6 +158,13 @@ config SPI_BCM2835 is for the regular SPI controller. Slave mode operation is not also not supported. +config SPI_BCM2835_OOB + bool "Out-of-band support for BCM2835 SPI controller" + depends on SPI_BCM2835 && DOVETAIL + select SPI_OOB + help + Enable out-of-band cyclic transfers. + config SPI_BCM2835AUX tristate "BCM2835 SPI auxiliary controller" depends on ((ARCH_BCM2835 || ARCH_BRCMSTB) && GPIOLIB) || COMPILE_TEST diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/spi/spi-bcm2835.c linux-dovetail-v5.15.y-dovetail/drivers/spi/spi-bcm2835.c --- linux-5.15.26/drivers/spi/spi-bcm2835.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/spi/spi-bcm2835.c 2022-03-10 09:47:50.000000000 +0100 @@ -1041,18 +1041,10 @@ static int bcm2835_spi_transfer_one_poll return 0; } -static int bcm2835_spi_transfer_one(struct spi_controller *ctlr, - struct spi_device *spi, - struct spi_transfer *tfr) +static unsigned long bcm2835_get_clkdiv(struct bcm2835_spi *bs, u32 spi_hz, + u32 *effective_speed_hz) { - struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); - struct bcm2835_spidev *slv = spi_get_ctldata(spi); - unsigned long spi_hz, cdiv; - unsigned long hz_per_byte, byte_limit; - u32 cs = slv->prepare_cs; - - /* set clock */ - spi_hz = tfr->speed_hz; + unsigned long cdiv; if (spi_hz >= bs->clk_hz / 2) { cdiv = 2; /* clk_hz/2 is the fastest we can go */ @@ -1066,7 +1058,26 @@ static int bcm2835_spi_transfer_one(stru } else { cdiv = 0; /* 0 is the slowest we can go */ } - tfr->effective_speed_hz = cdiv ? (bs->clk_hz / cdiv) : (bs->clk_hz / 65536); + + *effective_speed_hz = cdiv ? (bs->clk_hz / cdiv) : (bs->clk_hz / 65536); + + return cdiv; +} + +static int bcm2835_spi_transfer_one(struct spi_controller *ctlr, + struct spi_device *spi, + struct spi_transfer *tfr) +{ + struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); + struct bcm2835_spidev *slv = spi_get_ctldata(spi); + unsigned long spi_hz, cdiv; + unsigned long hz_per_byte, byte_limit; + u32 cs = slv->prepare_cs; + + /* set clock */ + spi_hz = tfr->speed_hz; + + cdiv = bcm2835_get_clkdiv(bs, spi_hz, &tfr->effective_speed_hz); bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv); /* handle all the 3-wire mode */ @@ -1313,6 +1324,68 @@ err_cleanup: return ret; } +#ifdef CONFIG_SPI_BCM2835_OOB + +static int bcm2835_spi_prepare_oob_transfer(struct spi_controller *ctlr, + struct spi_oob_transfer *xfer) +{ + /* + * The size of a transfer is limited by DLEN which is 16-bit + * wide, and we don't want to scatter transfers in out-of-band + * mode, so cap the frame size accordingly. + */ + if (xfer->setup.frame_len > 65532) + return -EINVAL; + + return 0; +} + +static void bcm2835_spi_start_oob_transfer(struct spi_controller *ctlr, + struct spi_oob_transfer *xfer) +{ + struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); + u32 cs = bs->slv->prepare_cs, effective_speed_hz; + struct spi_device *spi = xfer->spi; + unsigned long cdiv; + + /* See bcm2835_spi_prepare_message(). 
*/ + bcm2835_wr(bs, BCM2835_SPI_CS, cs); + + cdiv = bcm2835_get_clkdiv(bs, xfer->setup.speed_hz, &effective_speed_hz); + xfer->effective_speed_hz = effective_speed_hz; + bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv); + bcm2835_wr(bs, BCM2835_SPI_DLEN, xfer->setup.frame_len); + + if (spi->mode & SPI_3WIRE) + cs |= BCM2835_SPI_CS_REN; + bcm2835_wr(bs, BCM2835_SPI_CS, + cs | BCM2835_SPI_CS_TA | BCM2835_SPI_CS_DMAEN); +} + +static void bcm2835_spi_pulse_oob_transfer(struct spi_controller *ctlr, + struct spi_oob_transfer *xfer) +{ + struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); + + /* Reload DLEN for the next pulse. */ + bcm2835_wr(bs, BCM2835_SPI_DLEN, xfer->setup.frame_len); +} + +static void bcm2835_spi_terminate_oob_transfer(struct spi_controller *ctlr, + struct spi_oob_transfer *xfer) +{ + struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); + + bcm2835_spi_reset_hw(bs); +} + +#else +#define bcm2835_spi_prepare_oob_transfer NULL +#define bcm2835_spi_start_oob_transfer NULL +#define bcm2835_spi_pulse_oob_transfer NULL +#define bcm2835_spi_terminate_oob_transfer NULL +#endif + static int bcm2835_spi_probe(struct platform_device *pdev) { struct spi_controller *ctlr; @@ -1334,6 +1407,10 @@ static int bcm2835_spi_probe(struct plat ctlr->transfer_one = bcm2835_spi_transfer_one; ctlr->handle_err = bcm2835_spi_handle_err; ctlr->prepare_message = bcm2835_spi_prepare_message; + ctlr->prepare_oob_transfer = bcm2835_spi_prepare_oob_transfer; + ctlr->start_oob_transfer = bcm2835_spi_start_oob_transfer; + ctlr->pulse_oob_transfer = bcm2835_spi_pulse_oob_transfer; + ctlr->terminate_oob_transfer = bcm2835_spi_terminate_oob_transfer; ctlr->dev.of_node = pdev->dev.of_node; bs = spi_controller_get_devdata(ctlr); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/spi/spi.c linux-dovetail-v5.15.y-dovetail/drivers/spi/spi.c --- linux-5.15.26/drivers/spi/spi.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/spi/spi.c 2022-03-10 09:47:50.000000000 +0100 @@ -2593,6 +2593,9 @@ struct spi_controller *__spi_alloc_contr mutex_init(&ctlr->bus_lock_mutex); mutex_init(&ctlr->io_mutex); mutex_init(&ctlr->add_lock); +#ifdef CONFIG_SPI_OOB + sema_init(&ctlr->bus_oob_lock_sem, 1); +#endif ctlr->bus_num = -1; ctlr->num_chipselect = 1; ctlr->slave = slave; @@ -3896,6 +3899,22 @@ EXPORT_SYMBOL_GPL(spi_async_locked); * inline functions. 
*/ +static void get_spi_bus(struct spi_controller *ctlr) +{ + mutex_lock(&ctlr->bus_lock_mutex); +#ifdef CONFIG_SPI_OOB + down(&ctlr->bus_oob_lock_sem); +#endif +} + +static void put_spi_bus(struct spi_controller *ctlr) +{ +#ifdef CONFIG_SPI_OOB + up(&ctlr->bus_oob_lock_sem); +#endif + mutex_unlock(&ctlr->bus_lock_mutex); +} + static void spi_complete(void *arg) { complete(arg); @@ -3980,9 +3999,9 @@ int spi_sync(struct spi_device *spi, str { int ret; - mutex_lock(&spi->controller->bus_lock_mutex); + get_spi_bus(spi->controller); ret = __spi_sync(spi, message); - mutex_unlock(&spi->controller->bus_lock_mutex); + put_spi_bus(spi->controller); return ret; } @@ -4029,7 +4048,7 @@ int spi_bus_lock(struct spi_controller * { unsigned long flags; - mutex_lock(&ctlr->bus_lock_mutex); + get_spi_bus(ctlr); spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags); ctlr->bus_lock_flag = 1; @@ -4058,7 +4077,7 @@ int spi_bus_unlock(struct spi_controller { ctlr->bus_lock_flag = 0; - mutex_unlock(&ctlr->bus_lock_mutex); + put_spi_bus(ctlr); return 0; } @@ -4143,6 +4162,274 @@ int spi_write_then_read(struct spi_devic } EXPORT_SYMBOL_GPL(spi_write_then_read); +#ifdef CONFIG_SPI_OOB + +static int bus_lock_oob(struct spi_controller *ctlr) +{ + unsigned long flags; + int ret = -EBUSY; + + mutex_lock(&ctlr->bus_lock_mutex); + + spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags); + + if (!ctlr->bus_lock_flag && !down_trylock(&ctlr->bus_oob_lock_sem)) { + ctlr->bus_lock_flag = 1; + ret = 0; + } + + spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags); + + mutex_unlock(&ctlr->bus_lock_mutex); + + return ret; +} + +static int bus_unlock_oob(struct spi_controller *ctlr) +{ + ctlr->bus_lock_flag = 0; + up(&ctlr->bus_oob_lock_sem); + + return 0; +} + +static int prepare_oob_dma(struct spi_controller *ctlr, + struct spi_oob_transfer *xfer) +{ + struct dma_async_tx_descriptor *desc; + size_t len = xfer->setup.frame_len; + dma_cookie_t cookie; + dma_addr_t addr; + int ret; + + /* TX to second half of I/O buffer. */ + addr = xfer->dma_addr + xfer->aligned_frame_len; + desc = dmaengine_prep_slave_single(ctlr->dma_tx, addr, len, + DMA_MEM_TO_DEV, + DMA_OOB_INTERRUPT|DMA_OOB_PULSE); + if (!desc) + return -EIO; + + xfer->txd = desc; + cookie = dmaengine_submit(desc); + ret = dma_submit_error(cookie); + if (ret) + return ret; + + dma_async_issue_pending(ctlr->dma_tx); + + /* RX to first half of I/O buffer. */ + addr = xfer->dma_addr; + desc = dmaengine_prep_slave_single(ctlr->dma_rx, addr, len, + DMA_DEV_TO_MEM, + DMA_OOB_INTERRUPT|DMA_OOB_PULSE); + if (!desc) { + ret = -EIO; + goto fail_rx; + } + + desc->callback = xfer->setup.xfer_done; + desc->callback_param = xfer; + + xfer->rxd = desc; + cookie = dmaengine_submit(desc); + ret = dma_submit_error(cookie); + if (ret) + goto fail_rx; + + dma_async_issue_pending(ctlr->dma_rx); + + return 0; + +fail_rx: + dmaengine_terminate_sync(ctlr->dma_tx); + + return ret; +} + +static void unprepare_oob_dma(struct spi_controller *ctlr) +{ + dmaengine_terminate_sync(ctlr->dma_rx); + dmaengine_terminate_sync(ctlr->dma_tx); +} + +/* + * A simpler version of __spi_validate() for oob transfers. 
+ */ +static int validate_oob_xfer(struct spi_device *spi, + struct spi_oob_transfer *xfer) +{ + struct spi_controller *ctlr = spi->controller; + struct spi_oob_setup *p = &xfer->setup; + int w_size; + + if (p->frame_len == 0) + return -EINVAL; + + if (!p->bits_per_word) + p->bits_per_word = spi->bits_per_word; + + if (!p->speed_hz) + p->speed_hz = spi->max_speed_hz; + + if (ctlr->max_speed_hz && p->speed_hz > ctlr->max_speed_hz) + p->speed_hz = ctlr->max_speed_hz; + + if (__spi_validate_bits_per_word(ctlr, p->bits_per_word)) + return -EINVAL; + + if (p->bits_per_word <= 8) + w_size = 1; + else if (p->bits_per_word <= 16) + w_size = 2; + else + w_size = 4; + + if (p->frame_len % w_size) + return -EINVAL; + + if (p->speed_hz && ctlr->min_speed_hz && + p->speed_hz < ctlr->min_speed_hz) + return -EINVAL; + + return 0; +} + +int spi_prepare_oob_transfer(struct spi_device *spi, + struct spi_oob_transfer *xfer) +{ + struct spi_controller *ctlr; + dma_addr_t dma_addr; + size_t alen, iolen; + void *iobuf; + int ret; + + /* Controller must support oob transactions. */ + ctlr = spi->controller; + if (!ctlr->prepare_oob_transfer) + return -ENOTSUPP; + + /* Out-of-band transfers require DMA support. */ + if (!ctlr->can_dma) + return -ENODEV; + + ret = validate_oob_xfer(spi, xfer); + if (ret) + return ret; + + alen = L1_CACHE_ALIGN(xfer->setup.frame_len); + /* + * Allocate a single coherent I/O buffer which is twice as + * large as the user specified transfer length, TX data goes + * to the upper half, RX data to the lower half. + */ + iolen = alen * 2; + iobuf = dma_alloc_coherent(ctlr->dev.parent, iolen, + &dma_addr, GFP_KERNEL); + if (iobuf == NULL) + return -ENOMEM; + + xfer->spi = spi; + xfer->dma_addr = dma_addr; + xfer->io_buffer = iobuf; + xfer->aligned_frame_len = alen; + xfer->effective_speed_hz = 0; + + ret = prepare_oob_dma(ctlr, xfer); + if (ret) + goto fail_prep_dma; + + ret = bus_lock_oob(ctlr); + if (ret) + goto fail_bus_lock; + + ret = ctlr->prepare_oob_transfer(ctlr, xfer); + if (ret) + goto fail_prep_xfer; + + return 0; + +fail_prep_xfer: + bus_unlock_oob(ctlr); +fail_bus_lock: + unprepare_oob_dma(ctlr); +fail_prep_dma: + dma_free_coherent(ctlr->dev.parent, iolen, iobuf, dma_addr); + + return ret; +} +EXPORT_SYMBOL_GPL(spi_prepare_oob_transfer); + +void spi_start_oob_transfer(struct spi_oob_transfer *xfer) +{ + struct spi_device *spi = xfer->spi; + struct spi_controller *ctlr = spi->controller; + + ctlr->start_oob_transfer(ctlr, xfer); +} +EXPORT_SYMBOL_GPL(spi_start_oob_transfer); + +int spi_pulse_oob_transfer(struct spi_oob_transfer *xfer) /* oob stage */ +{ + struct spi_device *spi = xfer->spi; + struct spi_controller *ctlr = spi->controller; + int ret; + + if (ctlr->pulse_oob_transfer) + ctlr->pulse_oob_transfer(ctlr, xfer); + + ret = dma_pulse_oob(ctlr->dma_rx); + if (likely(!ret)) + ret = dma_pulse_oob(ctlr->dma_tx); + + return ret; +} +EXPORT_SYMBOL_GPL(spi_pulse_oob_transfer); + +void spi_terminate_oob_transfer(struct spi_oob_transfer *xfer) +{ + struct spi_device *spi = xfer->spi; + struct spi_controller *ctlr = spi->controller; + + if (ctlr->terminate_oob_transfer) + ctlr->terminate_oob_transfer(ctlr, xfer); + + unprepare_oob_dma(ctlr); + bus_unlock_oob(ctlr); + dma_free_coherent(ctlr->dev.parent, xfer->aligned_frame_len * 2, + xfer->io_buffer, xfer->dma_addr); +} +EXPORT_SYMBOL_GPL(spi_terminate_oob_transfer); + +int spi_mmap_oob_transfer(struct vm_area_struct *vma, + struct spi_oob_transfer *xfer) +{ + struct spi_device *spi = xfer->spi; + struct spi_controller *ctlr = 
spi->controller; + size_t len; + int ret; + + /* + * We may have an IOMMU, rely on dma_mmap_coherent() for + * dealing with the nitty-gritty details of mapping a coherent + * buffer. + */ + len = vma->vm_end - vma->vm_start; + if (spi_get_oob_iolen(xfer) <= len) + ret = dma_mmap_coherent(ctlr->dev.parent, + vma, + xfer->io_buffer, + xfer->dma_addr, + len); + else + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(spi_mmap_oob_transfer); + +#endif /* SPI_OOB */ + /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_OF) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/spmi/spmi-pmic-arb.c linux-dovetail-v5.15.y-dovetail/drivers/spmi/spmi-pmic-arb.c --- linux-5.15.26/drivers/spmi/spmi-pmic-arb.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/spmi/spmi-pmic-arb.c 2022-03-10 09:47:50.000000000 +0100 @@ -145,7 +145,7 @@ struct spmi_pmic_arb { void __iomem *cnfg; void __iomem *core; resource_size_t core_size; - raw_spinlock_t lock; + hard_spinlock_t lock; u8 channel; int irq; u8 ee; @@ -684,7 +684,7 @@ static struct irq_chip pmic_arb_irqchip .irq_set_type = qpnpint_irq_set_type, .irq_set_wake = qpnpint_irq_set_wake, .irq_get_irqchip_state = qpnpint_get_irqchip_state, - .flags = IRQCHIP_MASK_ON_SUSPEND, + .flags = IRQCHIP_MASK_ON_SUSPEND|IRQCHIP_PIPELINE_SAFE, }; static int qpnpint_irq_domain_translate(struct irq_domain *d, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/tty/serial/8250/8250_core.c linux-dovetail-v5.15.y-dovetail/drivers/tty/serial/8250/8250_core.c --- linux-5.15.26/drivers/tty/serial/8250/8250_core.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/tty/serial/8250/8250_core.c 2022-03-10 09:47:50.000000000 +0100 @@ -659,6 +659,48 @@ static int univ8250_console_match(struct return -ENODEV; } +#ifdef CONFIG_RAW_PRINTK + +static void raw_write_char(struct uart_8250_port *up, int c) +{ + unsigned int status, tmout = 10000; + + for (;;) { + status = serial_in(up, UART_LSR); + up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; + if ((status & UART_LSR_THRE) == UART_LSR_THRE) + break; + if (--tmout == 0) + break; + cpu_relax(); + } + serial_port_out(&up->port, UART_TX, c); +} + +static void univ8250_console_write_raw(struct console *co, const char *s, + unsigned int count) +{ + struct uart_8250_port *up = &serial8250_ports[co->index]; + unsigned int ier; + + ier = serial_in(up, UART_IER); + + if (up->capabilities & UART_CAP_UUE) + serial_out(up, UART_IER, UART_IER_UUE); + else + serial_out(up, UART_IER, 0); + + while (count-- > 0) { + if (*s == '\n') + raw_write_char(up, '\r'); + raw_write_char(up, *s++); + } + + serial_out(up, UART_IER, ier); +} + +#endif + static struct console univ8250_console = { .name = "ttyS", .write = univ8250_console_write, @@ -666,6 +708,9 @@ static struct console univ8250_console = .setup = univ8250_console_setup, .exit = univ8250_console_exit, .match = univ8250_console_match, +#ifdef CONFIG_RAW_PRINTK + .write_raw = univ8250_console_write_raw, +#endif .flags = CON_PRINTBUFFER | CON_ANYTIME, .index = -1, .data = &serial8250_reg, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/tty/serial/amba-pl011.c linux-dovetail-v5.15.y-dovetail/drivers/tty/serial/amba-pl011.c --- linux-5.15.26/drivers/tty/serial/amba-pl011.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/tty/serial/amba-pl011.c 2022-03-10 09:47:50.000000000 +0100 @@ -1961,6 +1961,8 @@ static 
void pl011_shutdown(struct uart_p pl011_disable_uart(uap); + if (IS_ENABLED(CONFIG_RAW_PRINTK)) + clk_disable(uap->clk); /* * Shut down the clock producer */ @@ -2312,6 +2314,37 @@ static void pl011_console_putchar(struct pl011_write(ch, uap, REG_DR); } +#ifdef CONFIG_RAW_PRINTK + +static void +pl011_console_write_raw(struct console *co, const char *s, unsigned int count) +{ + struct uart_amba_port *uap = amba_ports[co->index]; + unsigned int old_cr = 0, new_cr; + + if (!uap->vendor->always_enabled) { + old_cr = pl011_read(uap, REG_CR); + new_cr = old_cr & ~UART011_CR_CTSEN; + new_cr |= UART01x_CR_UARTEN | UART011_CR_TXE; + pl011_write(new_cr, uap, REG_CR); + } + + while (count-- > 0) { + if (*s == '\n') + pl011_console_putchar(&uap->port, '\r'); + pl011_console_putchar(&uap->port, *s++); + } + + while ((pl011_read(uap, REG_FR) ^ uap->vendor->inv_fr) + & uap->vendor->fr_busy) + cpu_relax(); + + if (!uap->vendor->always_enabled) + pl011_write(old_cr, uap, REG_CR); +} + +#endif /* !CONFIG_RAW_PRINTK */ + static void pl011_console_write(struct console *co, const char *s, unsigned int count) { @@ -2441,6 +2474,9 @@ static int pl011_console_setup(struct co pl011_console_get_options(uap, &baud, &parity, &bits); } + if (IS_ENABLED(CONFIG_RAW_PRINTK)) + clk_enable(uap->clk); + return uart_set_options(&uap->port, co, baud, parity, bits, flow); } @@ -2511,6 +2547,9 @@ static struct console amba_console = { .device = uart_console_device, .setup = pl011_console_setup, .match = pl011_console_match, +#ifdef CONFIG_RAW_PRINTK + .write_raw = pl011_console_write_raw, +#endif .flags = CON_PRINTBUFFER | CON_ANYTIME, .index = -1, .data = &amba_reg, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/tty/serial/imx.c linux-dovetail-v5.15.y-dovetail/drivers/tty/serial/imx.c --- linux-5.15.26/drivers/tty/serial/imx.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/tty/serial/imx.c 2022-03-10 09:47:50.000000000 +0100 @@ -1968,24 +1968,11 @@ static void imx_uart_console_putchar(str imx_uart_writel(sport, ch, URTX0); } -/* - * Interrupts are disabled on entering - */ static void -imx_uart_console_write(struct console *co, const char *s, unsigned int count) +__imx_uart_console_write(struct imx_port *sport, const char *s, unsigned int count) { - struct imx_port *sport = imx_uart_ports[co->index]; struct imx_port_ucrs old_ucr; - unsigned long flags; unsigned int ucr1; - int locked = 1; - - if (sport->port.sysrq) - locked = 0; - else if (oops_in_progress) - locked = spin_trylock_irqsave(&sport->port.lock, flags); - else - spin_lock_irqsave(&sport->port.lock, flags); /* * First, save UCR1/2/3 and then disable interrupts @@ -2011,11 +1998,41 @@ imx_uart_console_write(struct console *c while (!(imx_uart_readl(sport, USR2) & USR2_TXDC)); imx_uart_ucrs_restore(sport, &old_ucr); +} + +/* + * Interrupts are disabled on entering + */ +static void +imx_uart_console_write(struct console *co, const char *s, unsigned int count) +{ + struct imx_port *sport = imx_uart_ports[co->index]; + unsigned long flags; + int locked = 1; + + if (sport->port.sysrq) + locked = 0; + else if (oops_in_progress) + locked = spin_trylock_irqsave(&sport->port.lock, flags); + else + spin_lock_irqsave(&sport->port.lock, flags); + + __imx_uart_console_write(sport, s, count); if (locked) spin_unlock_irqrestore(&sport->port.lock, flags); } +#ifdef CONFIG_RAW_PRINTK +static void +imx_uart_console_write_raw(struct console *co, const char *s, unsigned int count) +{ + struct imx_port *sport = 
imx_uart_ports[co->index]; + + return __imx_uart_console_write(sport, s, count); +} +#endif + /* * If the port was already initialised (eg, by a boot loader), * try to determine the current setup. @@ -2131,6 +2148,9 @@ static struct uart_driver imx_uart_uart_ static struct console imx_uart_console = { .name = DEV_NAME, .write = imx_uart_console_write, +#ifdef CONFIG_RAW_PRINTK + .write_raw = imx_uart_console_write_raw, +#endif .device = uart_console_device, .setup = imx_uart_console_setup, .flags = CON_PRINTBUFFER, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/tty/serial/samsung_tty.c linux-dovetail-v5.15.y-dovetail/drivers/tty/serial/samsung_tty.c --- linux-5.15.26/drivers/tty/serial/samsung_tty.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/tty/serial/samsung_tty.c 2022-03-10 09:47:50.000000000 +0100 @@ -2628,6 +2628,10 @@ static struct console s3c24xx_serial_con .flags = CON_PRINTBUFFER, .index = -1, .write = s3c24xx_serial_console_write, +#ifdef CONFIG_RAW_PRINTK + /* The common write handler can run from atomic context. */ + .write_raw = s3c24xx_serial_console_write, +#endif .setup = s3c24xx_serial_console_setup, .data = &s3c24xx_uart_drv, }; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/drivers/tty/serial/st-asc.c linux-dovetail-v5.15.y-dovetail/drivers/tty/serial/st-asc.c --- linux-5.15.26/drivers/tty/serial/st-asc.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/drivers/tty/serial/st-asc.c 2022-03-10 09:47:50.000000000 +0100 @@ -904,6 +904,29 @@ static void asc_console_write(struct con spin_unlock_irqrestore(&port->lock, flags); } +#ifdef CONFIG_RAW_PRINTK + +static void asc_console_write_raw(struct console *co, + const char *s, unsigned int count) +{ + struct uart_port *port = &asc_ports[co->index].port; + unsigned long timeout = 1000000; + u32 intenable; + + intenable = asc_in(port, ASC_INTEN); + asc_out(port, ASC_INTEN, 0); + (void)asc_in(port, ASC_INTEN); /* Defeat bus write posting */ + + uart_console_write(port, s, count, asc_console_putchar); + + while (timeout-- && !asc_txfifo_is_empty(port)) + cpu_relax(); /* wait shorter */ + + asc_out(port, ASC_INTEN, intenable); +} + +#endif + static int asc_console_setup(struct console *co, char *options) { struct asc_port *ascport; @@ -936,6 +959,9 @@ static struct console asc_console = { .name = ASC_SERIAL_NAME, .device = uart_console_device, .write = asc_console_write, +#ifdef CONFIG_RAW_PRINTK + .write_raw = asc_console_write_raw, +#endif .setup = asc_console_setup, .flags = CON_PRINTBUFFER, .index = -1, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/fs/eventfd.c linux-dovetail-v5.15.y-dovetail/fs/eventfd.c --- linux-5.15.26/fs/eventfd.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/fs/eventfd.c 2022-03-10 09:47:50.000000000 +0100 @@ -262,17 +262,17 @@ static ssize_t eventfd_read(struct kiocb return sizeof(ucnt); } -static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos) +static ssize_t eventfd_write(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; struct eventfd_ctx *ctx = file->private_data; ssize_t res; __u64 ucnt; DECLARE_WAITQUEUE(wait, current); - if (count < sizeof(ucnt)) + if (iov_iter_count(from) < sizeof(ucnt)) return -EINVAL; - if (copy_from_user(&ucnt, buf, sizeof(ucnt))) + if (copy_from_iter(&ucnt, sizeof(ucnt), from) != sizeof(ucnt)) return -EFAULT; if (ucnt == ULLONG_MAX) return -EINVAL; @@ 
-329,7 +329,7 @@ static const struct file_operations even .release = eventfd_release, .poll = eventfd_poll, .read_iter = eventfd_read, - .write = eventfd_write, + .write_iter = eventfd_write, .llseek = noop_llseek, }; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/fs/exec.c linux-dovetail-v5.15.y-dovetail/fs/exec.c --- linux-5.15.26/fs/exec.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/fs/exec.c 2022-03-10 09:47:50.000000000 +0100 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -973,6 +974,7 @@ static int exec_mmap(struct mm_struct *m struct task_struct *tsk; struct mm_struct *old_mm, *active_mm; int ret; + unsigned long flags; /* Notify parent that we're no longer interested in the old VM */ tsk = current; @@ -1005,6 +1007,7 @@ static int exec_mmap(struct mm_struct *m local_irq_disable(); active_mm = tsk->active_mm; + protect_inband_mm(flags); tsk->active_mm = mm; tsk->mm = mm; /* @@ -1013,10 +1016,17 @@ static int exec_mmap(struct mm_struct *m * lazy tlb mm refcounting when these are updated by context * switches. Not all architectures can handle irqs off over * activate_mm yet. + * + * irq_pipeline: activate_mm() allowing irqs off context is a + * requirement. e.g. TLB shootdown must not involve IPIs. We + * make sure protect_inband_mm() is in effect while switching + * in and activating the new mm by forcing + * CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM on. */ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); activate_mm(active_mm, mm); + unprotect_inband_mm(flags); if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); tsk->mm->vmacache_seqnum = 0; @@ -1303,6 +1313,9 @@ int begin_new_exec(struct linux_binprm * if (retval) goto out_unlock; + /* Tell Dovetail about the ongoing exec(). */ + arch_dovetail_exec_prepare(); + /* * Ensure that the uaccess routines can actually operate on userspace * pointers: diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/fs/fcntl.c linux-dovetail-v5.15.y-dovetail/fs/fcntl.c --- linux-5.15.26/fs/fcntl.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/fs/fcntl.c 2022-03-10 09:47:50.000000000 +0100 @@ -1045,7 +1045,7 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. 
*/ - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != + BUILD_BUG_ON(22 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | __FMODE_EXEC | __FMODE_NONOTIFY)); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/fs/file.c linux-dovetail-v5.15.y-dovetail/fs/file.c --- linux-5.15.26/fs/file.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/fs/file.c 2022-03-10 09:47:50.000000000 +0100 @@ -400,6 +400,7 @@ static struct fdtable *close_files(struc if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); if (file) { + uninstall_inband_fd(i, file, files); filp_close(file, files); cond_resched(); } @@ -583,6 +584,7 @@ void fd_install(unsigned int fd, struct fdt = files_fdtable(files); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); + install_inband_fd(fd, file, files); spin_unlock(&files->file_lock); return; } @@ -591,6 +593,7 @@ void fd_install(unsigned int fd, struct fdt = rcu_dereference_sched(files->fdt); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); + install_inband_fd(fd, file, files); rcu_read_unlock_sched(); } @@ -624,6 +627,7 @@ static struct file *pick_file(struct fil } rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); + uninstall_inband_fd(fd, file, files); out_unlock: spin_unlock(&files->file_lock); @@ -780,6 +784,7 @@ int __close_fd_get_file(unsigned int fd, goto out_err; rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); + uninstall_inband_fd(fd, file, files); get_file(file); *res = file; return 0; @@ -831,6 +836,7 @@ void do_close_on_exec(struct files_struc continue; rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); + uninstall_inband_fd(fd, file, files); spin_unlock(&files->file_lock); filp_close(file, files); cond_resched(); @@ -1105,6 +1111,7 @@ __releases(&files->file_lock) __set_close_on_exec(fd, fdt); else __clear_close_on_exec(fd, fdt); + replace_inband_fd(fd, file, files); spin_unlock(&files->file_lock); if (tofree) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/fs/ioctl.c linux-dovetail-v5.15.y-dovetail/fs/ioctl.c --- linux-5.15.26/fs/ioctl.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/fs/ioctl.c 2022-03-10 09:47:50.000000000 +0100 @@ -911,6 +911,22 @@ long compat_ptr_ioctl(struct file *file, } EXPORT_SYMBOL(compat_ptr_ioctl); +/** + * compat_ptr_oob_ioctl - generic implementation of .compat_oob_ioctl file operation + * + * The equivalent of compat_ptr_ioctl, dealing with out-of-band ioctl + * calls. Management of this handler is delegated to the code + * implementing the out-of-band ioctl() syscall in the companion core. + */ +long compat_ptr_oob_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + if (!file->f_op->oob_ioctl) + return -ENOIOCTLCMD; + + return file->f_op->oob_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +EXPORT_SYMBOL(compat_ptr_oob_ioctl); + COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { Binärdateien linux-5.15.26/fs/xfs/libxfs/xfs_dir2_block.c und linux-dovetail-v5.15.y-dovetail/fs/xfs/libxfs/xfs_dir2_block.c sind verschieden. 
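The compat_ptr_oob_ioctl() helper above plays the same role for the out-of-band ioctl path that compat_ptr_ioctl() plays for the regular one. The sketch below is illustrative only: it assumes the .oob_ioctl/.compat_oob_ioctl file_operations hooks introduced elsewhere in this patch, and the driver name, command number and handler are invented.

/*
 * Hypothetical driver hooking the out-of-band ioctl path. Nothing here
 * is part of this patch; FOO_IOC_TRIGGER and the foo_* symbols are made
 * up for illustration.
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/ioctl.h>

#define FOO_IOC_TRIGGER	_IO('f', 0)	/* invented command */

static long foo_oob_ioctl(struct file *filp, unsigned int cmd,
			  unsigned long arg)
{
	/* Called from the out-of-band stage; keep it short and non-blocking. */
	switch (cmd) {
	case FOO_IOC_TRIGGER:
		return 0;
	default:
		return -ENOTTY;
	}
}

static const struct file_operations foo_fops = {
	.owner		  = THIS_MODULE,
	.oob_ioctl	  = foo_oob_ioctl,
	/* Route 32-bit callers through the generic pointer fixup shown above. */
	.compat_oob_ioctl = compat_ptr_oob_ioctl,
};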
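The asm-generic changes that follow apply one mechanical conversion: raw_local_irq_save(flags)/raw_local_irq_restore(flags) become flags = hard_local_irq_save() and hard_local_irq_restore(flags), so the critical section is protected against out-of-band interrupts as well, not only against the virtually masked in-band stage. A minimal sketch of the resulting pattern is shown below; the counter and function are hypothetical, and the include path is assumed to resolve to the generic helpers added further down.

/*
 * Illustrative pattern only: mirrors the hard_local_irq_save()/
 * hard_local_irq_restore() pairing used by the hunks below. The
 * sample_* names are invented; <asm/irq_pipeline.h> is assumed to pull
 * in the helpers declared by asm-generic/irq_pipeline.h.
 */
#include <asm/irq_pipeline.h>

static unsigned int sample_counter;

static unsigned int sample_add_return(unsigned int val)
{
	unsigned long flags;
	unsigned int ret;

	/* Masks IRQs in hardware, so neither stage can preempt us here. */
	flags = hard_local_irq_save();
	sample_counter += val;
	ret = sample_counter;
	hard_local_irq_restore(flags);

	return ret;
}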
diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/asm-generic/atomic.h linux-dovetail-v5.15.y-dovetail/include/asm-generic/atomic.h --- linux-5.15.26/include/asm-generic/atomic.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/asm-generic/atomic.h 2022-03-10 09:47:50.000000000 +0100 @@ -59,9 +59,9 @@ static inline void generic_atomic_##op(i { \ unsigned long flags; \ \ - raw_local_irq_save(flags); \ + flags = hard_local_irq_save(); \ v->counter = v->counter c_op i; \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ } #define ATOMIC_OP_RETURN(op, c_op) \ @@ -70,9 +70,9 @@ static inline int generic_atomic_##op##_ unsigned long flags; \ int ret; \ \ - raw_local_irq_save(flags); \ + flags = hard_local_irq_save(); \ ret = (v->counter = v->counter c_op i); \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ \ return ret; \ } @@ -83,10 +83,10 @@ static inline int generic_atomic_fetch_# unsigned long flags; \ int ret; \ \ - raw_local_irq_save(flags); \ + flags = hard_local_irq_save(); \ ret = v->counter; \ v->counter = v->counter c_op i; \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ \ return ret; \ } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/asm-generic/cmpxchg.h linux-dovetail-v5.15.y-dovetail/include/asm-generic/cmpxchg.h --- linux-5.15.26/include/asm-generic/cmpxchg.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/asm-generic/cmpxchg.h 2022-03-10 09:47:50.000000000 +0100 @@ -30,10 +30,10 @@ unsigned long __generic_xchg(unsigned lo #ifdef __xchg_u8 return __xchg_u8(x, ptr); #else - local_irq_save(flags); + flags = hard_local_irq_save(); ret = *(volatile u8 *)ptr; *(volatile u8 *)ptr = x; - local_irq_restore(flags); + hard_local_irq_restore(flags); return ret; #endif /* __xchg_u8 */ @@ -41,10 +41,10 @@ unsigned long __generic_xchg(unsigned lo #ifdef __xchg_u16 return __xchg_u16(x, ptr); #else - local_irq_save(flags); + flags = hard_local_irq_save(); ret = *(volatile u16 *)ptr; *(volatile u16 *)ptr = x; - local_irq_restore(flags); + hard_local_irq_restore(flags); return ret; #endif /* __xchg_u16 */ @@ -52,10 +52,10 @@ unsigned long __generic_xchg(unsigned lo #ifdef __xchg_u32 return __xchg_u32(x, ptr); #else - local_irq_save(flags); + flags = hard_local_irq_save(); ret = *(volatile u32 *)ptr; *(volatile u32 *)ptr = x; - local_irq_restore(flags); + hard_local_irq_restore(flags); return ret; #endif /* __xchg_u32 */ @@ -64,10 +64,10 @@ unsigned long __generic_xchg(unsigned lo #ifdef __xchg_u64 return __xchg_u64(x, ptr); #else - local_irq_save(flags); + flags = hard_local_irq_save(); ret = *(volatile u64 *)ptr; *(volatile u64 *)ptr = x; - local_irq_restore(flags); + hard_local_irq_restore(flags); return ret; #endif /* __xchg_u64 */ #endif /* CONFIG_64BIT */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/asm-generic/cmpxchg-local.h linux-dovetail-v5.15.y-dovetail/include/asm-generic/cmpxchg-local.h --- linux-5.15.26/include/asm-generic/cmpxchg-local.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/asm-generic/cmpxchg-local.h 2022-03-10 09:47:50.000000000 +0100 @@ -23,7 +23,7 @@ static inline unsigned long __generic_cm if (size == 8 && sizeof(unsigned long) != 8) wrong_size_cmpxchg(ptr); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); switch (size) { case 1: prev = *(u8 *)ptr; if (prev == old) @@ -44,7 +44,7 @@ static inline unsigned long __generic_cm 
default: wrong_size_cmpxchg(ptr); } - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return prev; } @@ -57,11 +57,11 @@ static inline u64 __generic_cmpxchg64_lo u64 prev; unsigned long flags; - raw_local_irq_save(flags); + flags = hard_local_irq_save(); prev = *(u64 *)ptr; if (prev == old) *(u64 *)ptr = new; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return prev; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/asm-generic/irq_pipeline.h linux-dovetail-v5.15.y-dovetail/include/asm-generic/irq_pipeline.h --- linux-5.15.26/include/asm-generic/irq_pipeline.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/asm-generic/irq_pipeline.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,109 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2016 Philippe Gerum . + */ +#ifndef __ASM_GENERIC_IRQ_PIPELINE_H +#define __ASM_GENERIC_IRQ_PIPELINE_H + +#include +#include + +#ifdef CONFIG_IRQ_PIPELINE + +unsigned long inband_irq_save(void); +void inband_irq_restore(unsigned long flags); +void inband_irq_enable(void); +void inband_irq_disable(void); +int inband_irqs_disabled(void); + +#define hard_cond_local_irq_enable() hard_local_irq_enable() +#define hard_cond_local_irq_disable() hard_local_irq_disable() +#define hard_cond_local_irq_save() hard_local_irq_save() +#define hard_cond_local_irq_restore(__flags) hard_local_irq_restore(__flags) + +#define hard_local_irq_save() native_irq_save() +#define hard_local_irq_restore(__flags) native_irq_restore(__flags) +#define hard_local_irq_enable() native_irq_enable() +#define hard_local_irq_disable() native_irq_disable() +#define hard_local_save_flags() native_save_flags() + +#define hard_irqs_disabled() native_irqs_disabled() +#define hard_irqs_disabled_flags(__flags) native_irqs_disabled_flags(__flags) + +void irq_pipeline_nmi_enter(void); +void irq_pipeline_nmi_exit(void); + +/* Swap then merge virtual and hardware interrupt states. */ +#define irqs_merge_flags(__flags, __stalled) \ + ({ \ + unsigned long __combo = \ + arch_irqs_virtual_to_native_flags(__stalled) | \ + arch_irqs_native_to_virtual_flags(__flags); \ + __combo; \ + }) + +/* Extract swap virtual and hardware interrupt states. 
*/ +#define irqs_split_flags(__combo, __stall_r) \ + ({ \ + unsigned long __virt = (__combo); \ + *(__stall_r) = hard_irqs_disabled_flags(__combo); \ + __virt &= ~arch_irqs_virtual_to_native_flags(*(__stall_r)); \ + arch_irqs_virtual_to_native_flags(__virt); \ + }) + +#define hard_local_irq_sync() native_irq_sync() + +#else /* !CONFIG_IRQ_PIPELINE */ + +#define hard_local_save_flags() ({ unsigned long __flags; \ + raw_local_save_flags(__flags); __flags; }) +#define hard_local_irq_enable() raw_local_irq_enable() +#define hard_local_irq_disable() raw_local_irq_disable() +#define hard_local_irq_save() ({ unsigned long __flags; \ + raw_local_irq_save(__flags); __flags; }) +#define hard_local_irq_restore(__flags) raw_local_irq_restore(__flags) + +#define hard_cond_local_irq_enable() do { } while(0) +#define hard_cond_local_irq_disable() do { } while(0) +#define hard_cond_local_irq_save() 0 +#define hard_cond_local_irq_restore(__flags) do { (void)(__flags); } while(0) + +#define hard_irqs_disabled() irqs_disabled() +#define hard_irqs_disabled_flags(__flags) raw_irqs_disabled_flags(__flags) + +static inline void irq_pipeline_nmi_enter(void) { } +static inline void irq_pipeline_nmi_exit(void) { } + +#define hard_local_irq_sync() do { } while (0) + +#endif /* !CONFIG_IRQ_PIPELINE */ + +#ifdef CONFIG_DEBUG_IRQ_PIPELINE +void check_inband_stage(void); +#define check_hard_irqs_disabled() \ + WARN_ON_ONCE(!hard_irqs_disabled()) +#else +static inline void check_inband_stage(void) { } +static inline int check_hard_irqs_disabled(void) { return 0; } +#endif + +extern bool irq_pipeline_oopsing; + +static __always_inline bool irqs_pipelined(void) +{ + return IS_ENABLED(CONFIG_IRQ_PIPELINE); +} + +static __always_inline bool irq_pipeline_debug(void) +{ + return IS_ENABLED(CONFIG_DEBUG_IRQ_PIPELINE) && + !irq_pipeline_oopsing; +} + +static __always_inline bool irq_pipeline_debug_locking(void) +{ + return IS_ENABLED(CONFIG_DEBUG_HARD_LOCKS); +} + +#endif /* __ASM_GENERIC_IRQ_PIPELINE_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/asm-generic/percpu.h linux-dovetail-v5.15.y-dovetail/include/asm-generic/percpu.h --- linux-5.15.26/include/asm-generic/percpu.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/asm-generic/percpu.h 2022-03-10 09:47:50.000000000 +0100 @@ -125,9 +125,9 @@ do { \ ({ \ typeof(pcp) ___ret; \ unsigned long ___flags; \ - raw_local_irq_save(___flags); \ + ___flags = hard_local_irq_save(); \ ___ret = raw_cpu_generic_read(pcp); \ - raw_local_irq_restore(___flags); \ + hard_local_irq_restore(___flags); \ ___ret; \ }) @@ -144,9 +144,9 @@ do { \ #define this_cpu_generic_to_op(pcp, val, op) \ do { \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ raw_cpu_generic_to_op(pcp, val, op); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ } while (0) @@ -154,9 +154,9 @@ do { \ ({ \ typeof(pcp) __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_add_return(pcp, val); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) @@ -164,9 +164,9 @@ do { \ ({ \ typeof(pcp) __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_xchg(pcp, nval); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) @@ -174,9 +174,9 @@ do { \ ({ \ typeof(pcp) __ret; \ unsigned long __flags; \ 
- raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) @@ -184,10 +184,10 @@ do { \ ({ \ int __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ oval1, oval2, nval1, nval2); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/dovetail/irq.h linux-dovetail-v5.15.y-dovetail/include/dovetail/irq.h --- linux-5.15.26/include/dovetail/irq.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/dovetail/irq.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DOVETAIL_IRQ_H +#define _DOVETAIL_IRQ_H + +/* Placeholders for pre- and post-IRQ handling. */ + +static inline void irq_enter_pipeline(void) { } + +static inline void irq_exit_pipeline(void) { } + +#endif /* !_DOVETAIL_IRQ_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/dovetail/mm_info.h linux-dovetail-v5.15.y-dovetail/include/dovetail/mm_info.h --- linux-5.15.26/include/dovetail/mm_info.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/dovetail/mm_info.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DOVETAIL_MM_INFO_H +#define _DOVETAIL_MM_INFO_H + +/* + * Placeholder for per-mm state information defined by the co-kernel. + */ + +struct oob_mm_state { +}; + +#endif /* !_DOVETAIL_MM_INFO_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/dovetail/netdevice.h linux-dovetail-v5.15.y-dovetail/include/dovetail/netdevice.h --- linux-5.15.26/include/dovetail/netdevice.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/dovetail/netdevice.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DOVETAIL_NETDEVICE_H +#define _DOVETAIL_NETDEVICE_H + +/* + * Placeholder for per-device state information defined by the + * out-of-band network stack. + */ + +struct oob_netdev_state { +}; + +#endif /* !_DOVETAIL_NETDEVICE_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/dovetail/poll.h linux-dovetail-v5.15.y-dovetail/include/dovetail/poll.h --- linux-5.15.26/include/dovetail/poll.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/dovetail/poll.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DOVETAIL_POLL_H +#define _DOVETAIL_POLL_H + +/* + * Placeholder for the out-of-band poll operation descriptor. + */ + +struct oob_poll_wait { +}; + +#endif /* !_DOVETAIL_POLL_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/dovetail/spinlock.h linux-dovetail-v5.15.y-dovetail/include/dovetail/spinlock.h --- linux-5.15.26/include/dovetail/spinlock.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/dovetail/spinlock.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DOVETAIL_SPINLOCK_H +#define _DOVETAIL_SPINLOCK_H + +/* Placeholders for hard/hybrid spinlock modifiers. 
*/ + +struct raw_spinlock; + +static inline void hard_spin_lock_prepare(struct raw_spinlock *lock) +{ } + +static inline void hard_spin_unlock_finish(struct raw_spinlock *lock) +{ } + +static inline void hard_spin_trylock_prepare(struct raw_spinlock *lock) +{ } + +static inline void hard_spin_trylock_fail(struct raw_spinlock *lock) +{ } + +#endif /* !_DOVETAIL_SPINLOCK_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/dovetail/thread_info.h linux-dovetail-v5.15.y-dovetail/include/dovetail/thread_info.h --- linux-5.15.26/include/dovetail/thread_info.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/dovetail/thread_info.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DOVETAIL_THREAD_INFO_H +#define _DOVETAIL_THREAD_INFO_H + +/* + * Placeholder for per-thread state information defined by the + * co-kernel. + */ + +struct oob_thread_state { +}; + +#endif /* !_DOVETAIL_THREAD_INFO_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/clockchips.h linux-dovetail-v5.15.y-dovetail/include/linux/clockchips.h --- linux-5.15.26/include/linux/clockchips.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/clockchips.h 2022-03-10 09:47:50.000000000 +0100 @@ -15,6 +15,7 @@ # include # include # include +# include struct clock_event_device; struct module; @@ -31,6 +32,7 @@ struct module; * from DETACHED or SHUTDOWN. * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily * stopped. + * RESERVED: Device is controlled by an out-of-band core via a proxy. */ enum clock_event_state { CLOCK_EVT_STATE_DETACHED, @@ -38,6 +40,7 @@ enum clock_event_state { CLOCK_EVT_STATE_PERIODIC, CLOCK_EVT_STATE_ONESHOT, CLOCK_EVT_STATE_ONESHOT_STOPPED, + CLOCK_EVT_STATE_RESERVED, }; /* @@ -67,6 +70,17 @@ enum clock_event_state { */ # define CLOCK_EVT_FEAT_HRTIMER 0x000080 +/* + * Interrupt pipeline support: + * + * - Clockevent device can work with pipelined timer events (i.e. proxied). + * - Device currently delivers high-precision events via out-of-band interrupts. + * - Device acts as a proxy for timer interrupt pipelining.
+ */ +# define CLOCK_EVT_FEAT_PIPELINE 0x000100 +# define CLOCK_EVT_FEAT_OOB 0x000200 +# define CLOCK_EVT_FEAT_PROXY 0x000400 + /** * struct clock_event_device - clock event device descriptor * @event_handler: Assigned by the framework to be called by the low @@ -91,7 +105,7 @@ enum clock_event_state { * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration * @name: ptr to clock event name * @rating: variable to rate clock event devices - * @irq: IRQ number (only for non CPU local devices) + * @irq: IRQ number (only for non CPU local devices, or pipelined timers) * @bound_on: Bound on CPU * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code @@ -137,6 +151,11 @@ static inline bool clockevent_state_deta return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED; } +static inline bool clockevent_state_reserved(struct clock_event_device *dev) +{ + return dev->state_use_accessors == CLOCK_EVT_STATE_RESERVED; +} + static inline bool clockevent_state_shutdown(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN; @@ -157,6 +176,11 @@ static inline bool clockevent_state_ones return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED; } +static inline bool clockevent_is_oob(struct clock_event_device *dev) +{ + return !!(dev->features & CLOCK_EVT_FEAT_OOB); +} + /* * Calculate a multiplication factor for scaled math, which is used to convert * nanoseconds based values to clock ticks: @@ -186,6 +210,8 @@ extern int clockevents_unbind_device(str extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); +extern void clockevents_switch_state(struct clock_event_device *dev, + enum clock_event_state state); extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); @@ -215,6 +241,49 @@ static inline int tick_check_broadcast_e static inline void tick_setup_hrtimer_broadcast(void) { } # endif +#ifdef CONFIG_IRQ_PIPELINE + +struct clock_proxy_device { + struct clock_event_device proxy_device; + struct clock_event_device *real_device; + void (*handle_oob_event)(struct clock_event_device *dev); + void (*__setup_handler)(struct clock_proxy_device *dev); + void (*__original_handler)(struct clock_event_device *dev); +}; + +void tick_notify_proxy(void); + +static inline +void clockevents_handle_event(struct clock_event_device *ced) +{ + /* + * If called from the in-band stage, or for delivering a + * high-precision timer event to the out-of-band stage, call + * the event handler immediately. + * + * Otherwise, ced is still the in-band tick device for the + * current CPU, so just relay the incoming tick to the in-band + * stage via tick_notify_proxy(). This situation can happen + * when all CPUs receive the same out-of-band IRQ from a given + * clock event device, but only a subset of the online CPUs has + * enabled a proxy. 
+ */ + if (clockevent_is_oob(ced) || running_inband()) + ced->event_handler(ced); + else + tick_notify_proxy(); +} + +#else + +static inline +void clockevents_handle_event(struct clock_event_device *ced) +{ + ced->event_handler(ced); +} + +#endif /* !CONFIG_IRQ_PIPELINE */ + #else /* !CONFIG_GENERIC_CLOCKEVENTS: */ static inline void clockevents_suspend(void) { } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/clocksource.h linux-dovetail-v5.15.y-dovetail/include/linux/clocksource.h --- linux-5.15.26/include/linux/clocksource.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/clocksource.h 2022-03-10 09:47:50.000000000 +0100 @@ -13,13 +13,16 @@ #include #include #include +#include #include #include +#include #include #include #include #include #include +#include struct clocksource; struct module; @@ -29,8 +32,15 @@ struct module; #include #endif + #include +enum clocksource_vdso_type { + CLOCKSOURCE_VDSO_NONE = 0, + CLOCKSOURCE_VDSO_ARCHITECTED, + CLOCKSOURCE_VDSO_MMIO, /* <= Must be last. */ +}; + /** * struct clocksource - hardware abstraction for a free running counter * Provides mostly state-free accessors to the underlying hardware. @@ -110,6 +120,7 @@ struct clocksource { int rating; enum clocksource_ids id; enum vdso_clock_mode vdso_clock_mode; + enum clocksource_vdso_type vdso_type; unsigned long flags; int (*enable)(struct clocksource *cs); @@ -129,6 +140,36 @@ struct clocksource { struct module *owner; }; +struct clocksource_mmio { + void __iomem *reg; + struct clocksource clksrc; +}; + +struct clocksource_user_mmio { + struct clocksource_mmio mmio; + void __iomem *reg_upper; + unsigned int bits_lower; + unsigned int mask_lower; + unsigned int mask_upper; + enum clksrc_user_mmio_type type; + unsigned long phys_lower; + unsigned long phys_upper; + unsigned int id; + struct device *dev; + struct cdev cdev; + DECLARE_HASHTABLE(mappings, 10); + struct spinlock lock; + struct list_head link; +}; + +struct clocksource_mmio_regs { + void __iomem *reg_upper; + void __iomem *reg_lower; + unsigned int bits_upper; + unsigned int bits_lower; + unsigned long (*revmap)(void *); +}; + /* * Clock source flags bits:: */ @@ -273,10 +314,21 @@ extern u64 clocksource_mmio_readl_up(str extern u64 clocksource_mmio_readl_down(struct clocksource *); extern u64 clocksource_mmio_readw_up(struct clocksource *); extern u64 clocksource_mmio_readw_down(struct clocksource *); +extern u64 clocksource_dual_mmio_readw_up(struct clocksource *); +extern u64 clocksource_dual_mmio_readl_up(struct clocksource *); extern int clocksource_mmio_init(void __iomem *, const char *, unsigned long, int, unsigned, u64 (*)(struct clocksource *)); +extern int clocksource_user_mmio_init(struct clocksource_user_mmio *ucs, + const struct clocksource_mmio_regs *regs, + unsigned long hz); + +extern int clocksource_user_single_mmio_init( + void __iomem *base, const char *name, + unsigned long hz, int rating, unsigned int bits, + u64 (*read)(struct clocksource *)); + extern int clocksource_i8253_init(void); #define TIMER_OF_DECLARE(name, compat, fn) \ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/console.h linux-dovetail-v5.15.y-dovetail/include/linux/console.h --- linux-5.15.26/include/linux/console.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/console.h 2022-03-10 09:47:50.000000000 +0100 @@ -140,6 +140,7 @@ static inline int con_debug_leave(void) struct console { char name[16]; void 
(*write)(struct console *, const char *, unsigned); + void (*write_raw)(struct console *, const char *, unsigned); int (*read)(struct console *, char *, unsigned); struct tty_driver *(*device)(struct console *, int *); void (*unblank)(void); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/context_tracking_state.h linux-dovetail-v5.15.y-dovetail/include/linux/context_tracking_state.h --- linux-5.15.26/include/linux/context_tracking_state.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/context_tracking_state.h 2022-03-10 09:47:50.000000000 +0100 @@ -28,7 +28,7 @@ DECLARE_PER_CPU(struct context_tracking, static __always_inline bool context_tracking_enabled(void) { - return static_branch_unlikely(&context_tracking_key); + return static_branch_unlikely(&context_tracking_key) && running_inband(); } static __always_inline bool context_tracking_enabled_cpu(int cpu) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/dmaengine.h linux-dovetail-v5.15.y-dovetail/include/linux/dmaengine.h --- linux-5.15.26/include/linux/dmaengine.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/dmaengine.h 2022-03-10 09:47:50.000000000 +0100 @@ -61,6 +61,7 @@ enum dma_transaction_type { DMA_ASYNC_TX, DMA_SLAVE, DMA_CYCLIC, + DMA_OOB, DMA_INTERLEAVE, DMA_COMPLETION_NO_ORDER, DMA_REPEAT, @@ -190,6 +191,13 @@ struct dma_interleaved_template { * transaction is marked with DMA_PREP_REPEAT will cause the new transaction * to never be processed and stay in the issued queue forever. The flag is * ignored if the previous transaction is not a repeated transaction. + * @DMA_OOB_INTERRUPT - if DMA_OOB is supported, handle the completion + * interrupt for this transaction from the out-of-band stage (implies + * DMA_PREP_INTERRUPT). This includes calling the completion callback routine + * from such context if defined for the transaction. + * @DMA_OOB_PULSE - if DMA_OOB is supported, (slave) transactions on the + * out-of-band channel should be triggered manually by a call to + * dma_pulse_oob() (implies DMA_OOB_INTERRUPT). 
*/ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), @@ -202,6 +210,8 @@ enum dma_ctrl_flags { DMA_PREP_CMD = (1 << 7), DMA_PREP_REPEAT = (1 << 8), DMA_PREP_LOAD_EOT = (1 << 9), + DMA_OOB_INTERRUPT = (1 << 10), + DMA_OOB_PULSE = (1 << 11), }; /** @@ -942,6 +952,7 @@ struct dma_device { dma_cookie_t cookie, struct dma_tx_state *txstate); void (*device_issue_pending)(struct dma_chan *chan); + int (*device_pulse_oob)(struct dma_chan *chan); void (*device_release)(struct dma_device *dev); /* debugfs support */ void (*dbg_summary_show)(struct seq_file *s, struct dma_device *dev); @@ -978,6 +989,14 @@ static inline struct dma_async_tx_descri dir, flags, NULL); } +static inline bool dmaengine_oob_valid(struct dma_chan *chan, + unsigned long flags) +{ + return !(dovetailing() && + flags & (DMA_OOB_INTERRUPT|DMA_OOB_PULSE) && + !test_bit(DMA_OOB, chan->device->cap_mask.bits)); +} + static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction dir, unsigned long flags) @@ -985,6 +1004,9 @@ static inline struct dma_async_tx_descri if (!chan || !chan->device || !chan->device->device_prep_slave_sg) return NULL; + if (!dmaengine_oob_valid(chan, flags)) + return NULL; + return chan->device->device_prep_slave_sg(chan, sgl, sg_len, dir, flags, NULL); } @@ -1012,6 +1034,9 @@ static inline struct dma_async_tx_descri if (!chan || !chan->device || !chan->device->device_prep_dma_cyclic) return NULL; + if (!dmaengine_oob_valid(chan, flags)) + return NULL; + return chan->device->device_prep_dma_cyclic(chan, buf_addr, buf_len, period_len, dir, flags); } @@ -1417,6 +1442,22 @@ static inline void dma_async_issue_pendi } /** + * dma_pulse_oob - manual trigger of an out-of-band transaction + * @chan: target DMA channel + * + * Trigger the next out-of-band transaction immediately. + */ +static inline int dma_pulse_oob(struct dma_chan *chan) +{ + int ret = -ENOTSUPP; + + if (chan->device->device_pulse_oob) + ret = chan->device->device_pulse_oob(chan); + + return ret; +} + +/** * dma_async_is_tx_complete - poll for transaction completion * @chan: DMA channel * @cookie: transaction identifier to check status of diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/dovetail.h linux-dovetail-v5.15.y-dovetail/include/linux/dovetail.h --- linux-5.15.26/include/linux/dovetail.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/dovetail.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,318 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2016 Philippe Gerum . 
+ */ +#ifndef _LINUX_DOVETAIL_H +#define _LINUX_DOVETAIL_H + +#ifdef CONFIG_DOVETAIL + +#include +#include +#include +#include +#include +#include + +struct pt_regs; +struct task_struct; +struct file; +struct files_struct; + +enum inband_event_type { + INBAND_TASK_SIGNAL, + INBAND_TASK_MIGRATION, + INBAND_TASK_EXIT, + INBAND_TASK_RETUSER, + INBAND_TASK_PTSTEP, + INBAND_TASK_PTSTOP, + INBAND_TASK_PTCONT, + INBAND_PROCESS_CLEANUP, +}; + +struct dovetail_migration_data { + struct task_struct *task; + int dest_cpu; +}; + +struct dovetail_altsched_context { + struct task_struct *task; + struct mm_struct *active_mm; + bool borrowed_mm; +}; + +#define protect_inband_mm(__flags) \ + do { \ + (__flags) = hard_cond_local_irq_save(); \ + barrier(); \ + } while (0) \ + +#define unprotect_inband_mm(__flags) \ + do { \ + barrier(); \ + hard_cond_local_irq_restore(__flags); \ + } while (0) \ + +void inband_task_init(struct task_struct *p); + +int pipeline_syscall(unsigned int nr, struct pt_regs *regs); + +void __oob_trap_notify(unsigned int exception, + struct pt_regs *regs); + +static __always_inline void oob_trap_notify(unsigned int exception, + struct pt_regs *regs) +{ + if (running_oob() && !test_thread_local_flags(_TLF_OOBTRAP)) + __oob_trap_notify(exception, regs); +} + +void __oob_trap_unwind(unsigned int exception, + struct pt_regs *regs); + +static __always_inline void oob_trap_unwind(unsigned int exception, + struct pt_regs *regs) +{ + if (test_thread_local_flags(_TLF_OOBTRAP)) + __oob_trap_unwind(exception, regs); +} + +void inband_event_notify(enum inband_event_type, + void *data); + +void inband_clock_was_set(void); + +static inline void inband_signal_notify(struct task_struct *p) +{ + if (test_ti_local_flags(task_thread_info(p), _TLF_DOVETAIL)) + inband_event_notify(INBAND_TASK_SIGNAL, p); +} + +static inline void inband_migration_notify(struct task_struct *p, int cpu) +{ + if (test_ti_local_flags(task_thread_info(p), _TLF_DOVETAIL)) { + struct dovetail_migration_data d = { + .task = p, + .dest_cpu = cpu, + }; + inband_event_notify(INBAND_TASK_MIGRATION, &d); + } +} + +static inline void inband_exit_notify(void) +{ + inband_event_notify(INBAND_TASK_EXIT, NULL); +} + +static inline void inband_cleanup_notify(struct mm_struct *mm) +{ + /* + * Notify regardless of _TLF_DOVETAIL: current may have + * resources to clean up although it might not be interested + * in other kernel events. 
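The protect_inband_mm()/unprotect_inband_mm() pair defined above brackets in-band memory-management updates so the out-of-band stage cannot preempt them on the local CPU; a minimal sketch, assuming hypothetical mm, addr, ptep and pte variables (not taken from the patch):

	unsigned long flags;

	protect_inband_mm(flags);	/* hard IRQs off, no oob preemption */
	set_pte_at(mm, addr, ptep, pte);
	unprotect_inband_mm(flags);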
+ */ + inband_event_notify(INBAND_PROCESS_CLEANUP, mm); +} + +static inline void inband_ptstop_notify(void) +{ + if (test_thread_local_flags(_TLF_DOVETAIL)) + inband_event_notify(INBAND_TASK_PTSTOP, current); +} + +static inline void inband_ptcont_notify(void) +{ + if (test_thread_local_flags(_TLF_DOVETAIL)) + inband_event_notify(INBAND_TASK_PTCONT, current); +} + +static inline void inband_ptstep_notify(struct task_struct *tracee) +{ + if (test_ti_local_flags(task_thread_info(tracee), _TLF_DOVETAIL)) + inband_event_notify(INBAND_TASK_PTSTEP, tracee); +} + +static inline +void prepare_inband_switch(struct task_struct *next) +{ + struct task_struct *prev = current; + + if (test_ti_local_flags(task_thread_info(next), _TLF_DOVETAIL)) + __this_cpu_write(irq_pipeline.rqlock_owner, prev); +} + +void inband_retuser_notify(void); + +bool inband_switch_tail(void); + +void oob_trampoline(void); + +void arch_inband_task_init(struct task_struct *p); + +int dovetail_start(void); + +void dovetail_stop(void); + +void dovetail_init_altsched(struct dovetail_altsched_context *p); + +void dovetail_start_altsched(void); + +void dovetail_stop_altsched(void); + +__must_check int dovetail_leave_inband(void); + +static inline void dovetail_leave_oob(void) +{ + clear_thread_local_flags(_TLF_OOB|_TLF_OFFSTAGE); + clear_thread_flag(TIF_MAYDAY); +} + +void dovetail_resume_inband(void); + +bool dovetail_context_switch(struct dovetail_altsched_context *out, + struct dovetail_altsched_context *in, + bool leave_inband); + +static inline +struct oob_thread_state *dovetail_current_state(void) +{ + return &current_thread_info()->oob_state; +} + +static inline +struct oob_thread_state *dovetail_task_state(struct task_struct *p) +{ + return &task_thread_info(p)->oob_state; +} + +static inline +struct oob_mm_state *dovetail_mm_state(void) +{ + if (current->flags & PF_KTHREAD) + return NULL; + + return &current->mm->oob_state; +} + +void dovetail_call_mayday(struct pt_regs *regs); + +static inline void dovetail_send_mayday(struct task_struct *castaway) +{ + struct thread_info *ti = task_thread_info(castaway); + + if (test_ti_local_flags(ti, _TLF_DOVETAIL)) + set_ti_thread_flag(ti, TIF_MAYDAY); +} + +static inline void dovetail_request_ucall(struct task_struct *task) +{ + struct thread_info *ti = task_thread_info(task); + + if (test_ti_local_flags(ti, _TLF_DOVETAIL)) + set_ti_thread_flag(ti, TIF_RETUSER); +} + +static inline void dovetail_clear_ucall(void) +{ + if (test_thread_flag(TIF_RETUSER)) + clear_thread_flag(TIF_RETUSER); +} + +void install_inband_fd(unsigned int fd, struct file *file, + struct files_struct *files); + +void uninstall_inband_fd(unsigned int fd, struct file *file, + struct files_struct *files); + +void replace_inband_fd(unsigned int fd, struct file *file, + struct files_struct *files); + +#else /* !CONFIG_DOVETAIL */ + +struct files_struct; + +#define protect_inband_mm(__flags) \ + do { (void)(__flags); } while (0) + +#define unprotect_inband_mm(__flags) \ + do { (void)(__flags); } while (0) + +static inline +void inband_task_init(struct task_struct *p) { } + +static inline void arch_dovetail_exec_prepare(void) +{ } + +/* + * Keep the trap helpers as macros, we might not be able to resolve + * trap numbers if CONFIG_DOVETAIL is off. 
+ */ +#define oob_trap_notify(__exception, __regs) do { } while (0) +#define oob_trap_unwind(__exception, __regs) do { } while (0) + +static inline +int pipeline_syscall(unsigned int nr, struct pt_regs *regs) +{ + return 0; +} + +static inline void inband_signal_notify(struct task_struct *p) { } + +static inline +void inband_migration_notify(struct task_struct *p, int cpu) { } + +static inline void inband_exit_notify(void) { } + +static inline void inband_cleanup_notify(struct mm_struct *mm) { } + +static inline void inband_retuser_notify(void) { } + +static inline void inband_ptstop_notify(void) { } + +static inline void inband_ptcont_notify(void) { } + +static inline void inband_ptstep_notify(struct task_struct *tracee) { } + +static inline void oob_trampoline(void) { } + +static inline void prepare_inband_switch(struct task_struct *next) { } + +static inline bool inband_switch_tail(void) +{ + /* Matches converse disabling in prepare_task_switch(). */ + hard_cond_local_irq_enable(); + return false; +} + +static inline void dovetail_request_ucall(struct task_struct *task) { } + +static inline void dovetail_clear_ucall(void) { } + +static inline void inband_clock_was_set(void) { } + +static inline +void install_inband_fd(unsigned int fd, struct file *file, + struct files_struct *files) { } + +static inline +void uninstall_inband_fd(unsigned int fd, struct file *file, + struct files_struct *files) { } + +static inline +void replace_inband_fd(unsigned int fd, struct file *file, + struct files_struct *files) { } + +#endif /* !CONFIG_DOVETAIL */ + +static __always_inline bool dovetailing(void) +{ + return IS_ENABLED(CONFIG_DOVETAIL); +} + +static __always_inline bool dovetail_debug(void) +{ + return IS_ENABLED(CONFIG_DEBUG_DOVETAIL); +} + +#endif /* _LINUX_DOVETAIL_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/dw_apb_timer.h linux-dovetail-v5.15.y-dovetail/include/linux/dw_apb_timer.h --- linux-5.15.26/include/linux/dw_apb_timer.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/dw_apb_timer.h 2022-03-10 09:47:50.000000000 +0100 @@ -30,7 +30,7 @@ struct dw_apb_clock_event_device { struct dw_apb_clocksource { struct dw_apb_timer timer; - struct clocksource cs; + struct clocksource_user_mmio ummio; }; void dw_apb_clockevent_register(struct dw_apb_clock_event_device *dw_ced); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/entry-common.h linux-dovetail-v5.15.y-dovetail/include/linux/entry-common.h --- linux-5.15.26/include/linux/entry-common.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/entry-common.h 2022-03-10 09:47:50.000000000 +0100 @@ -62,6 +62,14 @@ _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) +/* + * Status codes of syscall entry when Dovetail is enabled. Must not + * conflict with valid syscall numbers. And with -1 which seccomp uses + * to skip an syscall. 
+ */ +#define EXIT_SYSCALL_OOB (-2) +#define EXIT_SYSCALL_TAIL (-3) + /** * arch_check_user_regs - Architecture specific sanity check for user mode regs * @regs: Pointer to currents pt_regs @@ -193,7 +201,7 @@ static inline void local_irq_enable_exit #ifndef local_irq_enable_exit_to_user static inline void local_irq_enable_exit_to_user(unsigned long ti_work) { - local_irq_enable(); + local_irq_enable_full(); } #endif @@ -208,7 +216,7 @@ static inline void local_irq_disable_exi #ifndef local_irq_disable_exit_to_user static inline void local_irq_disable_exit_to_user(void) { - local_irq_disable(); + local_irq_disable_full(); } #endif @@ -392,6 +400,12 @@ void irqentry_enter_from_user_mode(struc */ void irqentry_exit_to_user_mode(struct pt_regs *regs); +enum irqentry_info { + IRQENTRY_INBAND_UNSTALLED = 0, + IRQENTRY_INBAND_STALLED, + IRQENTRY_OOB, +}; + #ifndef irqentry_state /** * struct irqentry_state - Opaque object for exception state storage @@ -399,6 +413,7 @@ void irqentry_exit_to_user_mode(struct p * exit path has to invoke rcu_irq_exit(). * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that * lockdep state is restored correctly on exit from nmi. + * @stage_info: Information about pipeline state and current stage on IRQ entry. * * This opaque object is filled in by the irqentry_*_enter() functions and * must be passed back into the corresponding irqentry_*_exit() functions @@ -413,6 +428,9 @@ typedef struct irqentry_state { bool exit_rcu; bool lockdep; }; +#ifdef CONFIG_IRQ_PIPELINE + enum irqentry_info stage_info; +#endif } irqentry_state_t; #endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/fcntl.h linux-dovetail-v5.15.y-dovetail/include/linux/fcntl.h --- linux-5.15.26/include/linux/fcntl.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/fcntl.h 2022-03-10 09:47:50.000000000 +0100 @@ -10,7 +10,7 @@ (O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \ O_APPEND | O_NDELAY | O_NONBLOCK | __O_SYNC | O_DSYNC | \ FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \ - O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE) + O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE | O_OOB) /* List of all valid flags for the how->resolve argument: */ #define VALID_RESOLVE_FLAGS \ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/fs.h linux-dovetail-v5.15.y-dovetail/include/linux/fs.h --- linux-5.15.26/include/linux/fs.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/fs.h 2022-03-10 09:47:50.000000000 +0100 @@ -56,6 +56,7 @@ struct kiocb; struct kobject; struct pipe_inode_info; struct poll_table_struct; +struct oob_poll_wait; struct kstatfs; struct vm_area_struct; struct vfsmount; @@ -992,6 +993,7 @@ struct file { #endif /* needed for tty driver, and maybe others */ void *private_data; + void *oob_data; #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ @@ -2001,8 +2003,11 @@ extern long vfs_ioctl(struct file *file, #ifdef CONFIG_COMPAT extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +extern long compat_ptr_oob_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); #else #define compat_ptr_ioctl NULL +#define compat_ptr_oob_ioctl NULL #endif /* @@ -2081,6 +2086,11 @@ struct file_operations { __poll_t (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) 
(struct file *, unsigned int, unsigned long); + ssize_t (*oob_read) (struct file *, char __user *, size_t); + ssize_t (*oob_write) (struct file *, const char __user *, size_t); + long (*oob_ioctl) (struct file *, unsigned int, unsigned long); + long (*compat_oob_ioctl) (struct file *, unsigned int, unsigned long); + __poll_t (*oob_poll) (struct file *, struct oob_poll_wait *); int (*mmap) (struct file *, struct vm_area_struct *); unsigned long mmap_supported_flags; int (*open) (struct inode *, struct file *); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/hardirq.h linux-dovetail-v5.15.y-dovetail/include/linux/hardirq.h --- linux-5.15.26/include/linux/hardirq.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/hardirq.h 2022-03-10 09:47:50.000000000 +0100 @@ -8,6 +8,7 @@ #include #include #include +#include #include extern void synchronize_irq(unsigned int irq); @@ -122,6 +123,7 @@ extern void rcu_nmi_exit(void); #define nmi_enter() \ do { \ + irq_pipeline_nmi_enter(); \ __nmi_enter(); \ lockdep_hardirq_enter(); \ rcu_nmi_enter(); \ @@ -146,6 +148,22 @@ extern void rcu_nmi_exit(void); rcu_nmi_exit(); \ lockdep_hardirq_exit(); \ __nmi_exit(); \ + irq_pipeline_nmi_exit(); \ } while (0) +static inline bool start_irq_flow(void) +{ + return !irqs_pipelined() || in_pipeline(); +} + +static inline bool on_pipeline_entry(void) +{ + return irqs_pipelined() && in_pipeline(); +} + +static inline bool in_hard_irq(void) +{ + return irqs_pipelined() ? in_pipeline() : in_irq(); +} + #endif /* LINUX_HARDIRQ_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/intel-iommu.h linux-dovetail-v5.15.y-dovetail/include/linux/intel-iommu.h --- linux-5.15.26/include/linux/intel-iommu.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/intel-iommu.h 2022-03-10 09:47:50.000000000 +0100 @@ -583,7 +583,7 @@ struct intel_iommu { u64 ecap; u64 vccap; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ - raw_spinlock_t register_lock; /* protect register handling */ + hard_spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ int msagaw; /* max sagaw of this iommu */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/interrupt.h linux-dovetail-v5.15.y-dovetail/include/linux/interrupt.h --- linux-5.15.26/include/linux/interrupt.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/interrupt.h 2022-03-10 09:47:50.000000000 +0100 @@ -67,6 +67,12 @@ * later. * IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers, * depends on IRQF_PERCPU. + * IRQF_OOB - Interrupt is attached to an out-of-band handler living + * on the heading stage of the interrupt pipeline + * (CONFIG_IRQ_PIPELINE). It may be delivered to the + * handler any time interrupts are enabled in the CPU, + * regardless of the (virtualized) interrupt state + * maintained by local_irq_save/disable(). */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -82,6 +88,7 @@ #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_NO_AUTOEN 0x00080000 #define IRQF_NO_DEBUG 0x00100000 +#define IRQF_OOB 0x00200000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) @@ -501,9 +508,29 @@ DECLARE_STATIC_KEY_FALSE(force_irqthread * to ensure that after a local_irq_disable(), interrupts have * really been disabled in hardware. 
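To show what IRQF_OOB is for (an editorial sketch, not from the patch): an out-of-band handler, typically installed by a companion core, would be attached with the usual request_irq() call plus the new flag; my_oob_handler, my_dev, irq and ret are placeholders:

	static irqreturn_t my_oob_handler(int irq, void *dev_id)
	{
		/* Runs on the oob stage, even while inband IRQs are stalled. */
		return IRQ_HANDLED;
	}

	ret = request_irq(irq, my_oob_handler, IRQF_OOB, "my-oob-device", my_dev);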
Such architectures need to * implement the following hook. + * + * Those cases also apply when interrupt pipelining is in effect, + * since we are virtualizing the interrupt disable state here too. */ #ifndef hard_irq_disable -#define hard_irq_disable() do { } while(0) +#define hard_irq_disable() hard_cond_local_irq_disable() +#endif + +/* + * Unlike other virtualized interrupt disabling schemes may assume, we + * can't expect local_irq_restore() to turn hard interrupts on when + * pipelining. hard_irq_enable() is introduced to be paired with + * hard_irq_disable(), for unconditionally turning them on. The only + * sane sequence mixing virtual and real disable state manipulation + * is: + * + * 1. local_irq_save/disable + * 2. hard_irq_disable + * 3. hard_irq_enable + * 4. local_irq_restore/enable + */ +#ifndef hard_irq_enable +#define hard_irq_enable() hard_cond_local_irq_enable() #endif /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/irqdesc.h linux-dovetail-v5.15.y-dovetail/include/linux/irqdesc.h --- linux-5.15.26/include/linux/irqdesc.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/irqdesc.h 2022-03-10 09:47:50.000000000 +0100 @@ -68,7 +68,7 @@ struct irq_desc { unsigned int irqs_unhandled; atomic_t threads_handled; int threads_handled_last; - raw_spinlock_t lock; + hybrid_spinlock_t lock; struct cpumask *percpu_enabled; const struct cpumask *percpu_affinity; #ifdef CONFIG_SMP @@ -242,6 +242,11 @@ static inline bool irq_is_percpu_devid(u return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID); } +static inline int irq_is_oob(unsigned int irq) +{ + return irq_check_status_bit(irq, IRQ_OOB); +} + void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, struct lock_class_key *request_class); static inline void diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/irqflags.h linux-dovetail-v5.15.y-dovetail/include/linux/irqflags.h --- linux-5.15.26/include/linux/irqflags.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/irqflags.h 2022-03-10 09:47:50.000000000 +0100 @@ -13,6 +13,7 @@ #define _LINUX_TRACE_IRQFLAGS_H #include +#include #include #include @@ -52,7 +53,9 @@ DECLARE_PER_CPU(int, hardirq_context); extern void trace_hardirqs_on_prepare(void); extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); +extern void trace_hardirqs_on_pipelined(void); extern void trace_hardirqs_off(void); +extern void trace_hardirqs_off_pipelined(void); # define lockdep_hardirq_context() (raw_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) @@ -122,7 +125,9 @@ do { \ # define trace_hardirqs_on_prepare() do { } while (0) # define trace_hardirqs_off_finish() do { } while (0) # define trace_hardirqs_on() do { } while (0) +# define trace_hardirqs_on_pipelined() do { } while (0) # define trace_hardirqs_off() do { } while (0) +# define trace_hardirqs_off_pipelined() do { } while (0) # define lockdep_hardirq_context() 0 # define lockdep_softirq_context(p) 0 # define lockdep_hardirqs_enabled() 0 @@ -240,6 +245,38 @@ extern void warn_bogus_irq_restore(void) #endif /* CONFIG_TRACE_IRQFLAGS */ +#ifdef CONFIG_IRQ_PIPELINE +#define local_irq_enable_full() \ + do { \ + hard_local_irq_enable(); \ + local_irq_enable(); \ + } while (0) + +#define local_irq_disable_full() \ + do { \ + hard_local_irq_disable(); \ + local_irq_disable(); 
\ + } while (0) + +#define local_irq_save_full(__flags) \ + do { \ + hard_local_irq_disable(); \ + local_irq_save(__flags); \ + } while (0) + +#define local_irq_restore_full(__flags) \ + do { \ + if (!irqs_disabled_flags(__flags)) \ + hard_local_irq_enable(); \ + local_irq_restore(__flags); \ + } while (0) +#else +#define local_irq_enable_full() local_irq_enable() +#define local_irq_disable_full() local_irq_disable() +#define local_irq_save_full(__flags) local_irq_save(__flags) +#define local_irq_restore_full(__flags) local_irq_restore(__flags) +#endif + #define local_save_flags(flags) raw_local_save_flags(flags) /* diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/irq.h linux-dovetail-v5.15.y-dovetail/include/linux/irq.h --- linux-5.15.26/include/linux/irq.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/irq.h 2022-03-10 09:47:50.000000000 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,11 @@ enum irqchip_irq_state; * IRQ_DISABLE_UNLAZY - Disable lazy irq disable * IRQ_HIDDEN - Don't show up in /proc/interrupts * IRQ_NO_DEBUG - Exclude from note_interrupt() debugging + * IRQ_OOB - Interrupt can be delivered to the out-of-band handler + * when pipelining is enabled (CONFIG_IRQ_PIPELINE), + * regardless of the (virtualized) interrupt state + * maintained by local_irq_save/disable(). + * IRQ_CHAINED - Interrupt is chained. */ enum { IRQ_TYPE_NONE = 0x00000000, @@ -101,13 +107,15 @@ enum { IRQ_DISABLE_UNLAZY = (1 << 19), IRQ_HIDDEN = (1 << 20), IRQ_NO_DEBUG = (1 << 21), + IRQ_OOB = (1 << 22), + IRQ_CHAINED = (1 << 23), }; #define IRQF_MODIFY_MASK \ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_HIDDEN) + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_HIDDEN | IRQ_OOB) #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) @@ -173,6 +181,7 @@ struct irq_common_data { * irq_domain * @chip_data: platform-specific per-chip private data for the chip * methods, to allow shared chip implementations + * @move_work: irq_work for setaffinity deferral when pipelining irqs */ struct irq_data { u32 mask; @@ -184,6 +193,9 @@ struct irq_data { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_data *parent_data; #endif +#if defined(CONFIG_IRQ_PIPELINE) && defined(CONFIG_GENERIC_PENDING_IRQ) + struct irq_work move_work; +#endif void *chip_data; }; @@ -221,6 +233,7 @@ struct irq_data { * irq_chip::irq_set_affinity() when deactivated. * IRQD_IRQ_ENABLED_ON_SUSPEND - Interrupt is enabled on suspend by irq pm if * irqchip have flag IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND set. 
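The four-step ordering spelled out in the interrupt.h comment above translates into the following hypothetical sequence (illustration only, not part of the patch):

	unsigned long flags;

	local_irq_save(flags);		/* 1. virtual (stall-based) disable */
	hard_irq_disable();		/* 2. really mask IRQs in the CPU */
	/* ... section that not even out-of-band IRQs may preempt ... */
	hard_irq_enable();		/* 3. unconditionally unmask the CPU */
	local_irq_restore(flags);	/* 4. restore the virtual state */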
+ * IRQD_SETAFFINITY_BLOCKED - Pending affinity setting on hold (IRQ_PIPELINE) */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -247,6 +260,7 @@ enum { IRQD_HANDLE_ENFORCE_IRQCTX = (1 << 28), IRQD_AFFINITY_ON_ACTIVATE = (1 << 29), IRQD_IRQ_ENABLED_ON_SUSPEND = (1 << 30), + IRQD_SETAFFINITY_BLOCKED = (1 << 31), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -256,6 +270,21 @@ static inline bool irqd_is_setaffinity_p return __irqd_to_state(d) & IRQD_SETAFFINITY_PENDING; } +static inline void irqd_set_move_blocked(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_SETAFFINITY_BLOCKED; +} + +static inline void irqd_clr_move_blocked(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_SETAFFINITY_BLOCKED; +} + +static inline bool irqd_is_setaffinity_blocked(struct irq_data *d) +{ + return irqs_pipelined() && __irqd_to_state(d) & IRQD_SETAFFINITY_BLOCKED; +} + static inline bool irqd_is_per_cpu(struct irq_data *d) { return __irqd_to_state(d) & IRQD_PER_CPU; @@ -570,6 +599,7 @@ struct irq_chip { * IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND: Invokes __enable_irq()/__disable_irq() for wake irqs * in the suspend path if they are in disabled state * IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup + * IRQCHIP_PIPELINE_SAFE: Chip can work in pipelined mode */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -583,6 +613,7 @@ enum { IRQCHIP_SUPPORTS_NMI = (1 << 8), IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9), IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10), + IRQCHIP_PIPELINE_SAFE = (1 << 11), }; #include @@ -659,6 +690,7 @@ extern void handle_percpu_irq(struct irq extern void handle_percpu_devid_irq(struct irq_desc *desc); extern void handle_bad_irq(struct irq_desc *desc); extern void handle_nested_irq(unsigned int irq); +extern void handle_synthetic_irq(struct irq_desc *desc); extern void handle_fasteoi_nmi(struct irq_desc *desc); extern void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc); @@ -1044,7 +1076,7 @@ struct irq_chip_type { * different flow mechanisms (level/edge) for it. */ struct irq_chip_generic { - raw_spinlock_t lock; + hard_spinlock_t lock; void __iomem *reg_base; u32 (*reg_readl)(void __iomem *addr); void (*reg_writel)(u32 val, void __iomem *addr); @@ -1171,6 +1203,12 @@ static inline struct irq_chip_type *irq_ #define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX) +#ifdef CONFIG_IRQ_PIPELINE + +int irq_switch_oob(unsigned int irq, bool on); + +#endif /* !CONFIG_IRQ_PIPELINE */ + #ifdef CONFIG_SMP static inline void irq_gc_lock(struct irq_chip_generic *gc) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/irq_pipeline.h linux-dovetail-v5.15.y-dovetail/include/linux/irq_pipeline.h --- linux-5.15.26/include/linux/irq_pipeline.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/irq_pipeline.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,132 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2002 Philippe Gerum . + * 2006 Gilles Chanteperdrix. + * 2007 Jan Kiszka. 
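For illustration (hypothetical chip, not taken from the patch), an irqchip whose callbacks may safely be invoked from the pipelined IRQ entry path advertises it through the IRQCHIP_PIPELINE_SAFE flag added above; my_chip_mask, my_chip_unmask and my_chip_eoi are placeholders:

	static struct irq_chip my_pipeline_safe_chip = {
		.name		= "my-chip",
		.irq_mask	= my_chip_mask,
		.irq_unmask	= my_chip_unmask,
		.irq_eoi	= my_chip_eoi,
		.flags		= IRQCHIP_PIPELINE_SAFE,
	};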
+ */ +#ifndef _LINUX_IRQ_PIPELINE_H +#define _LINUX_IRQ_PIPELINE_H + +struct cpuidle_device; +struct cpuidle_state; +struct irq_desc; + +#ifdef CONFIG_IRQ_PIPELINE + +#include +#include +#include +#include +#include +#include +#include + +void irq_pipeline_init_early(void); + +void irq_pipeline_init(void); + +void arch_irq_pipeline_init(void); + +void generic_pipeline_irq_desc(struct irq_desc *desc, + struct pt_regs *regs); + +int irq_inject_pipeline(unsigned int irq); + +void synchronize_pipeline(void); + +static __always_inline void synchronize_pipeline_on_irq(void) +{ + /* + * Optimize if we preempted the high priority oob stage: we + * don't need to synchronize the pipeline unless there is a + * pending interrupt for it. + */ + if (running_inband() || + stage_irqs_pending(this_oob_staged())) + synchronize_pipeline(); +} + +bool handle_oob_irq(struct irq_desc *desc); + +void arch_do_IRQ_pipelined(struct irq_desc *desc); + +#ifdef CONFIG_SMP +void irq_send_oob_ipi(unsigned int ipi, + const struct cpumask *cpumask); +#endif /* CONFIG_SMP */ + +void irq_pipeline_oops(void); + +bool irq_cpuidle_enter(struct cpuidle_device *dev, + struct cpuidle_state *state); + +int run_oob_call(int (*fn)(void *arg), void *arg); + +static inline bool inband_irq_pending(void) +{ + check_hard_irqs_disabled(); + + return stage_irqs_pending(this_inband_staged()); +} + +struct irq_stage_data * +handle_irq_pipelined_prepare(struct pt_regs *regs); + +int handle_irq_pipelined_finish(struct irq_stage_data *prevd, + struct pt_regs *regs); + +int handle_irq_pipelined(struct pt_regs *regs); + +void sync_inband_irqs(void); + +extern struct irq_domain *synthetic_irq_domain; + +#else /* !CONFIG_IRQ_PIPELINE */ + +#include +#include + +static inline +void irq_pipeline_init_early(void) { } + +static inline +void irq_pipeline_init(void) { } + +static inline +void irq_pipeline_oops(void) { } + +static inline int +generic_pipeline_irq_desc(struct irq_desc *desc, + struct pt_regs *regs) +{ + return 0; +} + +static inline bool handle_oob_irq(struct irq_desc *desc) +{ + return false; +} + +static inline bool irq_cpuidle_enter(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + return true; +} + +static inline bool inband_irq_pending(void) +{ + return false; +} + +static inline void sync_inband_irqs(void) { } + +#endif /* !CONFIG_IRQ_PIPELINE */ + +#if !defined(CONFIG_IRQ_PIPELINE) || !defined(CONFIG_SPARSE_IRQ) +static inline void uncache_irq_desc(unsigned int irq) { } +#else +void uncache_irq_desc(unsigned int irq); +#endif + +#endif /* _LINUX_IRQ_PIPELINE_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/irqstage.h linux-dovetail-v5.15.y-dovetail/include/linux/irqstage.h --- linux-5.15.26/include/linux/irqstage.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/irqstage.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,398 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2016, 2019 Philippe Gerum . + */ +#ifndef _LINUX_IRQSTAGE_H +#define _LINUX_IRQSTAGE_H + +#ifdef CONFIG_IRQ_PIPELINE + +#include +#include +#include +#include +#include + +struct kvm_oob_notifier; + +struct irq_stage { + int index; + const char *name; +}; + +extern struct irq_stage inband_stage; + +extern struct irq_stage oob_stage; + +struct irq_event_map; + +struct irq_log { + unsigned long index_0; + struct irq_event_map *map; +}; + +/* Per-CPU, per-stage data. 
*/ +struct irq_stage_data { + struct irq_log log; + struct irq_stage *stage; +#ifdef CONFIG_DEBUG_IRQ_PIPELINE + int cpu; +#endif +}; + +/* Per-CPU pipeline descriptor. */ +struct irq_pipeline_data { + struct irq_stage_data stages[2]; + struct pt_regs tick_regs; +#ifdef CONFIG_DOVETAIL + struct task_struct *task_inflight; + struct task_struct *rqlock_owner; +#ifdef CONFIG_KVM + struct kvm_oob_notifier *vcpu_notify; +#endif +#endif +}; + +DECLARE_PER_CPU(struct irq_pipeline_data, irq_pipeline); + +/* + * The low-level stall bit accessors. Should be used by the Dovetail + * core implementation exclusively, inband_irq_*() and oob_irq_*() + * accessors are available to common code. + */ + +#define INBAND_STALL_BIT 0 +#define OOB_STALL_BIT 1 + +static __always_inline void init_task_stall_bits(struct task_struct *p) +{ + __set_bit(INBAND_STALL_BIT, &p->stall_bits); + __clear_bit(OOB_STALL_BIT, &p->stall_bits); +} + +static __always_inline void stall_inband_nocheck(void) +{ + __set_bit(INBAND_STALL_BIT, &current->stall_bits); + barrier(); +} + +static __always_inline void stall_inband(void) +{ + WARN_ON_ONCE(irq_pipeline_debug() && running_oob()); + stall_inband_nocheck(); +} + +static __always_inline void unstall_inband_nocheck(void) +{ + barrier(); + __clear_bit(INBAND_STALL_BIT, &current->stall_bits); +} + +static __always_inline void unstall_inband(void) +{ + WARN_ON_ONCE(irq_pipeline_debug() && running_oob()); + unstall_inband_nocheck(); +} + +static __always_inline int test_and_stall_inband_nocheck(void) +{ + return __test_and_set_bit(INBAND_STALL_BIT, &current->stall_bits); +} + +static __always_inline int test_and_stall_inband(void) +{ + WARN_ON_ONCE(irq_pipeline_debug() && running_oob()); + return test_and_stall_inband_nocheck(); +} + +static __always_inline int test_inband_stall(void) +{ + return test_bit(INBAND_STALL_BIT, &current->stall_bits); +} + +static __always_inline void stall_oob(void) +{ + __set_bit(OOB_STALL_BIT, &current->stall_bits); + barrier(); +} + +static __always_inline void unstall_oob(void) +{ + barrier(); + __clear_bit(OOB_STALL_BIT, &current->stall_bits); +} + +static __always_inline int test_and_stall_oob(void) +{ + return __test_and_set_bit(OOB_STALL_BIT, &current->stall_bits); +} + +static __always_inline int test_oob_stall(void) +{ + return test_bit(OOB_STALL_BIT, &current->stall_bits); +} + +/** + * this_staged - IRQ stage data on the current CPU + * + * Return the address of @stage's data on the current CPU. IRQs must + * be hard disabled to prevent CPU migration. + */ +static __always_inline +struct irq_stage_data *this_staged(struct irq_stage *stage) +{ + return &raw_cpu_ptr(irq_pipeline.stages)[stage->index]; +} + +/** + * percpu_inband_staged - IRQ stage data on specified CPU + * + * Return the address of @stage's data on @cpu. + * + * This is the slowest accessor, use it carefully. Prefer + * this_staged() for requests referring to the current + * CPU. Additionally, if the target stage is known at build time, + * consider using this_{inband, oob}_staged() instead. + */ +static __always_inline +struct irq_stage_data *percpu_inband_staged(struct irq_stage *stage, int cpu) +{ + return &per_cpu(irq_pipeline.stages, cpu)[stage->index]; +} + +/** + * this_inband_staged - return the address of the pipeline context + * data for the inband stage on the current CPU. CPU migration must be + * disabled. + * + * This accessor is recommended when the stage we refer to is known at + * build time to be the inband one. 
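To make the role of the stall bits concrete, here is a deliberately simplified, conceptual sketch (not the actual implementation) of how the virtual interrupt-disable state of the inband stage maps onto the accessors above once pipelining is enabled:

	/* Conceptual only: "disabling interrupts" inband merely stalls the
	 * stage; hard IRQs stay open and get logged for later replay. */
	static inline void conceptual_inband_irq_disable(void)
	{
		stall_inband();		/* inband IRQ delivery is deferred */
	}

	static inline void conceptual_inband_irq_enable(void)
	{
		unstall_inband();
		sync_inband_irqs();	/* replay IRQs logged while stalled */
	}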
+ */ +static __always_inline struct irq_stage_data *this_inband_staged(void) +{ + return raw_cpu_ptr(&irq_pipeline.stages[0]); +} + +/** + * this_oob_staged - return the address of the pipeline context data + * for the registered oob stage on the current CPU. CPU migration must + * be disabled. + * + * This accessor is recommended when the stage we refer to is known at + * build time to be the registered oob stage. This address is always + * different from the context data of the inband stage, even in + * absence of registered oob stage. + */ +static __always_inline struct irq_stage_data *this_oob_staged(void) +{ + return raw_cpu_ptr(&irq_pipeline.stages[1]); +} + +static __always_inline struct irq_stage_data *__current_irq_staged(void) +{ + return &raw_cpu_ptr(irq_pipeline.stages)[stage_level()]; +} + +/** + * current_irq_staged - return the address of the pipeline context + * data for the current stage. CPU migration must be disabled. + */ +#define current_irq_staged __current_irq_staged() + +static __always_inline +void check_staged_locality(struct irq_stage_data *pd) +{ +#ifdef CONFIG_DEBUG_IRQ_PIPELINE + /* + * Setting our context with another processor's is a really + * bad idea, our caller definitely went loopy. + */ + WARN_ON_ONCE(raw_smp_processor_id() != pd->cpu); +#endif +} + +/** + * switch_oob(), switch_inband() - switch the current CPU to the + * specified stage context. CPU migration must be disabled. + * + * Calling these routines is the only sane and safe way to change the + * interrupt stage for the current CPU. Don't bypass them, ever. + * Really. + */ +static __always_inline +void switch_oob(struct irq_stage_data *pd) +{ + check_staged_locality(pd); + if (!(preempt_count() & STAGE_MASK)) + preempt_count_add(STAGE_OFFSET); +} + +static __always_inline +void switch_inband(struct irq_stage_data *pd) +{ + check_staged_locality(pd); + if (preempt_count() & STAGE_MASK) + preempt_count_sub(STAGE_OFFSET); +} + +static __always_inline +void set_current_irq_staged(struct irq_stage_data *pd) +{ + if (pd->stage == &inband_stage) + switch_inband(pd); + else + switch_oob(pd); +} + +static __always_inline struct irq_stage *__current_irq_stage(void) +{ + /* + * We don't have to hard disable irqs while accessing the + * per-CPU stage data here, because there is no way we could + * switch stage and CPU at the same time. + */ + return __current_irq_staged()->stage; +} + +#define current_irq_stage __current_irq_stage() + +static __always_inline bool oob_stage_present(void) +{ + return oob_stage.index != 0; +} + +/** + * stage_irqs_pending() - Whether we have interrupts pending + * (i.e. logged) on the current CPU for the given stage. Hard IRQs + * must be disabled. 
+ */ +static __always_inline int stage_irqs_pending(struct irq_stage_data *pd) +{ + return pd->log.index_0 != 0; +} + +void sync_current_irq_stage(void); + +void sync_irq_stage(struct irq_stage *top); + +void irq_post_stage(struct irq_stage *stage, + unsigned int irq); + +static __always_inline void irq_post_oob(unsigned int irq) +{ + irq_post_stage(&oob_stage, irq); +} + +static __always_inline void irq_post_inband(unsigned int irq) +{ + irq_post_stage(&inband_stage, irq); +} + +static __always_inline void oob_irq_disable(void) +{ + hard_local_irq_disable(); + stall_oob(); +} + +static __always_inline unsigned long oob_irq_save(void) +{ + hard_local_irq_disable(); + return test_and_stall_oob(); +} + +static __always_inline int oob_irqs_disabled(void) +{ + return test_oob_stall(); +} + +void oob_irq_enable(void); + +void __oob_irq_restore(unsigned long x); + +static __always_inline void oob_irq_restore(unsigned long x) +{ + if ((x ^ test_oob_stall()) & 1) + __oob_irq_restore(x); +} + +bool stage_disabled(void); + +unsigned long test_and_lock_stage(int *irqsoff); + +void unlock_stage(unsigned long irqstate); + +#define stage_save_flags(__irqstate) \ + do { \ + unsigned long __flags = hard_local_save_flags(); \ + (__irqstate) = irqs_merge_flags(__flags, \ + irqs_disabled()); \ + } while (0) + +int enable_oob_stage(const char *name); + +int arch_enable_oob_stage(void); + +void disable_oob_stage(void); + +#else /* !CONFIG_IRQ_PIPELINE */ + +#include + +void call_is_nop_without_pipelining(void); + +static __always_inline void stall_inband(void) { } + +static __always_inline void unstall_inband(void) { } + +static __always_inline int test_and_stall_inband(void) +{ + return false; +} + +static __always_inline int test_inband_stall(void) +{ + return false; +} + +static __always_inline bool oob_stage_present(void) +{ + return false; +} + +static __always_inline bool stage_disabled(void) +{ + return irqs_disabled(); +} + +static __always_inline void irq_post_inband(unsigned int irq) +{ + call_is_nop_without_pipelining(); +} + +#define test_and_lock_stage(__irqsoff) \ + ({ \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + *(__irqsoff) = irqs_disabled_flags(__flags); \ + __flags; \ + }) + +#define unlock_stage(__flags) raw_local_irq_restore(__flags) + +#define stage_save_flags(__flags) raw_local_save_flags(__flags) + +static __always_inline void stall_inband_nocheck(void) +{ } + +static __always_inline void unstall_inband_nocheck(void) +{ } + +static __always_inline int test_and_stall_inband_nocheck(void) +{ + return irqs_disabled(); +} + +#endif /* !CONFIG_IRQ_PIPELINE */ + +#endif /* !_LINUX_IRQSTAGE_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/kernel.h linux-dovetail-v5.15.y-dovetail/include/linux/kernel.h --- linux-5.15.26/include/linux/kernel.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/kernel.h 2022-03-10 09:47:50.000000000 +0100 @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -106,7 +107,7 @@ static __always_inline void might_resche #else -# define might_resched() do { } while (0) +# define might_resched() check_inband_stage() #endif /* CONFIG_PREEMPT_* */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/kvm_host.h linux-dovetail-v5.15.y-dovetail/include/linux/kvm_host.h --- linux-5.15.26/include/linux/kvm_host.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/kvm_host.h 2022-03-10 09:47:50.000000000 +0100 @@ -18,6 
+18,7 @@ #include #include #include +#include #include #include #include @@ -294,11 +295,24 @@ struct kvm_mmio_fragment { unsigned len; }; +/* + * Called when the host is about to leave the inband stage. Typically + * used for switching the current vcpu out of guest mode before a + * companion core reinstates an oob task context. + */ +struct kvm_oob_notifier { + void (*handler)(struct kvm_oob_notifier *nfy); + bool put_vcpu; +}; + struct kvm_vcpu { struct kvm *kvm; #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; #endif +#ifdef CONFIG_DOVETAIL + struct kvm_oob_notifier oob_notifier; +#endif int cpu; int vcpu_id; /* id given by userspace at creation */ int vcpu_idx; /* index in kvm->vcpus array */ @@ -1921,6 +1935,47 @@ static inline int kvm_arch_vcpu_run_pid_ } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ +#if defined(CONFIG_DOVETAIL) && defined(CONFIG_KVM) +static inline void inband_init_vcpu(struct kvm_vcpu *vcpu, + void (*preempt_handler)(struct kvm_oob_notifier *nfy)) +{ + vcpu->oob_notifier.handler = preempt_handler; + vcpu->oob_notifier.put_vcpu = false; +} + +static inline void inband_enter_guest(struct kvm_vcpu *vcpu) +{ + struct irq_pipeline_data *p = raw_cpu_ptr(&irq_pipeline); + WRITE_ONCE(p->vcpu_notify, &vcpu->oob_notifier); +} + +static inline void inband_exit_guest(void) +{ + struct irq_pipeline_data *p = raw_cpu_ptr(&irq_pipeline); + WRITE_ONCE(p->vcpu_notify, NULL); +} + +static inline void inband_set_vcpu_release_state(struct kvm_vcpu *vcpu, + bool pending) +{ + vcpu->oob_notifier.put_vcpu = pending; +} +#else +static inline void inband_init_vcpu(struct kvm_vcpu *vcpu, + void (*preempt_handler)(struct kvm_oob_notifier *nfy)) +{ } + +static inline void inband_enter_guest(struct kvm_vcpu *vcpu) +{ } + +static inline void inband_exit_guest(void) +{ } + +static inline void inband_set_vcpu_release_state(struct kvm_vcpu *vcpu, + bool pending) +{ } +#endif + typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/lockdep.h linux-dovetail-v5.15.y-dovetail/include/linux/lockdep.h --- linux-5.15.26/include/linux/lockdep.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/lockdep.h 2022-03-10 09:47:50.000000000 +0100 @@ -215,22 +215,22 @@ static inline void lockdep_init_map(stru * or they are too narrow (they suffer from a false class-split): */ #define lockdep_set_class(lock, key) \ - lockdep_init_map_waits(&(lock)->dep_map, #key, key, 0, \ + lockdep_init_map_waits(LOCKDEP_ALT_DEPMAP(lock), #key, key, 0, \ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer) #define lockdep_set_class_and_name(lock, key, name) \ - lockdep_init_map_waits(&(lock)->dep_map, name, key, 0, \ + lockdep_init_map_waits(LOCKDEP_ALT_DEPMAP(lock), name, key, 0, \ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer) #define lockdep_set_class_and_subclass(lock, key, sub) \ - lockdep_init_map_waits(&(lock)->dep_map, #key, key, sub,\ + lockdep_init_map_waits(LOCKDEP_ALT_DEPMAP(lock), #key, key, sub,\ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer) #define lockdep_set_subclass(lock, sub) \ - lockdep_init_map_waits(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ + lockdep_init_map_waits(LOCKDEP_ALT_DEPMAP(lock), #lock, (lock)->dep_map.key, sub,\ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer) @@ -240,7 +240,8 @@ 
static inline void lockdep_init_map(stru /* * Compare locking classes */ -#define lockdep_match_class(lock, key) lockdep_match_key(&(lock)->dep_map, key) +#define lockdep_match_class(lock, key) \ + lockdep_match_key(LOCKDEP_ALT_DEPMAP(lock), key) static inline int lockdep_match_key(struct lockdep_map *lock, struct lock_class_key *key) @@ -283,8 +284,8 @@ static inline int lock_is_held(const str return lock_is_held_type(lock, -1); } -#define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map) -#define lockdep_is_held_type(lock, r) lock_is_held_type(&(lock)->dep_map, (r)) +#define lockdep_is_held(lock) lock_is_held(LOCKDEP_ALT_DEPMAP(lock)) +#define lockdep_is_held_type(lock, r) lock_is_held_type(LOCKDEP_ALT_DEPMAP(lock), (r)) extern void lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, @@ -313,28 +314,34 @@ extern void lock_unpin_lock(struct lockd do { WARN_ON_ONCE(debug_locks && !(cond)); } while (0) #define lockdep_assert_held(l) \ - lockdep_assert(lockdep_is_held(l) != LOCK_STATE_NOT_HELD) + lockdep_assert(LOCKDEP_HARD_DEBUG_RET(l, 1, \ + lockdep_is_held(l) != LOCK_STATE_NOT_HELD)) #define lockdep_assert_not_held(l) \ - lockdep_assert(lockdep_is_held(l) != LOCK_STATE_HELD) + lockdep_assert(LOCKDEP_HARD_DEBUG_RET(l, 1, \ + lockdep_is_held(l) != LOCK_STATE_HELD)) #define lockdep_assert_held_write(l) \ - lockdep_assert(lockdep_is_held_type(l, 0)) + lockdep_assert(LOCKDEP_HARD_DEBUG_RET(l, 1, \ + lockdep_is_held_type(l, 0))) #define lockdep_assert_held_read(l) \ - lockdep_assert(lockdep_is_held_type(l, 1)) + lockdep_assert(LOCKDEP_HARD_DEBUG_RET(l, 1, \ + lockdep_is_held_type(l, 1))) #define lockdep_assert_held_once(l) \ - lockdep_assert_once(lockdep_is_held(l) != LOCK_STATE_NOT_HELD) + lockdep_assert_once(LOCKDEP_HARD_DEBUG_RET(l, 1, \ + lockdep_is_held(l) != LOCK_STATE_NOT_HELD)) #define lockdep_assert_none_held_once() \ lockdep_assert_once(!current->lockdep_depth) #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) -#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map) -#define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c)) -#define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c)) +#define lockdep_pin_lock(l) LOCKDEP_HARD_DEBUG_RET(l, ({ struct pin_cookie cookie; cookie;} ), \ + lock_pin_lock(LOCKDEP_ALT_DEPMAP(l))) +#define lockdep_repin_lock(l,c) LOCKDEP_HARD_DEBUG(l,, lock_repin_lock(LOCKDEP_ALT_DEPMAP(l), (c))) +#define lockdep_unpin_lock(l,c) LOCKDEP_HARD_DEBUG(l,, lock_unpin_lock(LOCKDEP_ALT_DEPMAP(l), (c))) #else /* !CONFIG_LOCKDEP */ @@ -598,12 +605,23 @@ DECLARE_PER_CPU(unsigned int, lockdep_re #define lockdep_assert_irqs_enabled() \ do { \ - WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(__lockdep_enabled && \ + ((running_oob() && hard_irqs_disabled()) || \ + (running_inband() && !this_cpu_read(hardirqs_enabled)))); \ } while (0) #define lockdep_assert_irqs_disabled() \ do { \ - WARN_ON_ONCE(__lockdep_enabled && this_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(__lockdep_enabled && !hard_irqs_disabled() && \ + (running_oob() || this_cpu_read(hardirqs_enabled))); \ +} while (0) + +#define lockdep_read_irqs_state() \ + ({ this_cpu_read(hardirqs_enabled); }) + +#define lockdep_write_irqs_state(__state) \ +do { \ + this_cpu_write(hardirqs_enabled, __state); \ } while (0) #define lockdep_assert_in_irq() \ @@ -644,6 +662,8 @@ do { \ # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) +# 
define lockdep_read_irqs_state() 0 +# define lockdep_write_irqs_state(__state) do { (void)(__state); } while (0) # define lockdep_assert_in_irq() do { } while (0) # define lockdep_assert_preemption_enabled() do { } while (0) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/mm.h linux-dovetail-v5.15.y-dovetail/include/linux/mm.h --- linux-5.15.26/include/linux/mm.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/mm.h 2022-03-10 09:47:50.000000000 +0100 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -1339,12 +1340,21 @@ static inline bool is_cow_mapping(vm_fla * This should most likely only be called during fork() to see whether we * should break the cow immediately for a page on the src mm. */ -static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, - struct page *page) +static inline bool page_needs_cow(struct vm_area_struct *vma, + struct page *page) { if (!is_cow_mapping(vma->vm_flags)) return false; + /* + * Dovetail: If the source mm belongs to a dovetailed process, + * we don't want to impose the COW-induced latency on it: make + * sure the child gets its own copy of the page. + */ + if (IS_ENABLED(CONFIG_DOVETAIL) && + test_bit(MMF_DOVETAILED, &vma->vm_mm->flags)) + return true; + if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) return false; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/mm_types.h linux-dovetail-v5.15.y-dovetail/include/linux/mm_types.h --- linux-5.15.26/include/linux/mm_types.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/mm_types.h 2022-03-10 09:47:50.000000000 +0100 @@ -18,6 +18,8 @@ #include +#include + #ifndef AT_VECTOR_SIZE_ARCH #define AT_VECTOR_SIZE_ARCH 0 #endif @@ -575,6 +577,9 @@ struct mm_struct { #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif +#ifdef CONFIG_DOVETAIL + struct oob_mm_state oob_state; +#endif struct work_struct async_put_work; #ifdef CONFIG_IOMMU_SUPPORT diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/netdevice.h linux-dovetail-v5.15.y-dovetail/include/linux/netdevice.h --- linux-5.15.26/include/linux/netdevice.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/netdevice.h 2022-03-10 09:47:50.000000000 +0100 @@ -40,6 +40,7 @@ #endif #include #include +#include #include #include @@ -297,6 +298,7 @@ enum netdev_state_t { __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, __LINK_STATE_TESTING, + __LINK_STATE_OOB, }; @@ -1563,6 +1565,13 @@ struct net_device_ops { struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); +#ifdef CONFIG_NET_OOB + struct sk_buff * (*ndo_alloc_oob_skb)(struct net_device *dev, + dma_addr_t *dma_addr); + void (*ndo_free_oob_skb)(struct net_device *dev, + struct sk_buff *skb, + dma_addr_t dma_addr); +#endif }; /** @@ -1759,6 +1768,7 @@ enum netdev_ml_priv_type { * @tlsdev_ops: Transport Layer Security offload operations * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. 
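A hypothetical consumer of the two ndo hooks added above, written against the netdev_*_oob_skb() wrappers that appear further down in this hunk (editorial sketch, dev is a placeholder):

	struct sk_buff *skb;
	dma_addr_t dma_addr;

	if (!netdev_is_oob_capable(dev))
		return -ENODEV;

	skb = netdev_alloc_oob_skb(dev, &dma_addr);	/* ndo_alloc_oob_skb */
	if (!skb)
		return -ENOMEM;
	/* ... hand the buffer over to the out-of-band stack ... */
	netdev_free_oob_skb(dev, skb, dma_addr);	/* ndo_free_oob_skb */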
+ * @net_oob_context: Out-of-band networking context (oob stage diversion) * * @flags: Interface flags (a la BSD) * @priv_flags: Like 'flags' but invisible to userspace, @@ -2041,6 +2051,10 @@ struct net_device { const struct tlsdev_ops *tlsdev_ops; #endif +#ifdef CONFIG_NET_OOB + struct oob_netdev_context oob_context; +#endif + const struct header_ops *header_ops; unsigned char operstate; @@ -4326,6 +4340,86 @@ void netif_device_detach(struct net_devi void netif_device_attach(struct net_device *dev); +#ifdef CONFIG_NET_OOB + +static inline bool netif_oob_diversion(const struct net_device *dev) +{ + return test_bit(__LINK_STATE_OOB, &dev->state); +} + +static inline void netif_enable_oob_diversion(struct net_device *dev) +{ + return set_bit(__LINK_STATE_OOB, &dev->state); +} + +static inline void netif_disable_oob_diversion(struct net_device *dev) +{ + clear_bit(__LINK_STATE_OOB, &dev->state); + smp_mb__after_atomic(); +} + +int netif_xmit_oob(struct sk_buff *skb); + +static inline bool netdev_is_oob_capable(struct net_device *dev) +{ + return !!(dev->oob_context.flags & IFF_OOB_CAPABLE); +} + +static inline void netdev_enable_oob_port(struct net_device *dev) +{ + dev->oob_context.flags |= IFF_OOB_PORT; +} + +static inline void netdev_disable_oob_port(struct net_device *dev) +{ + dev->oob_context.flags &= ~IFF_OOB_PORT; +} + +static inline bool netdev_is_oob_port(struct net_device *dev) +{ + return !!(dev->oob_context.flags & IFF_OOB_PORT); +} + +static inline struct sk_buff *netdev_alloc_oob_skb(struct net_device *dev, + dma_addr_t *dma_addr) +{ + return dev->netdev_ops->ndo_alloc_oob_skb(dev, dma_addr); +} + +static inline void netdev_free_oob_skb(struct net_device *dev, + struct sk_buff *skb, + dma_addr_t dma_addr) +{ + dev->netdev_ops->ndo_free_oob_skb(dev, skb, dma_addr); +} + +#else + +static inline bool netif_oob_diversion(const struct net_device *dev) +{ + return false; +} + +static inline bool netdev_is_oob_capable(struct net_device *dev) +{ + return false; +} + +static inline void netdev_enable_oob_port(struct net_device *dev) +{ +} + +static inline void netdev_disable_oob_port(struct net_device *dev) +{ +} + +static inline bool netdev_is_oob_port(struct net_device *dev) +{ + return false; +} + +#endif + /* * Network interface message level settings */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/net.h linux-dovetail-v5.15.y-dovetail/include/linux/net.h --- linux-5.15.26/include/linux/net.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/net.h 2022-03-10 09:47:50.000000000 +0100 @@ -78,6 +78,7 @@ enum sock_type { #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif +#define SOCK_OOB O_OOB #endif /* ARCH_HAS_SOCKET_TYPES */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/poll.h linux-dovetail-v5.15.y-dovetail/include/linux/poll.h --- linux-5.15.26/include/linux/poll.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/poll.h 2022-03-10 09:47:50.000000000 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/preempt.h linux-dovetail-v5.15.y-dovetail/include/linux/preempt.h --- linux-5.15.26/include/linux/preempt.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/preempt.h 2022-03-10 09:47:50.000000000 +0100 @@ -27,17 +27,23 @@ * SOFTIRQ_MASK: 0x0000ff00 * HARDIRQ_MASK: 0x000f0000 * NMI_MASK: 
0x00f00000 + * PIPELINE_MASK: 0x01000000 + * STAGE_MASK: 0x02000000 * PREEMPT_NEED_RESCHED: 0x80000000 */ #define PREEMPT_BITS 8 #define SOFTIRQ_BITS 8 #define HARDIRQ_BITS 4 #define NMI_BITS 4 +#define PIPELINE_BITS 1 +#define STAGE_BITS 1 #define PREEMPT_SHIFT 0 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) #define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) +#define PIPELINE_SHIFT (NMI_SHIFT + NMI_BITS) +#define STAGE_SHIFT (PIPELINE_SHIFT + PIPELINE_BITS) #define __IRQ_MASK(x) ((1UL << (x))-1) @@ -45,11 +51,15 @@ #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) #define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) +#define PIPELINE_MASK (__IRQ_MASK(PIPELINE_BITS) << PIPELINE_SHIFT) +#define STAGE_MASK (__IRQ_MASK(STAGE_BITS) << STAGE_SHIFT) #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) #define NMI_OFFSET (1UL << NMI_SHIFT) +#define PIPELINE_OFFSET (1UL << PIPELINE_SHIFT) +#define STAGE_OFFSET (1UL << STAGE_SHIFT) #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) @@ -86,6 +96,9 @@ #endif #define irq_count() (nmi_count() | hardirq_count() | softirq_count()) +/* The current IRQ stage level: 0=inband, 1=oob */ +#define stage_level() ((preempt_count() & STAGE_MASK) >> STAGE_SHIFT) + /* * Macros to retrieve the current execution context: * @@ -104,10 +117,12 @@ * in_irq() - Obsolete version of in_hardirq() * in_softirq() - We have BH disabled, or are processing softirqs * in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled + * in_pipeline() - We're on pipeline entry */ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) +#define in_pipeline() (preempt_count() & PIPELINE_MASK) /* * The preempt_count offset after preempt_disable(); @@ -190,7 +205,8 @@ do { \ #define preempt_enable_no_resched() sched_preempt_enable_no_resched() -#define preemptible() (preempt_count() == 0 && !irqs_disabled()) +#define preemptible() (preempt_count() == 0 && \ + !hard_irqs_disabled() && !irqs_disabled()) #ifdef CONFIG_PREEMPTION #define preempt_enable() \ @@ -399,4 +415,43 @@ static inline void migrate_enable(void) #endif /* CONFIG_SMP */ +#ifdef CONFIG_IRQ_PIPELINE + +static __always_inline bool running_inband(void) +{ + return stage_level() == 0; +} + +static __always_inline bool running_oob(void) +{ + return !running_inband(); +} + +unsigned long hard_preempt_disable(void); +void hard_preempt_enable(unsigned long flags); + +#else + +static __always_inline bool running_inband(void) +{ + return true; +} + +static __always_inline bool running_oob(void) +{ + return false; +} + +#define hard_preempt_disable() \ +({ \ + preempt_disable(); \ + 0; \ +}) +#define hard_preempt_enable(__flags) \ + do { \ + preempt_enable(); \ + (void)(__flags); \ + } while (0) +#endif + #endif /* __LINUX_PREEMPT_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/printk.h linux-dovetail-v5.15.y-dovetail/include/linux/printk.h --- linux-5.15.26/include/linux/printk.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/printk.h 2022-03-10 09:47:50.000000000 +0100 @@ -143,6 +143,20 @@ void early_printk(const char *s, ...) 
{ struct dev_printk_info; +#ifdef CONFIG_RAW_PRINTK +void raw_puts(const char *s, size_t len); +void raw_vprintk(const char *fmt, va_list ap); +asmlinkage __printf(1, 2) +void raw_printk(const char *fmt, ...); +#else +static inline __cold +void raw_puts(const char *s, size_t len) { } +static inline __cold +void raw_vprintk(const char *s, va_list ap) { } +static inline __printf(1, 2) __cold +void raw_printk(const char *s, ...) { } +#endif + #ifdef CONFIG_PRINTK asmlinkage __printf(4, 0) int vprintk_emit(int facility, int level, @@ -293,14 +307,22 @@ extern void __printk_cpu_unlock(void); * * If the lock is owned by another CPU, spin until it becomes available. * Interrupts are restored while spinning. + * + * irq_pipeline: we neither need nor want to disable in-band IRQs over + * the oob stage or pipeline entry contexts, where CPU migration can't + * happen. Conversely, we neither need nor want to disable hard IRQs + * from the oob stage, so that latency won't skyrocket as a result of + * holding the print lock. */ -#define printk_cpu_lock_irqsave(flags) \ - for (;;) { \ - local_irq_save(flags); \ - if (__printk_cpu_trylock()) \ - break; \ - local_irq_restore(flags); \ - __printk_wait_on_cpu_lock(); \ +#define printk_cpu_lock_irqsave(flags) \ + for (;;) { \ + if (running_inband() && !on_pipeline_entry()) \ + local_irq_save(flags); \ + if (__printk_cpu_trylock()) \ + break; \ + if (running_inband() && !on_pipeline_entry()) \ + local_irq_restore(flags); \ + __printk_wait_on_cpu_lock(); \ } /** @@ -308,11 +330,12 @@ extern void __printk_cpu_unlock(void); * lock and restore interrupts. * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave(). */ -#define printk_cpu_unlock_irqrestore(flags) \ - do { \ - __printk_cpu_unlock(); \ - local_irq_restore(flags); \ - } while (0) \ +#define printk_cpu_unlock_irqrestore(flags) \ + do { \ + __printk_cpu_unlock(); \ + if (running_inband() && !on_pipeline_entry()) \ + local_irq_restore(flags); \ + } while (0) \ #else diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/rcupdate.h linux-dovetail-v5.15.y-dovetail/include/linux/rcupdate.h --- linux-5.15.26/include/linux/rcupdate.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/rcupdate.h 2022-03-10 09:47:50.000000000 +0100 @@ -295,7 +295,7 @@ static inline int rcu_read_lock_bh_held( static inline int rcu_read_lock_sched_held(void) { - return !preemptible(); + return !running_inband() || !preemptible(); } static inline int rcu_read_lock_any_held(void) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/regmap.h linux-dovetail-v5.15.y-dovetail/include/linux/regmap.h --- linux-5.15.26/include/linux/regmap.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/regmap.h 2022-03-10 09:47:50.000000000 +0100 @@ -374,6 +374,7 @@ struct regmap_config { int (*reg_write)(void *context, unsigned int reg, unsigned int val); bool fast_io; + bool oob_io; unsigned int max_register; const struct regmap_access_table *wr_table; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/sched/coredump.h linux-dovetail-v5.15.y-dovetail/include/linux/sched/coredump.h --- linux-5.15.26/include/linux/sched/coredump.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/sched/coredump.h 2022-03-10 09:47:50.000000000 +0100 @@ -82,6 +82,7 @@ static inline int get_dumpable(struct mm */ #define MMF_HAS_PINNED 28 /* FOLL_PIN has run, never cleared */ 
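The new regmap_config::oob_io flag sits next to fast_io; below is a hedged sketch of how a driver might request it, with made-up register geometry, assuming the flag marks the map as usable from the out-of-band stage (the patch only shows the field here, not its consumer):

    #include <linux/regmap.h>

    static const struct regmap_config foo_regmap_config = {
        .reg_bits     = 8,
        .val_bits     = 8,
        .max_register = 0x7f,
        .fast_io      = true,   /* spinlock-based locking */
        .oob_io       = true,   /* assumed: I/O allowed from the oob stage */
    };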
#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_DOVETAILED 31 /* mm belongs to a dovetailed process */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/sched.h linux-dovetail-v5.15.y-dovetail/include/linux/sched.h --- linux-5.15.26/include/linux/sched.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/sched.h 2022-03-10 09:47:50.000000000 +0100 @@ -124,6 +124,12 @@ struct task_group; #define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) +#ifdef CONFIG_DOVETAIL +#define task_is_off_stage(task) test_ti_local_flags(task_thread_info(task), _TLF_OFFSTAGE) +#else +#define task_is_off_stage(task) 0 +#endif + /* * Special states are those that do not use the normal wait-loop pattern. See * the comment with set_special_state(). @@ -1142,6 +1148,10 @@ struct task_struct { int softirq_disable_cnt; #endif +#ifdef CONFIG_IRQ_PIPELINE + unsigned long stall_bits; +#endif + #ifdef CONFIG_LOCKDEP # define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/skbuff.h linux-dovetail-v5.15.y-dovetail/include/linux/skbuff.h --- linux-5.15.26/include/linux/skbuff.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/skbuff.h 2022-03-10 09:47:50.000000000 +0100 @@ -875,6 +875,11 @@ struct sk_buff { __u8 decrypted:1; #endif __u8 slow_gro:1; +#ifdef CONFIG_NET_OOB + __u8 oob:1; + __u8 oob_clone:1; + __u8 oob_cloned:1; +#endif #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -1103,6 +1108,54 @@ struct sk_buff *__build_skb(void *data, struct sk_buff *build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size); +#ifdef CONFIG_NET_OOB + +static inline bool skb_is_oob(const struct sk_buff *skb) +{ + return skb->oob; +} + +static inline bool skb_is_oob_clone(const struct sk_buff *skb) +{ + return skb->oob_clone; +} + +static inline bool skb_has_oob_clone(const struct sk_buff *skb) +{ + return skb->oob_cloned; +} + +struct sk_buff *__netdev_alloc_oob_skb(struct net_device *dev, + size_t len, gfp_t gfp_mask); +void __netdev_free_oob_skb(struct net_device *dev, struct sk_buff *skb); +void netdev_reset_oob_skb(struct net_device *dev, struct sk_buff *skb); +struct sk_buff *skb_alloc_oob_head(gfp_t gfp_mask); +void skb_morph_oob_skb(struct sk_buff *n, struct sk_buff *skb); +bool skb_release_oob_skb(struct sk_buff *skb, int *dref); + +static inline bool recycle_oob_skb(struct sk_buff *skb) +{ + bool skb_oob_recycle(struct sk_buff *skb); + + if (!skb->oob) + return false; + + return skb_oob_recycle(skb); +} + +#else + +static inline bool skb_is_oob(const struct sk_buff *skb) +{ + return false; +} + +static inline bool recycle_oob_skb(struct sk_buff *skb) +{ + return false; +} + +#endif struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/smp.h linux-dovetail-v5.15.y-dovetail/include/linux/smp.h --- linux-5.15.26/include/linux/smp.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/smp.h 2022-03-10 09:47:50.000000000 +0100 @@ -268,6 +268,21 @@ static inline int get_boot_cpu_id(void) #define get_cpu() ({ preempt_disable(); __smp_processor_id(); }) #define put_cpu() preempt_enable() 
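The out-of-band skb helpers above pair with the ndo_alloc_oob_skb()/ndo_free_oob_skb() driver hooks from netdevice.h; as a hedged sketch, a driver could pre-fill an RX ring with oob buffers as shown below. The foo_* names are hypothetical, error unwinding is omitted, and netdev_alloc_oob_skb() only exists when CONFIG_NET_OOB is set:

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    static int foo_fill_oob_rx_ring(struct net_device *dev,
                                    struct sk_buff **ring,
                                    dma_addr_t *dma, unsigned int n)
    {
        unsigned int i;

        if (!netdev_is_oob_capable(dev))
            return -ENXIO;

        for (i = 0; i < n; i++) {
            /* Delegates to dev->netdev_ops->ndo_alloc_oob_skb(). */
            ring[i] = netdev_alloc_oob_skb(dev, &dma[i]);
            if (!ring[i])
                return -ENOMEM;
        }

        return 0;
    }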
+#ifdef CONFIG_IRQ_PIPELINE +#define hard_get_cpu(flags) ({ \ + (flags) = hard_preempt_disable(); \ + raw_smp_processor_id(); \ + }) +#define hard_put_cpu(flags) hard_preempt_enable(flags) +#else +#define hard_get_cpu(flags) ({ (void)(flags); get_cpu(); }) +#define hard_put_cpu(flags) \ + do { \ + (void)(flags); \ + put_cpu(); \ + } while (0) +#endif + /* * Callback to arch code if there's nosmp or maxcpus=0 on the * boot command line: diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/socket.h linux-dovetail-v5.15.y-dovetail/include/linux/socket.h --- linux-5.15.26/include/linux/socket.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/socket.h 2022-03-10 09:47:50.000000000 +0100 @@ -226,8 +226,9 @@ struct ucred { #define AF_MCTP 45 /* Management component * transport protocol */ +#define AF_OOB 46 /* Out-of-band domain sockets */ -#define AF_MAX 46 /* For now.. */ +#define AF_MAX 47 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -278,6 +279,7 @@ struct ucred { #define PF_SMC AF_SMC #define PF_XDP AF_XDP #define PF_MCTP AF_MCTP +#define PF_OOB AF_OOB #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/spi/spi.h linux-dovetail-v5.15.y-dovetail/include/linux/spi/spi.h --- linux-5.15.26/include/linux/spi/spi.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/spi/spi.h 2022-03-10 09:47:50.000000000 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ struct dma_chan; struct software_node; struct spi_controller; struct spi_transfer; +struct spi_oob_transfer; struct spi_controller_mem_ops; /* @@ -355,6 +357,7 @@ extern struct spi_device *spi_new_ancill * @io_mutex: mutex for physical bus access * @bus_lock_spinlock: spinlock for SPI bus locking * @bus_lock_mutex: mutex for exclusion of multiple callers + * @bus_oob_lock_sem: semaphore for exclusion during oob operations * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. 
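hard_get_cpu()/hard_put_cpu() above mirror get_cpu()/put_cpu() on top of the hard preemption helpers; a minimal sketch of the expected usage pattern (the per-CPU counter is hypothetical):

    #include <linux/smp.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned long, foo_hits);

    static void foo_count_hit(void)
    {
        unsigned long flags;
        int cpu;

        cpu = hard_get_cpu(flags);  /* no migration, oob-safe section */
        per_cpu(foo_hits, cpu)++;
        hard_put_cpu(flags);
    }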
This @@ -538,6 +541,10 @@ struct spi_controller { spinlock_t bus_lock_spinlock; struct mutex bus_lock_mutex; +#ifdef CONFIG_SPI_OOB + struct semaphore bus_oob_lock_sem; +#endif + /* flag indicating that the SPI bus is locked for exclusive use */ bool bus_lock_flag; @@ -630,6 +637,14 @@ struct spi_controller { int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); int (*slave_abort)(struct spi_controller *ctlr); + int (*prepare_oob_transfer)(struct spi_controller *ctlr, + struct spi_oob_transfer *xfer); + void (*start_oob_transfer)(struct spi_controller *ctlr, + struct spi_oob_transfer *xfer); + void (*pulse_oob_transfer)(struct spi_controller *ctlr, + struct spi_oob_transfer *xfer); + void (*terminate_oob_transfer)(struct spi_controller *ctlr, + struct spi_oob_transfer *xfer); /* * These hooks are for drivers that use a generic implementation @@ -1112,6 +1127,90 @@ static inline void spi_message_free(stru kfree(m); } +struct spi_oob_transfer { + struct spi_device *spi; + dma_addr_t dma_addr; + size_t aligned_frame_len; + void *io_buffer; /* 2 x aligned_frame_len */ + struct dma_async_tx_descriptor *txd; + struct dma_async_tx_descriptor *rxd; + u32 effective_speed_hz; + /* + * Caller-defined settings for the transfer. + */ + struct spi_oob_setup { + u32 frame_len; + u32 speed_hz; + u8 bits_per_word; + dma_async_tx_callback xfer_done; + } setup; +}; + +static inline off_t spi_get_oob_rxoff(struct spi_oob_transfer *xfer) +{ + /* RX area is in first half of the I/O buffer. */ + return 0; +} + +static inline off_t spi_get_oob_txoff(struct spi_oob_transfer *xfer) +{ + /* TX area is in second half of the I/O buffer. */ + return xfer->aligned_frame_len; +} + +static inline size_t spi_get_oob_iolen(struct spi_oob_transfer *xfer) +{ + return xfer->aligned_frame_len * 2; +} + +#ifdef CONFIG_SPI_OOB + +struct vm_area_struct; + +int spi_prepare_oob_transfer(struct spi_device *spi, + struct spi_oob_transfer *xfer); + +void spi_start_oob_transfer(struct spi_oob_transfer *xfer); + +int spi_pulse_oob_transfer(struct spi_oob_transfer *xfer); + +void spi_terminate_oob_transfer(struct spi_oob_transfer *xfer); + +int spi_mmap_oob_transfer(struct vm_area_struct *vma, + struct spi_oob_transfer *xfer); + +#else + +static inline +int spi_prepare_oob_transfer(struct spi_device *spi, + struct spi_oob_transfer *xfer) +{ + return -ENOTSUPP; +} + +static inline +void spi_start_oob_transfer(struct spi_oob_transfer *xfer) +{ } + +static inline +int spi_pulse_oob_transfer(struct spi_oob_transfer *xfer) +{ + return -EIO; +} + +static inline +void spi_terminate_oob_transfer(struct spi_oob_transfer *xfer) +{ } + +static inline +int spi_mmap_oob_transfer(struct vm_area_struct *vma, + struct spi_oob_transfer *xfer) +{ + return -ENXIO; +} + +#endif + extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_async_locked(struct spi_device *spi, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/spinlock_api_up.h linux-dovetail-v5.15.y-dovetail/include/linux/spinlock_api_up.h --- linux-5.15.26/include/linux/spinlock_api_up.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/spinlock_api_up.h 2022-03-10 09:47:50.000000000 +0100 @@ -30,21 +30,33 @@ #define __LOCK(lock) \ do { preempt_disable(); ___LOCK(lock); } while (0) +#define __HARD_LOCK(lock) \ + do { ___LOCK(lock); } while (0) + #define __LOCK_BH(lock) \ do { __local_bh_disable_ip(_THIS_IP_, 
SOFTIRQ_LOCK_OFFSET); ___LOCK(lock); } while (0) #define __LOCK_IRQ(lock) \ do { local_irq_disable(); __LOCK(lock); } while (0) +#define __HARD_LOCK_IRQ(lock) \ + do { hard_local_irq_disable(); __HARD_LOCK(lock); } while (0) + #define __LOCK_IRQSAVE(lock, flags) \ do { local_irq_save(flags); __LOCK(lock); } while (0) +#define __HARD_LOCK_IRQSAVE(lock, flags) \ + do { flags = hard_local_irq_save(); __HARD_LOCK(lock); } while (0) + #define ___UNLOCK(lock) \ do { __release(lock); (void)(lock); } while (0) #define __UNLOCK(lock) \ do { preempt_enable(); ___UNLOCK(lock); } while (0) +#define __HARD_UNLOCK(lock) \ + do { ___UNLOCK(lock); } while (0) + #define __UNLOCK_BH(lock) \ do { __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_LOCK_OFFSET); \ ___UNLOCK(lock); } while (0) @@ -52,9 +64,15 @@ #define __UNLOCK_IRQ(lock) \ do { local_irq_enable(); __UNLOCK(lock); } while (0) +#define __HARD_UNLOCK_IRQ(lock) \ + do { hard_local_irq_enable(); __HARD_UNLOCK(lock); } while (0) + #define __UNLOCK_IRQRESTORE(lock, flags) \ do { local_irq_restore(flags); __UNLOCK(lock); } while (0) +#define __HARD_UNLOCK_IRQRESTORE(lock, flags) \ + do { hard_local_irq_restore(flags); __HARD_UNLOCK(lock); } while (0) + #define _raw_spin_lock(lock) __LOCK(lock) #define _raw_spin_lock_nested(lock, subclass) __LOCK(lock) #define _raw_read_lock(lock) __LOCK(lock) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/spinlock.h linux-dovetail-v5.15.y-dovetail/include/linux/spinlock.h --- linux-5.15.26/include/linux/spinlock.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/spinlock.h 2022-03-10 09:47:50.000000000 +0100 @@ -101,21 +101,27 @@ struct lock_class_key *key, short inner); # define raw_spin_lock_init(lock) \ + LOCK_ALTERNATIVES(lock, spin_lock_init, \ do { \ static struct lock_class_key __key; \ \ - __raw_spin_lock_init((lock), #lock, &__key, LD_WAIT_SPIN); \ -} while (0) + __raw_spin_lock_init(__RAWLOCK(lock), #lock, &__key, LD_WAIT_SPIN); \ +} while (0)) #else # define raw_spin_lock_init(lock) \ - do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0) + LOCK_ALTERNATIVES(lock, spin_lock_init, \ + do { *(__RAWLOCK(lock)) = __RAW_SPIN_LOCK_UNLOCKED(__RAWLOCK(lock)); } while (0)) #endif -#define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock) +#define raw_spin_is_locked(lock) \ + LOCK_ALTERNATIVES_RET(lock, spin_is_locked, \ + arch_spin_is_locked(&(__RAWLOCK(lock))->raw_lock)) #ifdef arch_spin_is_contended -#define raw_spin_is_contended(lock) arch_spin_is_contended(&(lock)->raw_lock) +#define raw_spin_is_contended(lock) \ + LOCK_ALTERNATIVES_RET(lock, spin_is_contended, \ + arch_spin_is_contended(&(__RAWLOCK(lock))->raw_lock)) #else #define raw_spin_is_contended(lock) (((void)(lock), 0)) #endif /*arch_spin_is_contended*/ @@ -224,13 +230,19 @@ static inline void do_raw_spin_unlock(ra * various methods are defined as nops in the case they are not * required. 
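Back-referencing the out-of-band SPI API added to linux/spi/spi.h just above: a controller exposes prepare/start/pulse/terminate hooks, and a client fills the caller-defined spi_oob_transfer.setup block before preparing the transfer. The sketch below is inferred from those declarations only; the foo_* names and numeric settings are illustrative:

    #include <linux/spi/spi.h>
    #include <linux/string.h>

    /* dma_async_tx_callback: invoked when one oob frame completes. */
    static void foo_frame_done(void *arg)
    {
        /* e.g. wake up an out-of-band consumer */
    }

    static int foo_setup_oob_link(struct spi_device *spi,
                                  struct spi_oob_transfer *xfer)
    {
        int ret;

        memset(xfer, 0, sizeof(*xfer));
        xfer->setup.frame_len = 64;            /* bytes per frame */
        xfer->setup.speed_hz = 10000000;
        xfer->setup.bits_per_word = 8;
        xfer->setup.xfer_done = foo_frame_done;

        ret = spi_prepare_oob_transfer(spi, xfer);
        if (ret)
            return ret;

        spi_start_oob_transfer(xfer);

        /* Later, typically from the oob stage, clock one frame out: */
        return spi_pulse_oob_transfer(xfer);
    }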
*/ -#define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) +#define raw_spin_trylock(lock) \ + __cond_lock(lock, \ + LOCK_ALTERNATIVES_RET(lock, \ + spin_trylock, _raw_spin_trylock(__RAWLOCK(lock)))) -#define raw_spin_lock(lock) _raw_spin_lock(lock) +#define raw_spin_lock(lock) \ + LOCK_ALTERNATIVES(lock, spin_lock, _raw_spin_lock(__RAWLOCK(lock))) #ifdef CONFIG_DEBUG_LOCK_ALLOC + # define raw_spin_lock_nested(lock, subclass) \ - _raw_spin_lock_nested(lock, subclass) + LOCK_ALTERNATIVES(lock, spin_lock_nested, \ + _raw_spin_lock_nested(__RAWLOCK(lock), subclass), subclass) # define raw_spin_lock_nest_lock(lock, nest_lock) \ do { \ @@ -243,18 +255,20 @@ static inline void do_raw_spin_unlock(ra * warns about set-but-not-used variables when building with * CONFIG_DEBUG_LOCK_ALLOC=n and with W=1. */ -# define raw_spin_lock_nested(lock, subclass) \ - _raw_spin_lock(((void)(subclass), (lock))) +# define raw_spin_lock_nested(lock, subclass) \ + LOCK_ALTERNATIVES(lock, spin_lock_nested, \ + _raw_spin_lock(((void)(subclass), __RAWLOCK(lock))), subclass) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) #endif #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -#define raw_spin_lock_irqsave(lock, flags) \ - do { \ - typecheck(unsigned long, flags); \ - flags = _raw_spin_lock_irqsave(lock); \ - } while (0) +#define raw_spin_lock_irqsave(lock, flags) \ + LOCK_ALTERNATIVES(lock, spin_lock_irqsave, \ + do { \ + typecheck(unsigned long, flags); \ + flags = _raw_spin_lock_irqsave(__RAWLOCK(lock)); \ + } while (0), flags) #ifdef CONFIG_DEBUG_LOCK_ALLOC #define raw_spin_lock_irqsave_nested(lock, flags, subclass) \ @@ -272,45 +286,55 @@ static inline void do_raw_spin_unlock(ra #else -#define raw_spin_lock_irqsave(lock, flags) \ - do { \ - typecheck(unsigned long, flags); \ - _raw_spin_lock_irqsave(lock, flags); \ - } while (0) +#define raw_spin_lock_irqsave(lock, flags) \ + LOCK_ALTERNATIVES(lock, spin_lock_irqsave, \ + do { \ + typecheck(unsigned long, flags); \ + _raw_spin_lock_irqsave(__RAWLOCK(lock), flags); \ + } while (0), flags) #define raw_spin_lock_irqsave_nested(lock, flags, subclass) \ raw_spin_lock_irqsave(lock, flags) #endif -#define raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock) +#define raw_spin_lock_irq(lock) \ + LOCK_ALTERNATIVES(lock, spin_lock_irq, \ + _raw_spin_lock_irq(__RAWLOCK(lock))) #define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock) -#define raw_spin_unlock(lock) _raw_spin_unlock(lock) -#define raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock) +#define raw_spin_unlock(lock) \ + LOCK_ALTERNATIVES(lock, spin_unlock, \ + _raw_spin_unlock(__RAWLOCK(lock))) +#define raw_spin_unlock_irq(lock) \ + LOCK_ALTERNATIVES(lock, spin_unlock_irq, \ + _raw_spin_unlock_irq(__RAWLOCK(lock))) -#define raw_spin_unlock_irqrestore(lock, flags) \ - do { \ - typecheck(unsigned long, flags); \ - _raw_spin_unlock_irqrestore(lock, flags); \ - } while (0) +#define raw_spin_unlock_irqrestore(lock, flags) \ + LOCK_ALTERNATIVES(lock, spin_unlock_irqrestore, \ + do { \ + typecheck(unsigned long, flags); \ + _raw_spin_unlock_irqrestore(__RAWLOCK(lock), flags); \ + } while (0), flags) #define raw_spin_unlock_bh(lock) _raw_spin_unlock_bh(lock) #define raw_spin_trylock_bh(lock) \ __cond_lock(lock, _raw_spin_trylock_bh(lock)) #define raw_spin_trylock_irq(lock) \ + LOCK_ALTERNATIVES_RET(lock, spin_trylock_irq, \ ({ \ local_irq_disable(); \ - raw_spin_trylock(lock) ? \ + raw_spin_trylock(__RAWLOCK(lock)) ? 
\ 1 : ({ local_irq_enable(); 0; }); \ -}) +})) #define raw_spin_trylock_irqsave(lock, flags) \ + LOCK_ALTERNATIVES_RET(lock, spin_trylock_irqsave, \ ({ \ local_irq_save(flags); \ - raw_spin_trylock(lock) ? \ + raw_spin_trylock(__RAWLOCK(lock)) ? \ 1 : ({ local_irq_restore(flags); 0; }); \ -}) +}), flags) #ifndef CONFIG_PREEMPT_RT /* Include rwlock functions for !RT */ @@ -326,15 +350,25 @@ static inline void do_raw_spin_unlock(ra # include #endif +/* Pull the lock types specific to the IRQ pipeline. */ +#ifdef CONFIG_IRQ_PIPELINE +#include +#endif + /* Non PREEMPT_RT kernel, map to raw spinlocks: */ #ifndef CONFIG_PREEMPT_RT +#ifndef CONFIG_IRQ_PIPELINE +static inline void check_spinlock_context(void) { } +#endif + /* * Map the spin_lock functions to the raw variants for PREEMPT_RT=n */ static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock) { + check_spinlock_context(); return &lock->rlock; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/spinlock_pipeline.h linux-dovetail-v5.15.y-dovetail/include/linux/spinlock_pipeline.h --- linux-5.15.26/include/linux/spinlock_pipeline.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/spinlock_pipeline.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,379 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2016 Philippe Gerum . + */ +#ifndef __LINUX_SPINLOCK_PIPELINE_H +#define __LINUX_SPINLOCK_PIPELINE_H + +#ifndef __LINUX_SPINLOCK_H +# error "Please don't include this file directly. Use spinlock.h." +#endif + +#include + +#define hard_spin_lock_irqsave(__rlock, __flags) \ + do { \ + (__flags) = __hard_spin_lock_irqsave(__rlock); \ + } while (0) + +#define hard_spin_trylock_irqsave(__rlock, __flags) \ + ({ \ + int __locked; \ + (__flags) = __hard_spin_trylock_irqsave(__rlock, &__locked); \ + __locked; \ + }) + +#define hybrid_spin_lock_init(__rlock) hard_spin_lock_init(__rlock) + +/* + * CAUTION: We don't want the hand-coded irq-enable of + * do_raw_spin_lock_flags(), hard locked sections assume that + * interrupts are not re-enabled during lock-acquire. 
+ */ +#define hard_lock_acquire(__rlock, __try, __ip) \ + do { \ + hard_spin_lock_prepare(__rlock); \ + if (irq_pipeline_debug_locking()) { \ + spin_acquire(&(__rlock)->dep_map, 0, __try, __ip); \ + LOCK_CONTENDED(__rlock, do_raw_spin_trylock, do_raw_spin_lock); \ + } else { \ + do_raw_spin_lock(__rlock); \ + } \ + } while (0) + +#define hard_lock_acquire_nested(__rlock, __subclass, __ip) \ + do { \ + hard_spin_lock_prepare(__rlock); \ + if (irq_pipeline_debug_locking()) { \ + spin_acquire(&(__rlock)->dep_map, __subclass, 0, __ip); \ + LOCK_CONTENDED(__rlock, do_raw_spin_trylock, do_raw_spin_lock); \ + } else { \ + do_raw_spin_lock(__rlock); \ + } \ + } while (0) + +#define hard_trylock_acquire(__rlock, __try, __ip) \ + do { \ + if (irq_pipeline_debug_locking()) \ + spin_acquire(&(__rlock)->dep_map, 0, __try, __ip); \ + } while (0) + +#define hard_lock_release(__rlock, __ip) \ + do { \ + if (irq_pipeline_debug_locking()) \ + spin_release(&(__rlock)->dep_map, __ip); \ + do_raw_spin_unlock(__rlock); \ + hard_spin_unlock_finish(__rlock); \ + } while (0) + +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + +#ifdef CONFIG_DEBUG_SPINLOCK +#define hard_spin_lock_init(__lock) \ + do { \ + static struct lock_class_key __key; \ + __raw_spin_lock_init((raw_spinlock_t *)__lock, #__lock, &__key, LD_WAIT_SPIN); \ + } while (0) +#else +#define hard_spin_lock_init(__rlock) \ + do { *(__rlock) = __HARD_SPIN_LOCK_UNLOCKED(__rlock); } while (0) +#endif + +/* + * XXX: no preempt_enable/disable when hard locking. + */ + +static inline +void hard_spin_lock(struct raw_spinlock *rlock) +{ + hard_lock_acquire(rlock, 0, _THIS_IP_); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline +void hard_spin_lock_nested(struct raw_spinlock *rlock, int subclass) +{ + hard_lock_acquire_nested(rlock, subclass, _THIS_IP_); +} +#else +static inline +void hard_spin_lock_nested(struct raw_spinlock *rlock, int subclass) +{ + hard_spin_lock(rlock); +} +#endif + +static inline +void hard_spin_unlock(struct raw_spinlock *rlock) +{ + hard_lock_release(rlock, _THIS_IP_); +} + +static inline +void hard_spin_lock_irq(struct raw_spinlock *rlock) +{ + hard_local_irq_disable(); + hard_lock_acquire(rlock, 0, _THIS_IP_); +} + +static inline +void hard_spin_unlock_irq(struct raw_spinlock *rlock) +{ + hard_lock_release(rlock, _THIS_IP_); + hard_local_irq_enable(); +} + +static inline +void hard_spin_unlock_irqrestore(struct raw_spinlock *rlock, + unsigned long flags) +{ + hard_lock_release(rlock, _THIS_IP_); + hard_local_irq_restore(flags); +} + +static inline +unsigned long __hard_spin_lock_irqsave(struct raw_spinlock *rlock) +{ + unsigned long flags = hard_local_irq_save(); + + hard_lock_acquire(rlock, 0, _THIS_IP_); + + return flags; +} + +static inline +int hard_spin_trylock(struct raw_spinlock *rlock) +{ + hard_spin_trylock_prepare(rlock); + + if (do_raw_spin_trylock(rlock)) { + hard_trylock_acquire(rlock, 1, _THIS_IP_); + return 1; + } + + hard_spin_trylock_fail(rlock); + + return 0; +} + +static inline +unsigned long __hard_spin_trylock_irqsave(struct raw_spinlock *rlock, + int *locked) +{ + unsigned long flags = hard_local_irq_save(); + *locked = hard_spin_trylock(rlock); + return *locked ? flags : ({ hard_local_irq_restore(flags); flags; }); +} + +static inline +int hard_spin_trylock_irq(struct raw_spinlock *rlock) +{ + hard_local_irq_disable(); + return hard_spin_trylock(rlock) ? 
: ({ hard_local_irq_enable(); 0; }); +} + +static inline +int hard_spin_is_locked(struct raw_spinlock *rlock) +{ + return arch_spin_is_locked(&rlock->raw_lock); +} + +static inline +int hard_spin_is_contended(struct raw_spinlock *rlock) +{ +#ifdef CONFIG_GENERIC_LOCKBREAK + return rlock->break_lock; +#elif defined(arch_spin_is_contended) + return arch_spin_is_contended(&rlock->raw_lock); +#else + return 0; +#endif +} + +#else /* !SMP && !DEBUG_SPINLOCK */ + +#define hard_spin_lock_init(__rlock) do { (void)(__rlock); } while (0) +#define hard_spin_lock(__rlock) __HARD_LOCK(__rlock) +#define hard_spin_lock_nested(__rlock, __subclass) \ + do { __HARD_LOCK(__rlock); (void)(__subclass); } while (0) +#define hard_spin_unlock(__rlock) __HARD_UNLOCK(__rlock) +#define hard_spin_lock_irq(__rlock) __HARD_LOCK_IRQ(__rlock) +#define hard_spin_unlock_irq(__rlock) __HARD_UNLOCK_IRQ(__rlock) +#define hard_spin_unlock_irqrestore(__rlock, __flags) \ + __HARD_UNLOCK_IRQRESTORE(__rlock, __flags) +#define __hard_spin_lock_irqsave(__rlock) \ + ({ \ + unsigned long __flags; \ + __HARD_LOCK_IRQSAVE(__rlock, __flags); \ + __flags; \ + }) +#define __hard_spin_trylock_irqsave(__rlock, __locked) \ + ({ \ + unsigned long __flags; \ + __HARD_LOCK_IRQSAVE(__rlock, __flags); \ + *(__locked) = 1; \ + __flags; \ + }) +#define hard_spin_trylock(__rlock) ({ __HARD_LOCK(__rlock); 1; }) +#define hard_spin_trylock_irq(__rlock) ({ __HARD_LOCK_IRQ(__rlock); 1; }) +#define hard_spin_is_locked(__rlock) ((void)(__rlock), 0) +#define hard_spin_is_contended(__rlock) ((void)(__rlock), 0) +#endif /* !SMP && !DEBUG_SPINLOCK */ + +/* + * In the pipeline entry context, the regular preemption and root + * stall logic do not apply since we may actually have preempted any + * critical section of the kernel which is protected by regular + * locking (spin or stall), or we may even have preempted the + * out-of-band stage. Therefore, we just need to grab the raw spinlock + * underlying a hybrid spinlock to exclude other CPUs. + * + * NOTE: When entering the pipeline, IRQs are already hard disabled. 
+ */ + +void __hybrid_spin_lock(struct raw_spinlock *rlock); +void __hybrid_spin_lock_nested(struct raw_spinlock *rlock, int subclass); + +static inline void hybrid_spin_lock(struct raw_spinlock *rlock) +{ + if (in_pipeline()) + hard_lock_acquire(rlock, 0, _THIS_IP_); + else + __hybrid_spin_lock(rlock); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline +void hybrid_spin_lock_nested(struct raw_spinlock *rlock, int subclass) +{ + if (in_pipeline()) + hard_lock_acquire_nested(rlock, subclass, _THIS_IP_); + else + __hybrid_spin_lock_nested(rlock, subclass); +} +#else +static inline +void hybrid_spin_lock_nested(struct raw_spinlock *rlock, int subclass) +{ + hybrid_spin_lock(rlock); +} +#endif + +void __hybrid_spin_unlock(struct raw_spinlock *rlock); + +static inline void hybrid_spin_unlock(struct raw_spinlock *rlock) +{ + if (in_pipeline()) + hard_lock_release(rlock, _THIS_IP_); + else + __hybrid_spin_unlock(rlock); +} + +void __hybrid_spin_lock_irq(struct raw_spinlock *rlock); + +static inline void hybrid_spin_lock_irq(struct raw_spinlock *rlock) +{ + if (in_pipeline()) + hard_lock_acquire(rlock, 0, _THIS_IP_); + else + __hybrid_spin_lock_irq(rlock); +} + +void __hybrid_spin_unlock_irq(struct raw_spinlock *rlock); + +static inline void hybrid_spin_unlock_irq(struct raw_spinlock *rlock) +{ + if (in_pipeline()) + hard_lock_release(rlock, _THIS_IP_); + else + __hybrid_spin_unlock_irq(rlock); +} + +unsigned long __hybrid_spin_lock_irqsave(struct raw_spinlock *rlock); + +#define hybrid_spin_lock_irqsave(__rlock, __flags) \ + do { \ + if (in_pipeline()) { \ + hard_lock_acquire(__rlock, 0, _THIS_IP_); \ + (__flags) = hard_local_save_flags(); \ + } else \ + (__flags) = __hybrid_spin_lock_irqsave(__rlock); \ + } while (0) + +void __hybrid_spin_unlock_irqrestore(struct raw_spinlock *rlock, + unsigned long flags); + +static inline void hybrid_spin_unlock_irqrestore(struct raw_spinlock *rlock, + unsigned long flags) +{ + + if (in_pipeline()) + hard_lock_release(rlock, _THIS_IP_); + else + __hybrid_spin_unlock_irqrestore(rlock, flags); +} + +int __hybrid_spin_trylock(struct raw_spinlock *rlock); + +static inline int hybrid_spin_trylock(struct raw_spinlock *rlock) +{ + if (in_pipeline()) { + hard_spin_trylock_prepare(rlock); + if (do_raw_spin_trylock(rlock)) { + hard_trylock_acquire(rlock, 1, _THIS_IP_); + return 1; + } + hard_spin_trylock_fail(rlock); + return 0; + } + + return __hybrid_spin_trylock(rlock); +} + +int __hybrid_spin_trylock_irqsave(struct raw_spinlock *rlock, + unsigned long *flags); + +#define hybrid_spin_trylock_irqsave(__rlock, __flags) \ + ({ \ + int __ret = 1; \ + if (in_pipeline()) { \ + hard_spin_trylock_prepare(__rlock); \ + if (do_raw_spin_trylock(__rlock)) { \ + hard_trylock_acquire(__rlock, 1, _THIS_IP_); \ + (__flags) = hard_local_save_flags(); \ + } else { \ + hard_spin_trylock_fail(__rlock); \ + __ret = 0; \ + } \ + } else { \ + __ret = __hybrid_spin_trylock_irqsave(__rlock, &(__flags)); \ + } \ + __ret; \ + }) + +static inline int hybrid_spin_trylock_irq(struct raw_spinlock *rlock) +{ + unsigned long flags; + return hybrid_spin_trylock_irqsave(rlock, flags); +} + +static inline +int hybrid_spin_is_locked(struct raw_spinlock *rlock) +{ + return hard_spin_is_locked(rlock); +} + +static inline +int hybrid_spin_is_contended(struct raw_spinlock *rlock) +{ + return hard_spin_is_contended(rlock); +} + +#ifdef CONFIG_DEBUG_IRQ_PIPELINE +void check_spinlock_context(void); +#else +static inline void check_spinlock_context(void) { } +#endif + +#endif /* __LINUX_SPINLOCK_PIPELINE_H */ 
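Putting the spinlock pieces together: a hard lock is declared with DEFINE_HARD_SPINLOCK() (see spinlock_types.h below) and then used through the ordinary raw_spin_*() entry points, which the LOCK_ALTERNATIVES() dispatch reroutes at compile time to the hard_spin_*() variants defined above. A minimal sketch with a hypothetical lock and datum:

    #include <linux/spinlock.h>

    static DEFINE_HARD_SPINLOCK(foo_lock);   /* hypothetical lock */
    static u64 foo_shared;                   /* datum it protects */

    static void foo_update(u64 val)
    {
        unsigned long flags;

        /*
         * Dispatched to hard_spin_lock_irqsave(): hard IRQs are
         * disabled, so the critical section cannot be preempted
         * by the out-of-band stage either.
         */
        raw_spin_lock_irqsave(&foo_lock, flags);
        foo_shared = val;
        raw_spin_unlock_irqrestore(&foo_lock, flags);
    }

The same calls work unchanged on a lock declared with DEFINE_MUTABLE_SPINLOCK(), in which case they dispatch to the hybrid_spin_*() variants instead.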
diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/spinlock_types.h linux-dovetail-v5.15.y-dovetail/include/linux/spinlock_types.h --- linux-5.15.26/include/linux/spinlock_types.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/spinlock_types.h 2022-03-10 09:47:50.000000000 +0100 @@ -71,6 +71,151 @@ typedef struct spinlock { #endif /* CONFIG_PREEMPT_RT */ +#ifdef CONFIG_IRQ_PIPELINE + +void __bad_spinlock_type(void); + +#define __RAWLOCK(x) ((struct raw_spinlock *)(x)) + +#define LOCK_ALTERNATIVES(__lock, __base_op, __raw_form, __args...) \ + do { \ + if (__builtin_types_compatible_p(typeof(__lock), \ + raw_spinlock_t *)) \ + __raw_form; \ + else if (__builtin_types_compatible_p(typeof(__lock), \ + hard_spinlock_t *)) \ + hard_ ## __base_op(__RAWLOCK(__lock), ##__args); \ + else if (__builtin_types_compatible_p(typeof(__lock), \ + hybrid_spinlock_t *)) \ + hybrid_ ## __base_op(__RAWLOCK(__lock), ##__args); \ + else \ + __bad_spinlock_type(); \ + } while (0) + +#define LOCK_ALTERNATIVES_RET(__lock, __base_op, __raw_form, __args...) \ + ({ \ + long __ret = 0; \ + if (__builtin_types_compatible_p(typeof(__lock), \ + raw_spinlock_t *)) \ + __ret = __raw_form; \ + else if (__builtin_types_compatible_p(typeof(__lock), \ + hard_spinlock_t *)) \ + __ret = hard_ ## __base_op(__RAWLOCK(__lock), ##__args); \ + else if (__builtin_types_compatible_p(typeof(__lock), \ + hybrid_spinlock_t *)) \ + __ret = hybrid_ ## __base_op(__RAWLOCK(__lock), ##__args); \ + else \ + __bad_spinlock_type(); \ + __ret; \ + }) + +#define LOCKDEP_ALT_DEPMAP(__lock) \ + ({ \ + struct lockdep_map *__ret; \ + if (__builtin_types_compatible_p(typeof(&(__lock)->dep_map), \ + struct phony_lockdep_map *)) \ + __ret = &__RAWLOCK(__lock)->dep_map; \ + else \ + __ret = (struct lockdep_map *)(&(__lock)->dep_map); \ + __ret; \ + }) + +#define LOCKDEP_HARD_DEBUG(__lock, __nodebug, __debug) \ + do { \ + if (__builtin_types_compatible_p(typeof(__lock), \ + raw_spinlock_t *) || \ + irq_pipeline_debug_locking()) { \ + __debug; \ + } else { \ + __nodebug; \ + } \ + } while (0) + +#define LOCKDEP_HARD_DEBUG_RET(__lock, __nodebug, __debug) \ + ({ \ + typeof(__nodebug) __ret; \ + if (__builtin_types_compatible_p(typeof(__lock), \ + raw_spinlock_t *) || \ + irq_pipeline_debug_locking()) { \ + __ret = (__debug); \ + } else { \ + __ret = (__nodebug); \ + } \ + __ret; \ + }) + +#define __HARD_SPIN_LOCK_UNLOCKED(__rlock) \ + __RAW_SPIN_LOCK_UNLOCKED(__rlock) + +#define __HARD_SPIN_LOCK_INITIALIZER(__lock) \ + { \ + .rlock = __HARD_SPIN_LOCK_UNLOCKED((__lock).rlock), \ + } + +#define DEFINE_HARD_SPINLOCK(x) hard_spinlock_t x = { \ + .rlock = __HARD_SPIN_LOCK_UNLOCKED(x), \ + } + +#define DECLARE_HARD_SPINLOCK(x) hard_spinlock_t x + +struct phony_lockdep_map { + short wait_type_outer; + short wait_type_inner; +}; + +typedef struct hard_spinlock { + /* XXX: offset_of(struct hard_spinlock, rlock) == 0 */ + struct raw_spinlock rlock; + struct phony_lockdep_map dep_map; +} hard_spinlock_t; + +#define DEFINE_MUTABLE_SPINLOCK(x) hybrid_spinlock_t x = { \ + .rlock = __RAW_SPIN_LOCK_UNLOCKED(x), \ + } + +#define DECLARE_MUTABLE_SPINLOCK(x) hybrid_spinlock_t x + +typedef struct hybrid_spinlock { + /* XXX: offset_of(struct hybrid_spinlock, rlock) == 0 */ + struct raw_spinlock rlock; + unsigned long hwflags; + struct phony_lockdep_map dep_map; +} hybrid_spinlock_t; + +#else + +typedef raw_spinlock_t hard_spinlock_t; + +typedef raw_spinlock_t hybrid_spinlock_t; + +#define LOCK_ALTERNATIVES(__lock, 
__base_op, __raw_form, __args...) \ + __raw_form + +#define LOCK_ALTERNATIVES_RET(__lock, __base_op, __raw_form, __args...) \ + __raw_form + +#define LOCKDEP_ALT_DEPMAP(__lock) (&(__lock)->dep_map) + +#define LOCKDEP_HARD_DEBUG(__lock, __nondebug, __debug) do { __debug; } while (0) + +#define LOCKDEP_HARD_DEBUG_RET(__lock, __nondebug, __debug) ({ __debug; }) + +#define DEFINE_HARD_SPINLOCK(x) DEFINE_RAW_SPINLOCK(x) + +#define DECLARE_HARD_SPINLOCK(x) raw_spinlock_t x + +#define DEFINE_MUTABLE_SPINLOCK(x) DEFINE_RAW_SPINLOCK(x) + +#define DECLARE_MUTABLE_SPINLOCK(x) raw_spinlock_t x + +#define __RAWLOCK(x) (x) + +#define __HARD_SPIN_LOCK_UNLOCKED(__lock) __RAW_SPIN_LOCK_UNLOCKED(__lock) + +#define __HARD_SPIN_LOCK_INITIALIZER(__lock) __RAW_SPIN_LOCK_UNLOCKED(__lock) + +#endif /* CONFIG_IRQ_PIPELINE */ + #include #endif /* __LINUX_SPINLOCK_TYPES_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/stop_machine.h linux-dovetail-v5.15.y-dovetail/include/linux/stop_machine.h --- linux-5.15.26/include/linux/stop_machine.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/stop_machine.h 2022-03-10 09:47:50.000000000 +0100 @@ -6,6 +6,7 @@ #include #include #include +#include /* * stop_cpu[s]() is simplistic per-cpu maximum priority cpu @@ -134,7 +135,9 @@ static __always_inline int stop_machine_ unsigned long flags; int ret; local_irq_save(flags); + hard_irq_disable(); ret = fn(data); + hard_irq_enable(); local_irq_restore(flags); return ret; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/thread_info.h linux-dovetail-v5.15.y-dovetail/include/linux/thread_info.h --- linux-5.15.26/include/linux/thread_info.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/thread_info.h 2022-03-10 09:47:50.000000000 +0100 @@ -223,6 +223,72 @@ check_copy_size(const void *addr, size_t static inline void arch_setup_new_exec(void) { } #endif +#ifdef ti_local_flags +/* + * If the arch defines a set of per-thread synchronous flags, provide + * generic accessors to them. 
+ */ +static __always_inline +void set_ti_local_flags(struct thread_info *ti, unsigned int mask) +{ + ti_local_flags(ti) |= mask; +} + +static __always_inline void set_thread_local_flags(unsigned int mask) +{ + set_ti_local_flags(current_thread_info(), mask); +} + +static __always_inline +int test_and_set_ti_local_flags(struct thread_info *ti, unsigned int mask) +{ + int old = ti_local_flags(ti) & mask; + ti_local_flags(ti) |= mask; + return old != 0; +} + +static __always_inline int test_and_set_thread_local_flags(unsigned int mask) +{ + return test_and_set_ti_local_flags(current_thread_info(), mask); +} + +static __always_inline +void clear_ti_local_flags(struct thread_info *ti, unsigned int mask) +{ + ti_local_flags(ti) &= ~mask; +} + +static __always_inline +int test_and_clear_ti_local_flags(struct thread_info *ti, unsigned int mask) +{ + int old = ti_local_flags(ti) & mask; + ti_local_flags(ti) &= ~mask; + return old != 0; +} + +static __always_inline int test_and_clear_thread_local_flags(unsigned int mask) +{ + return test_and_clear_ti_local_flags(current_thread_info(), mask); +} + +static __always_inline void clear_thread_local_flags(unsigned int mask) +{ + clear_ti_local_flags(current_thread_info(), mask); +} + +static __always_inline +bool test_ti_local_flags(struct thread_info *ti, unsigned int mask) +{ + return (ti_local_flags(ti) & mask) != 0; +} + +static __always_inline bool test_thread_local_flags(unsigned int mask) +{ + return test_ti_local_flags(current_thread_info(), mask); +} + +#endif /* ti_local_flags */ + #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/tick.h linux-dovetail-v5.15.y-dovetail/include/linux/tick.h --- linux-5.15.26/include/linux/tick.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/tick.h 2022-03-10 09:47:50.000000000 +0100 @@ -21,6 +21,14 @@ extern void tick_suspend_local(void); extern void tick_resume_local(void); extern void tick_handover_do_timer(void); extern void tick_cleanup_dead_cpu(int cpu); + +#ifdef CONFIG_IRQ_PIPELINE +int tick_install_proxy(void (*setup_proxy)(struct clock_proxy_device *dev), + const struct cpumask *cpumask); +void tick_uninstall_proxy(const struct cpumask *cpumask); +void tick_notify_proxy(void); +#endif + #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_suspend_local(void) { } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/trace_events.h linux-dovetail-v5.15.y-dovetail/include/linux/trace_events.h --- linux-5.15.26/include/linux/trace_events.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/trace_events.h 2022-03-10 09:47:50.000000000 +0100 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -172,6 +173,8 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, + TRACE_FLAG_OOB_STAGE = 0x80, + TRACE_FLAG_IRQS_HARDOFF = TRACE_FLAG_IRQS_NOSUPPORT, }; #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -185,7 +188,7 @@ static inline unsigned int tracing_gen_c { unsigned long irqflags; - local_save_flags(irqflags); + stage_save_flags(irqflags); return tracing_gen_ctx_flags(irqflags); } #else diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/tracepoint.h linux-dovetail-v5.15.y-dovetail/include/linux/tracepoint.h --- linux-5.15.26/include/linux/tracepoint.h 2022-03-02 11:48:10.000000000 
+0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/tracepoint.h 2022-03-10 09:47:50.000000000 +0100 @@ -180,6 +180,10 @@ static inline struct tracepoint *tracepo /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. + * + * IRQ pipeline: we may not depend on RCU for data which may be + * manipulated from the out-of-band stage, so rcuidle has to be false + * if running_oob(). */ #define __DO_TRACE(name, args, cond, rcuidle) \ do { \ @@ -220,7 +224,7 @@ static inline struct tracepoint *tracepo if (static_key_false(&__tracepoint_##name.key)) \ __DO_TRACE(name, \ TP_ARGS(args), \ - TP_CONDITION(cond), 1); \ + TP_CONDITION(cond), running_inband()); \ } #else #define __DECLARE_TRACE_RCU(name, proto, args, cond) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/linux/vmalloc.h linux-dovetail-v5.15.y-dovetail/include/linux/vmalloc.h --- linux-5.15.26/include/linux/vmalloc.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/linux/vmalloc.h 2022-03-10 09:47:50.000000000 +0100 @@ -281,6 +281,7 @@ pcpu_free_vm_areas(struct vm_struct **vm int register_vmap_purge_notifier(struct notifier_block *nb); int unregister_vmap_purge_notifier(struct notifier_block *nb); +void arch_advertise_page_mapping(unsigned long start, unsigned long end); #if defined(CONFIG_MMU) && defined(CONFIG_PRINTK) bool vmalloc_dump_obj(void *object); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/net/netoob.h linux-dovetail-v5.15.y-dovetail/include/net/netoob.h --- linux-5.15.26/include/net/netoob.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/net/netoob.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NET_OOBNET_H +#define _NET_OOBNET_H + +#include + +/* Device supports direct out-of-band operations (RX & TX) */ +#define IFF_OOB_CAPABLE BIT(0) +/* Device is an out-of-band port */ +#define IFF_OOB_PORT BIT(1) + +struct oob_netdev_context { + int flags; + struct oob_netdev_state dev_state; +}; + +#endif /* !_NET_OOBNET_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/net/sock.h linux-dovetail-v5.15.y-dovetail/include/net/sock.h --- linux-5.15.26/include/net/sock.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/net/sock.h 2022-03-10 09:47:50.000000000 +0100 @@ -538,6 +538,9 @@ struct sock { struct bpf_local_storage __rcu *sk_bpf_storage; #endif struct rcu_head sk_rcu; +#ifdef CONFIG_NET_OOB + void *oob_data; +#endif }; enum sk_pacing { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/trace/events/irq.h linux-dovetail-v5.15.y-dovetail/include/trace/events/irq.h --- linux-5.15.26/include/trace/events/irq.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/trace/events/irq.h 2022-03-10 09:47:50.000000000 +0100 @@ -100,6 +100,48 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? 
"handled" : "unhandled") ); +/** + * irq_pipeline_entry - called when an external irq enters the pipeline + * @irq: irq number + */ +TRACE_EVENT(irq_pipeline_entry, + + TP_PROTO(int irq), + + TP_ARGS(irq), + + TP_STRUCT__entry( + __field( int, irq ) + ), + + TP_fast_assign( + __entry->irq = irq; + ), + + TP_printk("irq=%d", __entry->irq) +); + +/** + * irq_pipeline_exit - called when an external irq leaves the pipeline + * @irq: irq number + */ +TRACE_EVENT(irq_pipeline_exit, + + TP_PROTO(int irq), + + TP_ARGS(irq), + + TP_STRUCT__entry( + __field( int, irq ) + ), + + TP_fast_assign( + __entry->irq = irq; + ), + + TP_printk("irq=%d", __entry->irq) +); + DECLARE_EVENT_CLASS(softirq, TP_PROTO(unsigned int vec_nr), diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/uapi/asm-generic/dovetail.h linux-dovetail-v5.15.y-dovetail/include/uapi/asm-generic/dovetail.h --- linux-5.15.26/include/uapi/asm-generic/dovetail.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/uapi/asm-generic/dovetail.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_GENERIC_DOVETAIL_H +#define __ASM_GENERIC_DOVETAIL_H + +#define __OOB_SYSCALL_BIT 0x10000000 + +#endif /* !__ASM_GENERIC_DOVETAIL_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/uapi/asm-generic/fcntl.h linux-dovetail-v5.15.y-dovetail/include/uapi/asm-generic/fcntl.h --- linux-5.15.26/include/uapi/asm-generic/fcntl.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/uapi/asm-generic/fcntl.h 2022-03-10 09:47:50.000000000 +0100 @@ -89,6 +89,15 @@ #define __O_TMPFILE 020000000 #endif +/* + * Tells the open call that out-of-band operations should be enabled + * for the file (if supported). Can also be passed along to socket(2) + * via the type argument as SOCK_OOB. + */ +#ifndef O_OOB +#define O_OOB 010000000000 +#endif + /* a horrid kludge trying to make sure that this will fail on old kernels */ #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) #define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/uapi/linux/clocksource.h linux-dovetail-v5.15.y-dovetail/include/uapi/linux/clocksource.h --- linux-5.15.26/include/uapi/linux/clocksource.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/uapi/linux/clocksource.h 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,33 @@ +/* + * Definitions for user-mappable clock sources. 
+ * + * Gilles Chanteperdrix + */ +#ifndef _UAPI_LINUX_CLOCKSOURCE_H +#define _UAPI_LINUX_CLOCKSOURCE_H + +enum clksrc_user_mmio_type { + CLKSRC_MMIO_L_UP, + CLKSRC_MMIO_L_DOWN, + CLKSRC_MMIO_W_UP, + CLKSRC_MMIO_W_DOWN, + CLKSRC_DMMIO_L_UP, + CLKSRC_DMMIO_W_UP, + + CLKSRC_MMIO_TYPE_NR, +}; + +struct clksrc_user_mmio_info { + enum clksrc_user_mmio_type type; + void *reg_lower; + unsigned int mask_lower; + unsigned int bits_lower; + void *reg_upper; + unsigned int mask_upper; +}; + +#define CLKSRC_USER_MMIO_MAX 16 + +#define CLKSRC_USER_MMIO_MAP _IOWR(0xC1, 0, struct clksrc_user_mmio_info) + +#endif /* _UAPI_LINUX_CLOCKSOURCE_H */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/vdso/datapage.h linux-dovetail-v5.15.y-dovetail/include/vdso/datapage.h --- linux-5.15.26/include/vdso/datapage.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/vdso/datapage.h 2022-03-10 09:47:50.000000000 +0100 @@ -106,9 +106,34 @@ struct vdso_data { u32 hrtimer_res; u32 __unused; +#ifdef CONFIG_GENERIC_CLOCKSOURCE_VDSO + u32 cs_type_seq; + char cs_mmdev[16]; +#endif + struct arch_vdso_data arch_data; }; +#ifdef CONFIG_GENERIC_CLOCKSOURCE_VDSO + +#include + +struct clksrc_info; + +typedef u64 vdso_read_cycles_t(const struct clksrc_info *info); + +struct clksrc_info { + vdso_read_cycles_t *read_cycles; + struct clksrc_user_mmio_info mmio; +}; + +struct vdso_priv { + u32 current_cs_type_seq; + struct clksrc_info clksrc_info[CLOCKSOURCE_VDSO_MMIO + CLKSRC_USER_MMIO_MAX]; +}; + +#endif /* !CONFIG_GENERIC_CLOCKSOURCE_VDSO */ + /* * We use the hidden visibility to prevent the compiler from generating a GOT * relocation. Not only is going through a GOT useless (the entry couldn't and diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/include/vdso/helpers.h linux-dovetail-v5.15.y-dovetail/include/vdso/helpers.h --- linux-5.15.26/include/vdso/helpers.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/include/vdso/helpers.h 2022-03-10 09:47:50.000000000 +0100 @@ -5,6 +5,7 @@ #ifndef __ASSEMBLY__ #include +#include static __always_inline u32 vdso_read_begin(const struct vdso_data *vd) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/init/Kconfig linux-dovetail-v5.15.y-dovetail/init/Kconfig --- linux-5.15.26/init/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/init/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -1537,6 +1537,18 @@ config PRINTK very difficult to diagnose system problems, saying N here is strongly discouraged. +config RAW_PRINTK + bool "Enable support for raw printk" + default n + help + This option enables a printk variant called raw_printk() for + writing all output unmodified to a raw console channel + immediately, without any header or preparation whatsoever, + usable from any context. + + Unlike early_printk() console devices, raw_printk() devices + can live past the boot sequence. 
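RAW_PRINTK backs the raw_printk() declarations added to printk.h earlier in this patch; since it writes immediately to a raw console channel and is usable from any context, it suits low-level diagnostics on paths where the regular printk() machinery may not be safe. A short sketch with a hypothetical interrupt handler:

    #include <linux/printk.h>
    #include <linux/interrupt.h>

    static irqreturn_t foo_irq_handler(int irq, void *dev_id)
    {
        /* Goes straight to the raw console, no deferral, no header. */
        raw_printk("foo: unexpected event on irq %d\n", irq);

        return IRQ_HANDLED;
    }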
+ config BUG bool "BUG() support" if EXPERT default y diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/init/main.c linux-dovetail-v5.15.y-dovetail/init/main.c --- linux-5.15.26/init/main.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/init/main.c 2022-03-10 09:47:50.000000000 +0100 @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -935,6 +936,7 @@ asmlinkage __visible void __init __no_sa char *command_line; char *after_dashes; + stall_inband_nocheck(); set_task_stack_end_magic(&init_task); smp_setup_processor_id(); debug_objects_early_init(); @@ -942,7 +944,7 @@ asmlinkage __visible void __init __no_sa cgroup_init_early(); - local_irq_disable(); + local_irq_disable_full(); early_boot_irqs_disabled = true; /* @@ -987,6 +989,7 @@ asmlinkage __visible void __init __no_sa setup_log_buf(0); vfs_caches_init_early(); sort_main_extable(); + irq_pipeline_init_early(); trap_init(); mm_init(); @@ -1032,6 +1035,7 @@ asmlinkage __visible void __init __no_sa /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); + irq_pipeline_init(); tick_init(); rcu_init_nohz(); init_timers(); @@ -1061,7 +1065,7 @@ asmlinkage __visible void __init __no_sa WARN(!irqs_disabled(), "Interrupts were enabled early\n"); early_boot_irqs_disabled = false; - local_irq_enable(); + local_irq_enable_full(); kmem_cache_init_late(); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/init/Makefile linux-dovetail-v5.15.y-dovetail/init/Makefile --- linux-5.15.26/init/Makefile 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/init/Makefile 2022-03-10 09:47:50.000000000 +0100 @@ -31,7 +31,7 @@ quiet_cmd_compile.h = CHK $@ cmd_compile.h = \ $(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" \ - "$(CONFIG_PREEMPT_RT)" $(CONFIG_CC_VERSION_TEXT) "$(LD)" + "$(CONFIG_PREEMPT_RT)" "$(CONFIG_IRQ_PIPELINE)" $(CONFIG_CC_VERSION_TEXT) "$(LD)" include/generated/compile.h: FORCE $(call cmd,compile.h) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/debug/debug_core.c linux-dovetail-v5.15.y-dovetail/kernel/debug/debug_core.c --- linux-5.15.26/kernel/debug/debug_core.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/debug/debug_core.c 2022-03-10 09:47:50.000000000 +0100 @@ -107,8 +107,8 @@ static struct kgdb_bkpt kgdb_break[KGDB */ atomic_t kgdb_active = ATOMIC_INIT(-1); EXPORT_SYMBOL_GPL(kgdb_active); -static DEFINE_RAW_SPINLOCK(dbg_master_lock); -static DEFINE_RAW_SPINLOCK(dbg_slave_lock); +static DEFINE_HARD_SPINLOCK(dbg_master_lock); +static DEFINE_HARD_SPINLOCK(dbg_slave_lock); /* * We use NR_CPUs not PERCPU, in case kgdb is used to debug early @@ -607,7 +607,7 @@ acquirelock: * Interrupts will be restored by the 'trap return' code, except when * single stepping. 
*/ - local_irq_save(flags); + flags = hard_local_irq_save(); cpu = ks->cpu; kgdb_info[cpu].debuggerinfo = regs; @@ -661,7 +661,7 @@ return_normal: smp_mb__before_atomic(); atomic_dec(&slaves_in_kgdb); dbg_touch_watchdogs(); - local_irq_restore(flags); + hard_local_irq_restore(flags); rcu_read_unlock(); return 0; } @@ -680,7 +680,7 @@ return_normal: atomic_set(&kgdb_active, -1); raw_spin_unlock(&dbg_master_lock); dbg_touch_watchdogs(); - local_irq_restore(flags); + hard_local_irq_restore(flags); rcu_read_unlock(); goto acquirelock; @@ -716,8 +716,11 @@ return_normal: atomic_set(ks->send_ready, 1); /* Signal the other CPUs to enter kgdb_wait() */ - else if ((!kgdb_single_step) && kgdb_do_roundup) + else if ((!kgdb_single_step) && kgdb_do_roundup && running_inband()) { + hard_cond_local_irq_enable(); kgdb_roundup_cpus(); + hard_cond_local_irq_disable(); + } #endif /* @@ -806,7 +809,7 @@ kgdb_restore: atomic_set(&kgdb_active, -1); raw_spin_unlock(&dbg_master_lock); dbg_touch_watchdogs(); - local_irq_restore(flags); + hard_local_irq_restore(flags); rcu_read_unlock(); return kgdb_info[cpu].ret_state; @@ -929,7 +932,7 @@ static void kgdb_console_write(struct co if (!kgdb_connected || atomic_read(&kgdb_active) != -1 || dbg_kdb_mode) return; - local_irq_save(flags); + flags = hard_local_irq_save(); gdbstub_msg_write(s, count); local_irq_restore(flags); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/dovetail.c linux-dovetail-v5.15.y-dovetail/kernel/dovetail.c --- linux-5.15.26/kernel/dovetail.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/dovetail.c 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,448 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2016 Philippe Gerum . + */ +#include +#include +#include +#include +#include +#include +#include + +static bool dovetail_enabled; + +void __weak arch_inband_task_init(struct task_struct *p) +{ +} + +void inband_task_init(struct task_struct *p) +{ + struct thread_info *ti = task_thread_info(p); + + clear_ti_local_flags(ti, _TLF_DOVETAIL|_TLF_OOB|_TLF_OFFSTAGE); + arch_inband_task_init(p); +} + +void dovetail_init_altsched(struct dovetail_altsched_context *p) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + + check_inband_stage(); + p->task = tsk; + p->active_mm = mm; + p->borrowed_mm = false; + + /* + * Make sure the current process will not share any private + * page with its child upon fork(), sparing it the random + * latency induced by COW. MMF_DOVETAILED is never cleared once + * set. We serialize with dup_mmap() which holds the mm write + * lock. + */ + if (!(tsk->flags & PF_KTHREAD) && + !test_bit(MMF_DOVETAILED, &mm->flags)) { + mmap_write_lock(mm); + __set_bit(MMF_DOVETAILED, &mm->flags); + mmap_write_unlock(mm); + } +} +EXPORT_SYMBOL_GPL(dovetail_init_altsched); + +void dovetail_start_altsched(void) +{ + check_inband_stage(); + set_thread_local_flags(_TLF_DOVETAIL); +} +EXPORT_SYMBOL_GPL(dovetail_start_altsched); + +void dovetail_stop_altsched(void) +{ + clear_thread_local_flags(_TLF_DOVETAIL); + clear_thread_flag(TIF_MAYDAY); +} +EXPORT_SYMBOL_GPL(dovetail_stop_altsched); + +int __weak handle_oob_syscall(struct pt_regs *regs) +{ + return 0; +} + +int __weak handle_pipelined_syscall(struct irq_stage *stage, + struct pt_regs *regs) +{ + return 0; /* i.e. propagate to in-band handler. 
*/ +} + +void __weak handle_oob_mayday(struct pt_regs *regs) +{ +} + +static inline +void call_mayday(struct thread_info *ti, struct pt_regs *regs) +{ + clear_ti_thread_flag(ti, TIF_MAYDAY); + handle_oob_mayday(regs); +} + +void dovetail_call_mayday(struct pt_regs *regs) +{ + struct thread_info *ti = current_thread_info(); + unsigned long flags; + + flags = hard_local_irq_save(); + call_mayday(ti, regs); + hard_local_irq_restore(flags); +} + +void inband_retuser_notify(void) +{ + clear_thread_flag(TIF_RETUSER); + inband_event_notify(INBAND_TASK_RETUSER, current); + /* CAUTION: we might have switched out-of-band here. */ +} + +int __pipeline_syscall(struct pt_regs *regs) +{ + struct thread_info *ti = current_thread_info(); + struct irq_stage *caller_stage, *target_stage; + struct irq_stage_data *p, *this_context; + unsigned long flags; + int ret = 0; + + /* + * We should definitely not pipeline a syscall through the + * slow path with IRQs off. + */ + WARN_ON_ONCE(dovetail_debug() && hard_irqs_disabled()); + + if (!dovetail_enabled) + return 0; + + flags = hard_local_irq_save(); + caller_stage = current_irq_stage; + this_context = current_irq_staged; + target_stage = &oob_stage; +next: + p = this_staged(target_stage); + set_current_irq_staged(p); + hard_local_irq_restore(flags); + ret = handle_pipelined_syscall(caller_stage, regs); + flags = hard_local_irq_save(); + /* + * Be careful about stage switching _and_ CPU migration that + * might have happened as a result of handing over the syscall + * to the out-of-band handler. + * + * - if a stage migration is detected, fetch the new + * per-stage, per-CPU context pointer. + * + * - if no stage migration happened, switch back to the + * initial call stage, on a possibly different CPU though. + */ + if (current_irq_stage != target_stage) { + this_context = current_irq_staged; + } else { + p = this_staged(this_context->stage); + set_current_irq_staged(p); + } + + if (this_context->stage == &inband_stage) { + if (target_stage != &inband_stage && ret == 0) { + target_stage = &inband_stage; + goto next; + } + p = this_inband_staged(); + if (stage_irqs_pending(p)) + sync_current_irq_stage(); + } else { + if (test_ti_thread_flag(ti, TIF_MAYDAY)) + call_mayday(ti, regs); + } + + hard_local_irq_restore(flags); + + return ret; +} + +static inline bool maybe_oob_syscall(unsigned int nr, struct pt_regs *regs) +{ + /* + * Check whether the companion core might be interested in the + * syscall call. If the old syscall form is handled, pass the + * request to the core if __OOB_SYSCALL_BIT is set in + * @nr. Otherwise, only check whether an oob syscall is folded + * into a prctl() request. + */ + if (IS_ENABLED(CONFIG_DOVETAIL_LEGACY_SYSCALL_RANGE)) { + if (nr & __OOB_SYSCALL_BIT) + return true; + } + + return nr == __NR_prctl && syscall_get_arg0(regs) & __OOB_SYSCALL_BIT; +} + +int pipeline_syscall(unsigned int nr, struct pt_regs *regs) +{ + struct thread_info *ti = current_thread_info(); + unsigned long local_flags = READ_ONCE(ti_local_flags(ti)); + int ret; + + WARN_ON_ONCE(dovetail_debug() && hard_irqs_disabled()); + + /* + * If the syscall signature belongs to the out-of-band syscall + * set and we are running out-of-band, pass the request + * directly to the companion core by calling the oob syscall + * handler. 
+ * + * Otherwise, if this is an out-of-band syscall or alternate + * scheduling is enabled for the caller, propagate the syscall + * through the pipeline stages, so that: + * + * - the core can manipulate the current execution stage for + * handling the request, which includes switching the current + * thread back to the in-band context if the syscall is a + * native one, or promoting it to the oob stage if handling an + * oob syscall requires this. + * + * - the core can receive the initial oob syscall a thread + * might have to emit for enabling dovetailing from the + * in-band stage. + * + * Native syscalls from common (non-dovetailed) threads are + * not subject to pipelining, but flow down to the in-band + * system call handler directly. + * + * Sanity check: we bark on returning from a syscall on a + * stalled in-band stage, which combined with running with + * hard irqs on might cause interrupts to linger in the log + * after exiting to user. + */ + + if ((local_flags & _TLF_OOB) && maybe_oob_syscall(nr, regs)) { + ret = handle_oob_syscall(regs); + if (!IS_ENABLED(CONFIG_DOVETAIL_LEGACY_SYSCALL_RANGE)) + WARN_ON_ONCE(dovetail_debug() && !ret); + local_flags = READ_ONCE(ti_local_flags(ti)); + if (likely(ret)) { + if (local_flags & _TLF_OOB) { + if (test_ti_thread_flag(ti, TIF_MAYDAY)) + dovetail_call_mayday(regs); + return 1; /* don't pass down, no tail work. */ + } else { + WARN_ON_ONCE(dovetail_debug() && irqs_disabled()); + return -1; /* don't pass down, do tail work. */ + } + } + } + + if ((local_flags & _TLF_DOVETAIL) || maybe_oob_syscall(nr, regs)) { + ret = __pipeline_syscall(regs); + local_flags = READ_ONCE(ti_local_flags(ti)); + if (local_flags & _TLF_OOB) + return 1; /* don't pass down, no tail work. */ + if (ret) { + WARN_ON_ONCE(dovetail_debug() && irqs_disabled()); + return -1; /* don't pass down, do tail work. */ + } + } + + return 0; /* pass syscall down to the in-band dispatcher. */ +} + +void __weak handle_oob_trap_entry(unsigned int trapnr, struct pt_regs *regs) +{ +} + +noinstr void __oob_trap_notify(unsigned int exception, + struct pt_regs *regs) +{ + unsigned long flags; + + /* + * We send a notification about exceptions raised over a + * registered oob stage only. The trap_entry handler expects + * hard irqs off on entry. It may demote the current context + * to the in-band stage, may return with hard irqs on. + */ + if (dovetail_enabled) { + set_thread_local_flags(_TLF_OOBTRAP); + flags = hard_local_irq_save(); + instrumentation_begin(); + handle_oob_trap_entry(exception, regs); + instrumentation_end(); + hard_local_irq_restore(flags); + } +} + +void __weak handle_oob_trap_exit(unsigned int trapnr, struct pt_regs *regs) +{ +} + +noinstr void __oob_trap_unwind(unsigned int exception, struct pt_regs *regs) +{ + /* + * The trap_exit handler runs only if trap_entry was called + * for the same trap occurrence. It expects hard irqs off on + * entry, may switch the current context back to the oob + * stage. Must return with hard irqs off. 
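/*
 * A minimal user-space sketch, not part of the patch: with the legacy
 * syscall range disabled, maybe_oob_syscall() above recognizes an oob
 * request folded into prctl() whose first argument carries
 * __OOB_SYSCALL_BIT. That bit comes from the Dovetail uapi headers
 * (not shown in this hunk) and MY_OOB_REQUEST is a hypothetical
 * companion-core request code.
 */
#include <sys/prctl.h>

#define MY_OOB_REQUEST  (__OOB_SYSCALL_BIT | 0x2a)

static long issue_oob_request(unsigned long arg)
{
        /* arg0 has __OOB_SYSCALL_BIT set, so the kernel pipelines it. */
        return prctl(MY_OOB_REQUEST, arg, 0, 0, 0);
}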
+ */ + hard_local_irq_disable(); + clear_thread_local_flags(_TLF_OOBTRAP); + instrumentation_begin(); + handle_oob_trap_exit(exception, regs); + instrumentation_end(); +} + +void __weak handle_inband_event(enum inband_event_type event, void *data) +{ +} + +void inband_event_notify(enum inband_event_type event, void *data) +{ + check_inband_stage(); + + if (dovetail_enabled) + handle_inband_event(event, data); +} + +void __weak resume_oob_task(struct task_struct *p) +{ +} + +static void finalize_oob_transition(void) /* hard IRQs off */ +{ + struct irq_pipeline_data *pd; + struct irq_stage_data *p; + struct task_struct *t; + + pd = raw_cpu_ptr(&irq_pipeline); + t = pd->task_inflight; + if (t == NULL) + return; + + /* + * @t which is in flight to the oob stage might have received + * a signal while waiting in off-stage state to be actually + * scheduled out. We can't act upon that signal safely from + * here, we simply let the task complete the migration process + * to the oob stage. The pending signal will be handled when + * the task eventually exits the out-of-band context by the + * converse migration. + */ + pd->task_inflight = NULL; + + /* + * The transition handler in the companion core assumes the + * oob stage is stalled, fix this up. + */ + stall_oob(); + resume_oob_task(t); + unstall_oob(); + p = this_oob_staged(); + if (stage_irqs_pending(p)) + /* Current stage (in-band) != p->stage (oob). */ + sync_irq_stage(p->stage); +} + +void oob_trampoline(void) +{ + unsigned long flags; + + check_inband_stage(); + flags = hard_local_irq_save(); + finalize_oob_transition(); + hard_local_irq_restore(flags); +} + +bool inband_switch_tail(void) +{ + bool oob; + + check_hard_irqs_disabled(); + + /* + * We may run this code either over the inband or oob + * contexts. If inband, we may have a thread blocked in + * dovetail_leave_inband(), waiting for the companion core to + * schedule it back in over the oob context, in which case + * finalize_oob_transition() should take care of it. If oob, + * the core just switched us back, and we may update the + * context markers before returning to context_switch(). + * + * Since the preemption count does not reflect the active + * stage yet upon inband -> oob transition, we figure out + * which one we are on by testing _TLF_OFFSTAGE. Having this + * bit set when running the inband switch tail code means that + * we are completing such transition for the current task, + * switched in by dovetail_context_switch() over the oob + * stage. If so, update the context markers appropriately. + */ + oob = test_thread_local_flags(_TLF_OFFSTAGE); + if (oob) { + /* + * The companion core assumes a stalled stage on exit + * from dovetail_leave_inband(). 
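/*
 * A minimal sketch, not part of the patch: inband_event_notify() above
 * forwards in-band task events to the companion core, which overrides
 * the __weak hook to receive them. Only INBAND_TASK_RETUSER appears in
 * this hunk, so the other event cases are left out.
 */
void handle_inband_event(enum inband_event_type event, void *data)
{
        switch (event) {
        case INBAND_TASK_RETUSER:
                /* @data is the task returning to user with TIF_RETUSER set. */
                break;
        default:
                break;
        }
}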
+ */ + stall_oob(); + set_thread_local_flags(_TLF_OOB); + if (!IS_ENABLED(CONFIG_HAVE_PERCPU_PREEMPT_COUNT)) { + WARN_ON_ONCE(dovetail_debug() && + (preempt_count() & STAGE_MASK)); + preempt_count_add(STAGE_OFFSET); + } + } else { + finalize_oob_transition(); + hard_local_irq_enable(); + } + + return oob; +} + +void __weak inband_clock_was_set(void) +{ +} + +void __weak install_inband_fd(unsigned int fd, struct file *file, + struct files_struct *files) +{ +} + +void __weak uninstall_inband_fd(unsigned int fd, struct file *file, + struct files_struct *files) +{ +} + +void __weak replace_inband_fd(unsigned int fd, struct file *file, + struct files_struct *files) +{ +} + +int dovetail_start(void) +{ + check_inband_stage(); + + if (dovetail_enabled) + return -EBUSY; + + if (!oob_stage_present()) + return -EAGAIN; + + dovetail_enabled = true; + smp_wmb(); + + return 0; +} +EXPORT_SYMBOL_GPL(dovetail_start); + +void dovetail_stop(void) +{ + check_inband_stage(); + + dovetail_enabled = false; + smp_wmb(); +} +EXPORT_SYMBOL_GPL(dovetail_stop); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/entry/common.c linux-dovetail-v5.15.y-dovetail/kernel/entry/common.c --- linux-5.15.26/kernel/entry/common.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/entry/common.c 2022-03-10 09:47:50.000000000 +0100 @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -81,10 +82,45 @@ static long syscall_trace_enter(struct p return ret ? : syscall; } +static __always_inline void +syscall_enter_from_user_enable_irqs(void) +{ + if (running_inband()) { + /* + * If pipelining interrupts, prepare for emulating a + * stall -> unstall transition (we are currently + * unstalled), fixing up the IRQ trace state in order + * to keep lockdep happy (and silent). + */ + stall_inband_nocheck(); + hard_cond_local_irq_enable(); + local_irq_enable(); + } else { + /* + * We are running on the out-of-band stage, don't mess + * with the in-band interrupt state. This is none of + * our business. We may manipulate the hardware state + * only. + */ + hard_local_irq_enable(); + } +} + static __always_inline long __syscall_enter_from_user_work(struct pt_regs *regs, long syscall) { unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + int ret; + + /* + * Pipeline the syscall to the companion core if the current + * task wants this. Compiled out if not dovetailing. + */ + ret = pipeline_syscall(syscall, regs); + if (ret > 0) /* out-of-band, bail out. */ + return EXIT_SYSCALL_OOB; + if (ret < 0) /* in-band, tail work only. 
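/*
 * A minimal sketch, not part of the patch: the typical bring-up
 * sequence suggested by dovetail_start() above and by the torture test
 * later in this patch. The "mycore" stage name and my_core_init() are
 * hypothetical.
 */
static int __init my_core_init(void)
{
        int ret;

        ret = enable_oob_stage("mycore");  /* register the oob stage */
        if (ret)
                return ret;

        ret = dovetail_start();            /* start accepting oob requests */
        if (ret)
                disable_oob_stage();

        return ret;
}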
*/ + return EXIT_SYSCALL_TAIL; if (work & SYSCALL_WORK_ENTER) syscall = syscall_trace_enter(regs, syscall, work); @@ -104,7 +140,7 @@ noinstr long syscall_enter_from_user_mod __enter_from_user_mode(regs); instrumentation_begin(); - local_irq_enable(); + syscall_enter_from_user_enable_irqs(); ret = __syscall_enter_from_user_work(regs, syscall); instrumentation_end(); @@ -115,7 +151,7 @@ noinstr void syscall_enter_from_user_mod { __enter_from_user_mode(regs); instrumentation_begin(); - local_irq_enable(); + syscall_enter_from_user_enable_irqs(); instrumentation_end(); } @@ -130,6 +166,8 @@ static __always_inline void __exit_to_us user_enter_irqoff(); arch_exit_to_user_mode(); lockdep_hardirqs_on(CALLER_ADDR0); + if (running_inband()) + unstall_inband(); } void noinstr exit_to_user_mode(void) @@ -159,6 +197,12 @@ static unsigned long exit_to_user_mode_l local_irq_enable_exit_to_user(ti_work); + /* + * Check that local_irq_enable_exit_to_user() does the + * right thing when pipelining. + */ + WARN_ON_ONCE(irq_pipeline_debug() && hard_irqs_disabled()); + if (ti_work & _TIF_NEED_RESCHED) schedule(); @@ -187,6 +231,7 @@ static unsigned long exit_to_user_mode_l /* Check if any of the above work has queued a deferred wakeup */ tick_nohz_user_enter_prepare(); + WARN_ON_ONCE(irq_pipeline_debug() && !hard_irqs_disabled()); ti_work = READ_ONCE(current_thread_info()->flags); } @@ -194,11 +239,28 @@ static unsigned long exit_to_user_mode_l return ti_work; } +static inline bool do_retuser(unsigned long ti_work) +{ + if (dovetailing() && (ti_work & _TIF_RETUSER)) { + hard_local_irq_enable(); + inband_retuser_notify(); + hard_local_irq_disable(); + /* RETUSER might have switched oob */ + return running_inband(); + } + + return false; +} + static void exit_to_user_mode_prepare(struct pt_regs *regs) { - unsigned long ti_work = READ_ONCE(current_thread_info()->flags); + unsigned long ti_work; + + check_hard_irqs_disabled(); lockdep_assert_irqs_disabled(); +again: + ti_work = READ_ONCE(current_thread_info()->flags); /* Flush pending rcuog wakeup before the last need_resched() check */ tick_nohz_user_enter_prepare(); @@ -208,6 +270,9 @@ static void exit_to_user_mode_prepare(st arch_exit_to_user_mode_prepare(regs, ti_work); + if (do_retuser(ti_work)) + goto again; + /* Ensure that the address limit is intact and no locks are held */ addr_limit_user_check(); kmap_assert_nomap(); @@ -255,6 +320,24 @@ static void syscall_exit_work(struct pt_ arch_syscall_exit_tracehook(regs, step); } +static inline bool syscall_has_exit_work(struct pt_regs *regs, + unsigned long work) +{ + /* + * Dovetail: if this does not look like an in-band syscall, it + * has to belong to the companion core. Typically, + * __OOB_SYSCALL_BIT would be set in this value. Skip the + * work for those syscalls. + */ + if (unlikely(work & SYSCALL_WORK_EXIT)) { + if (!irqs_pipelined()) + return true; + return syscall_get_nr(current, regs) < NR_syscalls; + } + + return false; +} + /* * Syscall specific exit to user mode preparation. Runs with interrupts * enabled. @@ -268,7 +351,7 @@ static void syscall_exit_to_user_mode_pr if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) - local_irq_enable(); + local_irq_enable_full(); } rseq_syscall(regs); @@ -278,7 +361,7 @@ static void syscall_exit_to_user_mode_pr * enabled, we want to run them exactly once per syscall exit with * interrupts enabled. 
*/ - if (unlikely(work & SYSCALL_WORK_EXIT)) + if (syscall_has_exit_work(regs, work)) syscall_exit_work(regs, work); } @@ -304,6 +387,8 @@ __visible noinstr void syscall_exit_to_u noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs) { + WARN_ON_ONCE(irq_pipeline_debug() && irqs_disabled()); + stall_inband_nocheck(); __enter_from_user_mode(regs); } @@ -319,13 +404,37 @@ noinstr irqentry_state_t irqentry_enter( { irqentry_state_t ret = { .exit_rcu = false, +#ifdef CONFIG_IRQ_PIPELINE + .stage_info = IRQENTRY_INBAND_STALLED, +#endif }; +#ifdef CONFIG_IRQ_PIPELINE + if (running_oob()) { + WARN_ON_ONCE(irq_pipeline_debug() && oob_irqs_disabled()); + ret.stage_info = IRQENTRY_OOB; + return ret; + } +#endif + if (user_mode(regs)) { +#ifdef CONFIG_IRQ_PIPELINE + ret.stage_info = IRQENTRY_INBAND_UNSTALLED; +#endif irqentry_enter_from_user_mode(regs); return ret; } +#ifdef CONFIG_IRQ_PIPELINE + /* + * IRQ pipeline: If we trapped from kernel space, the virtual + * state may or may not match the hardware state. Since hard + * irqs are off on entry, we have to stall the in-band stage. + */ + if (!test_and_stall_inband_nocheck()) + ret.stage_info = IRQENTRY_INBAND_UNSTALLED; +#endif + /* * If this entry hit the idle task invoke rcu_irq_enter() whether * RCU is watching or not. @@ -395,14 +504,91 @@ void irqentry_exit_cond_resched(void) DEFINE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched); #endif +#ifdef CONFIG_IRQ_PIPELINE + +static inline +bool irqexit_may_preempt_schedule(irqentry_state_t state, + struct pt_regs *regs) +{ + return state.stage_info == IRQENTRY_INBAND_UNSTALLED; +} + +#else + +static inline +bool irqexit_may_preempt_schedule(irqentry_state_t state, + struct pt_regs *regs) +{ + return !regs_irqs_disabled(regs); +} + +#endif + +#ifdef CONFIG_IRQ_PIPELINE + +static bool irqentry_syncstage(irqentry_state_t state) /* hard irqs off */ +{ + /* + * If pipelining interrupts, enable in-band IRQs then + * synchronize the interrupt log on exit if: + * + * - irqentry_enter() stalled the stage in order to mirror the + * hardware state. + * + * - we where coming from oob, thus went through a stage migration + * that was caused by taking a CPU exception, e.g., a fault. + * + * We run before preempt_schedule_irq() may be called later on + * by preemptible kernels, so that any rescheduling request + * triggered by in-band IRQ handlers is considered. 
+ */ + if (state.stage_info == IRQENTRY_INBAND_UNSTALLED || + state.stage_info == IRQENTRY_OOB) { + unstall_inband_nocheck(); + synchronize_pipeline_on_irq(); + stall_inband_nocheck(); + return true; + } + + return false; +} + +static void irqentry_unstall(void) +{ + unstall_inband_nocheck(); +} + +#else + +static bool irqentry_syncstage(irqentry_state_t state) +{ + return false; +} + +static void irqentry_unstall(void) +{ +} + +#endif + noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) { + bool synchronized = false; + + if (running_oob()) + return; + lockdep_assert_irqs_disabled(); /* Check whether this returns to user mode */ if (user_mode(regs)) { irqentry_exit_to_user_mode(regs); - } else if (!regs_irqs_disabled(regs)) { + return; + } + + synchronized = irqentry_syncstage(state); + + if (irqexit_may_preempt_schedule(state, regs)) { /* * If RCU was not watching on entry this needs to be done * carefully and needs the same ordering of lockdep/tracing @@ -416,7 +602,7 @@ noinstr void irqentry_exit(struct pt_reg instrumentation_end(); rcu_irq_exit(); lockdep_hardirqs_on(CALLER_ADDR0); - return; + goto out; } instrumentation_begin(); @@ -438,6 +624,12 @@ noinstr void irqentry_exit(struct pt_reg if (state.exit_rcu) rcu_irq_exit(); } + +out: + if (synchronized) + irqentry_unstall(); + + return; } irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/exit.c linux-dovetail-v5.15.y-dovetail/kernel/exit.c --- linux-5.15.26/kernel/exit.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/exit.c 2022-03-10 09:47:50.000000000 +0100 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -779,6 +780,7 @@ void __noreturn do_exit(long code) io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ + inband_exit_notify(); /* sync mm's RSS info before statistics gathering */ if (tsk->mm) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/fork.c linux-dovetail-v5.15.y-dovetail/kernel/fork.c --- linux-5.15.26/kernel/fork.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/fork.c 2022-03-10 09:47:50.000000000 +0100 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -926,6 +927,7 @@ static struct task_struct *dup_task_stru #endif setup_thread_stack(tsk, orig); + inband_task_init(tsk); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); set_task_stack_end_magic(tsk); @@ -1064,6 +1066,9 @@ static struct mm_struct *mm_init(struct #endif mm_init_uprobes_state(mm); hugetlb_count_init(mm); +#ifdef CONFIG_DOVETAIL + memset(&mm->oob_state, 0, sizeof(mm->oob_state)); +#endif if (current->mm) { mm->flags = current->mm->flags & MMF_INIT_MASK; @@ -1112,6 +1117,7 @@ static inline void __mmput(struct mm_str exit_aio(mm); ksm_exit(mm); khugepaged_exit(mm); /* must run before exit_mmap */ + inband_cleanup_notify(mm); /* ditto. 
*/ exit_mmap(mm); mm_put_huge_zero_page(mm); set_mm_exe_file(mm, NULL); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/chip.c linux-dovetail-v5.15.y-dovetail/kernel/irq/chip.c --- linux-5.15.26/kernel/irq/chip.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/chip.c 2022-03-10 09:47:50.000000000 +0100 @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -48,6 +49,10 @@ int irq_set_chip(unsigned int irq, struc if (!chip) chip = &no_irq_chip; + else + WARN_ONCE(irqs_pipelined() && + (chip->flags & IRQCHIP_PIPELINE_SAFE) == 0, + "irqchip %s is not pipeline-safe!", chip->name); desc->irq_data.chip = chip; irq_put_desc_unlock(desc, flags); @@ -385,7 +390,8 @@ static void __irq_disable(struct irq_des */ void irq_disable(struct irq_desc *desc) { - __irq_disable(desc, irq_settings_disable_unlazy(desc)); + __irq_disable(desc, + irq_settings_disable_unlazy(desc) || irqs_pipelined()); } void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu) @@ -517,8 +523,22 @@ static bool irq_may_run(struct irq_desc * If the interrupt is an armed wakeup source, mark it pending * and suspended, disable it and notify the pm core about the * event. + * + * When pipelining, the logic is as follows: + * + * - from a pipeline entry context, we might have preempted + * the oob stage, or irqs might be [virtually] off, so we may + * not run the in-band PM code. Just make sure any wakeup + * interrupt is detected later on when the flow handler + * re-runs from the in-band stage. + * + * - from the in-band context, run the PM wakeup check. */ - if (irq_pm_check_wakeup(desc)) + if (irqs_pipelined()) { + WARN_ON_ONCE(irq_pipeline_debug() && !in_pipeline()); + if (irqd_is_wakeup_armed(&desc->irq_data)) + return true; + } else if (irq_pm_check_wakeup(desc)) return false; /* @@ -542,8 +562,13 @@ void handle_simple_irq(struct irq_desc * { raw_spin_lock(&desc->lock); - if (!irq_may_run(desc)) + if (start_irq_flow() && !irq_may_run(desc)) + goto out_unlock; + + if (on_pipeline_entry()) { + handle_oob_irq(desc); goto out_unlock; + } desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); @@ -579,9 +604,14 @@ void handle_untracked_irq(struct irq_des raw_spin_lock(&desc->lock); - if (!irq_may_run(desc)) + if (start_irq_flow() && !irq_may_run(desc)) goto out_unlock; + if (on_pipeline_entry()) { + handle_oob_irq(desc); + goto out_unlock; + } + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { @@ -603,6 +633,20 @@ out_unlock: } EXPORT_SYMBOL_GPL(handle_untracked_irq); +static inline void cond_eoi_irq(struct irq_desc *desc) +{ + struct irq_chip *chip = desc->irq_data.chip; + + if (!(chip->flags & IRQCHIP_EOI_THREADED)) + chip->irq_eoi(&desc->irq_data); +} + +static inline void mask_cond_eoi_irq(struct irq_desc *desc) +{ + mask_irq(desc); + cond_eoi_irq(desc); +} + /* * Called unconditionally from handle_level_irq() and only for oneshot * interrupts from handle_fasteoi_irq() @@ -633,10 +677,19 @@ static void cond_unmask_irq(struct irq_d void handle_level_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); - mask_ack_irq(desc); - if (!irq_may_run(desc)) + if (start_irq_flow()) { + mask_ack_irq(desc); + + if (!irq_may_run(desc)) + goto out_unlock; + } + + if (on_pipeline_entry()) { + if (handle_oob_irq(desc)) + goto out_unmask; goto out_unlock; + } desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); @@ -651,7 +704,7 @@ void handle_level_irq(struct irq_desc *d kstat_incr_irqs_this_cpu(desc); 
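/*
 * A minimal sketch, not part of the patch: every pipelined flow handler
 * above follows the same two-pass pattern, condensed here from
 * handle_simple_irq(). The handler name is hypothetical and the
 * disabled/no-action checks are omitted for brevity.
 */
static void my_pipelined_flow_handler(struct irq_desc *desc)
{
        raw_spin_lock(&desc->lock);

        if (start_irq_flow() && !irq_may_run(desc))
                goto out_unlock;

        if (on_pipeline_entry()) {
                /* First pass, hard irqs off: deliver to oob or defer. */
                handle_oob_irq(desc);
                goto out_unlock;
        }

        /* Second pass, replayed from the in-band stage. */
        desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
        kstat_incr_irqs_this_cpu(desc);
        handle_irq_event(desc);
out_unlock:
        raw_spin_unlock(&desc->lock);
}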
handle_irq_event(desc); - +out_unmask: cond_unmask_irq(desc); out_unlock: @@ -662,7 +715,10 @@ EXPORT_SYMBOL_GPL(handle_level_irq); static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) { if (!(desc->istate & IRQS_ONESHOT)) { - chip->irq_eoi(&desc->irq_data); + if (!irqs_pipelined()) + chip->irq_eoi(&desc->irq_data); + else if (!irqd_irq_disabled(&desc->irq_data)) + unmask_irq(desc); return; } /* @@ -673,9 +729,11 @@ static void cond_unmask_eoi_irq(struct i */ if (!irqd_irq_disabled(&desc->irq_data) && irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) { - chip->irq_eoi(&desc->irq_data); + if (!irqs_pipelined()) + chip->irq_eoi(&desc->irq_data); unmask_irq(desc); - } else if (!(chip->flags & IRQCHIP_EOI_THREADED)) { + } else if (!irqs_pipelined() && + !(chip->flags & IRQCHIP_EOI_THREADED)) { chip->irq_eoi(&desc->irq_data); } } @@ -695,9 +753,17 @@ void handle_fasteoi_irq(struct irq_desc raw_spin_lock(&desc->lock); - if (!irq_may_run(desc)) + if (start_irq_flow() && !irq_may_run(desc)) goto out; + if (on_pipeline_entry()) { + if (handle_oob_irq(desc)) + chip->irq_eoi(&desc->irq_data); + else + mask_cond_eoi_irq(desc); + goto out_unlock; + } + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); /* @@ -711,13 +777,13 @@ void handle_fasteoi_irq(struct irq_desc } kstat_incr_irqs_this_cpu(desc); - if (desc->istate & IRQS_ONESHOT) + if (!irqs_pipelined() && (desc->istate & IRQS_ONESHOT)) mask_irq(desc); handle_irq_event(desc); cond_unmask_eoi_irq(desc, chip); - +out_unlock: raw_spin_unlock(&desc->lock); return; out: @@ -777,30 +843,42 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); */ void handle_edge_irq(struct irq_desc *desc) { + struct irq_chip *chip = irq_desc_get_chip(desc); + raw_spin_lock(&desc->lock); - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + if (start_irq_flow()) { + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); - if (!irq_may_run(desc)) { - desc->istate |= IRQS_PENDING; - mask_ack_irq(desc); - goto out_unlock; + if (!irq_may_run(desc)) { + desc->istate |= IRQS_PENDING; + mask_ack_irq(desc); + goto out_unlock; + } + + /* + * If its disabled or no action available then mask it + * and get out of here. + */ + if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { + desc->istate |= IRQS_PENDING; + mask_ack_irq(desc); + goto out_unlock; + } } - /* - * If its disabled or no action available then mask it and get - * out of here. 
- */ - if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { - desc->istate |= IRQS_PENDING; - mask_ack_irq(desc); + if (on_pipeline_entry()) { + chip->irq_ack(&desc->irq_data); + desc->istate |= IRQS_EDGE; + handle_oob_irq(desc); goto out_unlock; } kstat_incr_irqs_this_cpu(desc); /* Start handling the irq */ - desc->irq_data.chip->irq_ack(&desc->irq_data); + if (!irqs_pipelined()) + chip->irq_ack(&desc->irq_data); do { if (unlikely(!desc->action)) { @@ -825,6 +903,8 @@ void handle_edge_irq(struct irq_desc *de !irqd_irq_disabled(&desc->irq_data)); out_unlock: + if (on_pipeline_entry()) + desc->istate &= ~IRQS_EDGE; raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL(handle_edge_irq); @@ -843,11 +923,20 @@ void handle_edge_eoi_irq(struct irq_desc raw_spin_lock(&desc->lock); - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + if (start_irq_flow()) { + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); - if (!irq_may_run(desc)) { - desc->istate |= IRQS_PENDING; - goto out_eoi; + if (!irq_may_run(desc)) { + desc->istate |= IRQS_PENDING; + goto out_eoi; + } + } + + if (on_pipeline_entry()) { + desc->istate |= IRQS_EDGE; + if (handle_oob_irq(desc)) + goto out_eoi; + goto out; } /* @@ -872,6 +961,9 @@ void handle_edge_eoi_irq(struct irq_desc out_eoi: chip->irq_eoi(&desc->irq_data); +out: + if (on_pipeline_entry()) + desc->istate &= ~IRQS_EDGE; raw_spin_unlock(&desc->lock); } #endif @@ -885,6 +977,18 @@ out_eoi: void handle_percpu_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); + bool handled; + + if (on_pipeline_entry()) { + if (chip->irq_ack) + chip->irq_ack(&desc->irq_data); + handled = handle_oob_irq(desc); + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + if (!handled && chip->irq_mask) + chip->irq_mask(&desc->irq_data); + return; + } /* * PER CPU interrupts are not serialized. Do not touch @@ -892,13 +996,17 @@ void handle_percpu_irq(struct irq_desc * */ __kstat_incr_irqs_this_cpu(desc); - if (chip->irq_ack) - chip->irq_ack(&desc->irq_data); - - handle_irq_event_percpu(desc); - - if (chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); + if (irqs_pipelined()) { + handle_irq_event_percpu(desc); + if (chip->irq_unmask) + chip->irq_unmask(&desc->irq_data); + } else { + if (chip->irq_ack) + chip->irq_ack(&desc->irq_data); + handle_irq_event_percpu(desc); + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + } } /** @@ -918,6 +1026,18 @@ void handle_percpu_devid_irq(struct irq_ struct irqaction *action = desc->action; unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; + bool handled; + + if (on_pipeline_entry()) { + if (chip->irq_ack) + chip->irq_ack(&desc->irq_data); + handled = handle_oob_irq(desc); + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + if (!handled && chip->irq_mask) + chip->irq_mask(&desc->irq_data); + return; + } /* * PER CPU interrupts are not serialized. Do not touch @@ -925,7 +1045,7 @@ void handle_percpu_devid_irq(struct irq_ */ __kstat_incr_irqs_this_cpu(desc); - if (chip->irq_ack) + if (!irqs_pipelined() && chip->irq_ack) chip->irq_ack(&desc->irq_data); if (likely(action)) { @@ -943,8 +1063,11 @@ void handle_percpu_devid_irq(struct irq_ enabled ? 
" and unmasked" : "", irq, cpu); } - if (chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); + if (irqs_pipelined()) { + if (chip->irq_unmask) + chip->irq_unmask(&desc->irq_data); + } else if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); } /** @@ -1034,6 +1157,7 @@ __irq_do_set_handler(struct irq_desc *de desc->handle_irq = handle; } + irq_settings_set_chained(desc); irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); @@ -1201,9 +1325,18 @@ void handle_fasteoi_ack_irq(struct irq_d raw_spin_lock(&desc->lock); - if (!irq_may_run(desc)) + if (start_irq_flow() && !irq_may_run(desc)) goto out; + if (on_pipeline_entry()) { + chip->irq_ack(&desc->irq_data); + if (handle_oob_irq(desc)) + chip->irq_eoi(&desc->irq_data); + else + mask_cond_eoi_irq(desc); + goto out_unlock; + } + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); /* @@ -1217,11 +1350,13 @@ void handle_fasteoi_ack_irq(struct irq_d } kstat_incr_irqs_this_cpu(desc); - if (desc->istate & IRQS_ONESHOT) - mask_irq(desc); + if (!irqs_pipelined()) { + if (desc->istate & IRQS_ONESHOT) + mask_irq(desc); - /* Start handling the irq */ - desc->irq_data.chip->irq_ack(&desc->irq_data); + /* Start handling the irq */ + chip->irq_ack(&desc->irq_data); + } handle_irq_event(desc); @@ -1232,6 +1367,7 @@ void handle_fasteoi_ack_irq(struct irq_d out: if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) chip->irq_eoi(&desc->irq_data); +out_unlock: raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_fasteoi_ack_irq); @@ -1251,10 +1387,21 @@ void handle_fasteoi_mask_irq(struct irq_ struct irq_chip *chip = desc->irq_data.chip; raw_spin_lock(&desc->lock); - mask_ack_irq(desc); - if (!irq_may_run(desc)) - goto out; + if (start_irq_flow()) { + mask_ack_irq(desc); + + if (!irq_may_run(desc)) + goto out; + } + + if (on_pipeline_entry()) { + if (handle_oob_irq(desc)) + chip->irq_eoi(&desc->irq_data); + else + cond_eoi_irq(desc); + goto out_unlock; + } desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); @@ -1269,7 +1416,7 @@ void handle_fasteoi_mask_irq(struct irq_ } kstat_incr_irqs_this_cpu(desc); - if (desc->istate & IRQS_ONESHOT) + if (!irqs_pipelined() && (desc->istate & IRQS_ONESHOT)) mask_irq(desc); handle_irq_event(desc); @@ -1281,6 +1428,7 @@ void handle_fasteoi_mask_irq(struct irq_ out: if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) chip->irq_eoi(&desc->irq_data); +out_unlock: raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/cpuhotplug.c linux-dovetail-v5.15.y-dovetail/kernel/irq/cpuhotplug.c --- linux-5.15.26/kernel/irq/cpuhotplug.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/cpuhotplug.c 2022-03-10 09:47:50.000000000 +0100 @@ -156,6 +156,9 @@ void irq_migrate_all_off_this_cpu(void) { struct irq_desc *desc; unsigned int irq; + unsigned long flags; + + flags = hard_local_irq_save(); for_each_active_irq(irq) { bool affinity_broken; @@ -170,6 +173,8 @@ void irq_migrate_all_off_this_cpu(void) irq, smp_processor_id()); } } + + hard_local_irq_restore(flags); } static bool hk_should_isolate(struct irq_data *data, unsigned int cpu) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/debug.h linux-dovetail-v5.15.y-dovetail/kernel/irq/debug.h --- linux-5.15.26/kernel/irq/debug.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/debug.h 2022-03-10 09:47:50.000000000 +0100 @@ -33,6 +33,8 @@ static inline void print_irq_desc(unsign 
___P(IRQ_NOREQUEST); ___P(IRQ_NOTHREAD); ___P(IRQ_NOAUTOEN); + ___P(IRQ_OOB); + ___P(IRQ_CHAINED); ___PS(IRQS_AUTODETECT); ___PS(IRQS_REPLAY); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/dummychip.c linux-dovetail-v5.15.y-dovetail/kernel/irq/dummychip.c --- linux-5.15.26/kernel/irq/dummychip.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/dummychip.c 2022-03-10 09:47:50.000000000 +0100 @@ -43,7 +43,7 @@ struct irq_chip no_irq_chip = { .irq_enable = noop, .irq_disable = noop, .irq_ack = ack_bad, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, }; /* @@ -59,6 +59,6 @@ struct irq_chip dummy_irq_chip = { .irq_ack = noop, .irq_mask = noop, .irq_unmask = noop, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, }; EXPORT_SYMBOL_GPL(dummy_irq_chip); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/generic-chip.c linux-dovetail-v5.15.y-dovetail/kernel/irq/generic-chip.c --- linux-5.15.26/kernel/irq/generic-chip.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/generic-chip.c 2022-03-10 09:47:50.000000000 +0100 @@ -16,7 +16,7 @@ #include "internals.h" static LIST_HEAD(gc_list); -static DEFINE_RAW_SPINLOCK(gc_lock); +static DEFINE_HARD_SPINLOCK(gc_lock); /** * irq_gc_noop - NOOP function diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/handle.c linux-dovetail-v5.15.y-dovetail/kernel/irq/handle.c --- linux-5.15.26/kernel/irq/handle.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/handle.c 2022-03-10 09:47:50.000000000 +0100 @@ -32,9 +32,16 @@ void handle_bad_irq(struct irq_desc *des { unsigned int irq = irq_desc_get_irq(desc); + /* Let the in-band stage report the issue. */ + if (on_pipeline_entry()) { + ack_bad_irq(irq); + return; + } + print_irq_desc(irq, desc); kstat_incr_irqs_this_cpu(desc); - ack_bad_irq(irq); + if (!irqs_pipelined()) + ack_bad_irq(irq); } EXPORT_SYMBOL_GPL(handle_bad_irq); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/internals.h linux-dovetail-v5.15.y-dovetail/kernel/irq/internals.h --- linux-5.15.26/kernel/irq/internals.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/internals.h 2022-03-10 09:47:50.000000000 +0100 @@ -50,6 +50,7 @@ enum { * IRQS_PENDING - irq is pending and replayed later * IRQS_SUSPENDED - irq is suspended * IRQS_NMI - irq line is used to deliver NMIs + * IRQS_EDGE - irq line received an edge event */ enum { IRQS_AUTODETECT = 0x00000001, @@ -62,6 +63,7 @@ enum { IRQS_SUSPENDED = 0x00000800, IRQS_TIMINGS = 0x00001000, IRQS_NMI = 0x00002000, + IRQS_EDGE = 0x00004000, }; #include "debug.h" diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/irqdesc.c linux-dovetail-v5.15.y-dovetail/kernel/irq/irqdesc.c --- linux-5.15.26/kernel/irq/irqdesc.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/irqdesc.c 2022-03-10 09:47:50.000000000 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #include "internals.h" @@ -454,6 +455,7 @@ static void free_desc(unsigned int irq) * irq_sysfs_init() as well. 
*/ irq_sysfs_del(desc); + uncache_irq_desc(irq); delete_irq_desc(irq); /* @@ -640,7 +642,7 @@ int handle_irq_desc(struct irq_desc *des return -EINVAL; data = irq_desc_get_irq_data(desc); - if (WARN_ON_ONCE(!in_irq() && handle_enforce_irqctx(data))) + if (WARN_ON_ONCE(!in_hard_irq() && handle_enforce_irqctx(data))) return -EPERM; generic_handle_irq_desc(desc); @@ -649,9 +651,12 @@ int handle_irq_desc(struct irq_desc *des EXPORT_SYMBOL_GPL(handle_irq_desc); /** - * generic_handle_irq - Invoke the handler for a particular irq + * generic_handle_irq - Handle a particular irq * @irq: The irq number to handle * + * The handler is invoked, unless we are entering the interrupt + * pipeline, in which case the incoming IRQ is only scheduled for + * deferred delivery. */ int generic_handle_irq(unsigned int irq) { @@ -693,6 +698,16 @@ int handle_domain_irq(struct irq_domain struct irq_desc *desc; int ret = 0; + if (irqs_pipelined()) { + desc = irq_resolve_mapping(domain, hwirq); + if (likely(desc)) + generic_pipeline_irq_desc(desc, regs); + else + ret = -EINVAL; + set_irq_regs(old_regs); + return ret; + } + irq_enter(); /* The irqdomain code provides boundary checks */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/irqptorture.c linux-dovetail-v5.15.y-dovetail/kernel/irq/irqptorture.c --- linux-5.15.26/kernel/irq/irqptorture.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/irqptorture.c 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,255 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2017 Philippe Gerum . + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "settings.h" + +static void torture_event_handler(struct clock_event_device *dev) +{ + /* + * We are running on the oob stage, in NMI-like mode. Schedule + * a tick on the proxy device to satisfy the corresponding + * timing request asap. + */ + tick_notify_proxy(); +} + +static void setup_proxy(struct clock_proxy_device *dev) +{ + dev->handle_oob_event = torture_event_handler; +} + +static int start_tick_takeover_test(void) +{ + return tick_install_proxy(setup_proxy, cpu_online_mask); +} + +static void stop_tick_takeover_test(void) +{ + tick_uninstall_proxy(cpu_online_mask); +} + +struct stop_machine_p_data { + int origin_cpu; + cpumask_var_t disable_mask; +}; + +static int stop_machine_handler(void *arg) +{ + struct stop_machine_p_data *p = arg; + int cpu = raw_smp_processor_id(); + + /* + * The stop_machine() handler must run with hard + * IRQs off, note the current state in the result mask. + */ + if (hard_irqs_disabled()) + cpumask_set_cpu(cpu, p->disable_mask); + + if (cpu != p->origin_cpu) + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d responds to stop_machine()\n", cpu); + return 0; +} + +/* + * We test stop_machine() as a way to validate IPI handling in a + * pipelined interrupt context. 
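/*
 * A minimal sketch, not part of the patch: data shared with out-of-band
 * code is guarded by a hard spinlock, as done for gc_lock above.
 * Assuming, as the unchanged call sites suggest, that hard spinlocks
 * keep the raw_spin_lock() API; my_oob_lock and my_oob_state are
 * hypothetical.
 */
static DEFINE_HARD_SPINLOCK(my_oob_lock);
static unsigned long my_oob_state;

static void my_update_state(unsigned long val)
{
        raw_spin_lock(&my_oob_lock);    /* serializes with oob callers */
        my_oob_state = val;
        raw_spin_unlock(&my_oob_lock);
}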
+ */ +static int test_stop_machine(void) +{ + struct stop_machine_p_data d; + cpumask_var_t tmp_mask; + int ret = -ENOMEM, cpu; + + if (!zalloc_cpumask_var(&d.disable_mask, GFP_KERNEL)) { + WARN_ON(1); + return ret; + } + + if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL)) { + WARN_ON(1); + goto fail; + } + + ret = -EINVAL; + d.origin_cpu = raw_smp_processor_id(); + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d initiates stop_machine()\n", + d.origin_cpu); + + ret = stop_machine(stop_machine_handler, &d, cpu_online_mask); + WARN_ON(ret); + if (ret) + goto fail; + + /* + * Check whether all handlers did run with hard IRQs off. If + * some of them did not, then we have a problem with the stop + * IRQ delivery. + */ + cpumask_xor(tmp_mask, cpu_online_mask, d.disable_mask); + if (!cpumask_empty(tmp_mask)) { + for_each_cpu(cpu, tmp_mask) + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: hard IRQs ON in stop_machine()" + " handler!\n", cpu); + } + + free_cpumask_var(tmp_mask); +fail: + free_cpumask_var(d.disable_mask); + + return ret; +} + +static struct irq_work_tester { + struct irq_work work; + struct completion done; +} irq_work_tester; + +static void irq_work_handler(struct irq_work *work) +{ + int cpu = raw_smp_processor_id(); + + if (!running_inband()) { + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: irq_work handler not running on" + " in-band stage?!\n", cpu); + return; + } + + if (work != &irq_work_tester.work) + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: irq_work handler received broken" + " arg?!\n", cpu); + else { + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: irq_work handled\n", cpu); + complete(&irq_work_tester.done); + } +} + +static int trigger_oob_work(void *arg) +{ + int cpu = raw_smp_processor_id(); + + if (!running_oob()) { + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: escalated request not running on" + " oob stage?!\n", cpu); + return -EINVAL; + } + + if ((struct irq_work_tester *)arg != &irq_work_tester) { + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: escalation handler received broken" + " arg?!\n", cpu); + return -EINVAL; + } + + irq_work_queue(&irq_work_tester.work); + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: stage escalation request works\n", + cpu); + + return 0; +} + +static int test_interstage_work_injection(void) +{ + struct irq_work_tester *p = &irq_work_tester; + int ret, cpu = raw_smp_processor_id(); + unsigned long rem; + + init_completion(&p->done); + init_irq_work(&p->work, irq_work_handler); + + /* Trigger over the in-band stage. */ + irq_work_queue(&p->work); + rem = wait_for_completion_timeout(&p->done, HZ / 10); + if (!rem) { + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: irq_work trigger from in-band stage not handled!\n", + cpu); + return -EINVAL; + } + + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: in-band->in-band irq_work trigger works\n", cpu); + + reinit_completion(&p->done); + + /* Now try over the oob stage. 
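/*
 * A minimal sketch, not part of the patch: as the interstage test here
 * demonstrates, irq_work is the standard way to defer work from the
 * oob stage to the in-band stage. Names are hypothetical.
 */
static void my_inband_worker(struct irq_work *work)
{
        /* Runs later, from the in-band stage. */
}

static struct irq_work my_work;

static void my_oob_event(void)          /* called from oob context */
{
        irq_work_queue(&my_work);
}

static void my_setup(void)
{
        init_irq_work(&my_work, my_inband_worker);
}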
*/ + ret = run_oob_call(trigger_oob_work, p); + if (ret) + return ret; + + ret = wait_for_completion_timeout(&p->done, HZ / 10); + if (!rem) { + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: irq_work trigger from oob" + " stage not handled!\n", cpu); + return -EINVAL; + } + + pr_alert("irq_pipeline" TORTURE_FLAG + " CPU%d: oob->in-band irq_work trigger works\n", + cpu); + + return 0; +} + +static int __init irqp_torture_init(void) +{ + int ret; + + pr_info("Starting IRQ pipeline tests..."); + + ret = enable_oob_stage("torture"); + if (ret) { + if (ret == -EBUSY) + pr_alert("irq_pipeline" TORTURE_FLAG + " won't run, oob stage '%s' is already installed", + oob_stage.name); + + return ret; + } + + ret = test_stop_machine(); + if (ret) + goto out; + + ret = start_tick_takeover_test(); + if (ret) + goto out; + + ret = test_interstage_work_injection(); + if (!ret) + msleep(1000); + + stop_tick_takeover_test(); +out: + disable_oob_stage(); + pr_info("IRQ pipeline tests %s.", ret ? "FAILED" : "OK"); + + return 0; +} +late_initcall(irqp_torture_init); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/Kconfig linux-dovetail-v5.15.y-dovetail/kernel/irq/Kconfig --- linux-5.15.26/kernel/irq/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -138,6 +138,20 @@ config GENERIC_IRQ_DEBUGFS If you don't know what to do here, say N. +# Interrupt pipeline +config HAVE_IRQ_PIPELINE + bool + +config IRQ_PIPELINE + bool "Interrupt pipeline" + depends on HAVE_IRQ_PIPELINE + select IRQ_DOMAIN + select IRQ_DOMAIN_NOMAP + default n + help + Activate this option if you want the interrupt pipeline to be + compiled in. + endmenu config GENERIC_IRQ_MULTI_HANDLER diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/Makefile linux-dovetail-v5.15.y-dovetail/kernel/irq/Makefile --- linux-5.15.26/kernel/irq/Makefile 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/Makefile 2022-03-10 09:47:50.000000000 +0100 @@ -9,6 +9,8 @@ obj-$(CONFIG_GENERIC_IRQ_CHIP) += generi obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o obj-$(CONFIG_IRQ_SIM) += irq_sim.o +obj-$(CONFIG_IRQ_PIPELINE) += pipeline.o +obj-$(CONFIG_IRQ_PIPELINE_TORTURE_TEST) += irqptorture.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/manage.c linux-dovetail-v5.15.y-dovetail/kernel/irq/manage.c --- linux-5.15.26/kernel/irq/manage.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/manage.c 2022-03-10 09:47:50.000000000 +0100 @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -913,6 +914,50 @@ out_unlock: } EXPORT_SYMBOL(irq_set_irq_wake); +#ifdef CONFIG_IRQ_PIPELINE + +/** + * irq_switch_oob - Control out-of-band setting for a registered IRQ descriptor + * @irq: interrupt to control + * @on: enable/disable pipelining + * + * Enable/disable out-of-band handling for an IRQ. At least one + * action must have been previously registered for such + * interrupt. + * + * The previously registered action(s) need(s) not bearing the + * IRQF_OOB flag for the IRQ to be switched to out-of-band + * handling. This call enables switching pre-installed IRQs from + * in-band to out-of-band handling. 
+ * + * NOTE: This routine affects all action handlers sharing the + * IRQ. + */ +int irq_switch_oob(unsigned int irq, bool on) +{ + struct irq_desc *desc; + unsigned long flags; + int ret = 0; + + desc = irq_get_desc_lock(irq, &flags, 0); + if (!desc) + return -EINVAL; + + if (!desc->action) + ret = -EINVAL; + else if (on) + irq_settings_set_oob(desc); + else + irq_settings_clr_oob(desc); + + irq_put_desc_unlock(desc, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(irq_switch_oob); + +#endif /* CONFIG_IRQ_PIPELINE */ + /* * Internal function that tells the architecture code whether a * particular irq has been exclusively allocated or is available @@ -929,7 +974,8 @@ int can_request_irq(unsigned int irq, un if (irq_settings_can_request(desc)) { if (!desc->action || - irqflags & desc->action->flags & IRQF_SHARED) + ((irqflags & desc->action->flags & IRQF_SHARED) && + !((irqflags ^ desc->action->flags) & IRQF_OOB))) canrequest = 1; } irq_put_desc_unlock(desc, flags); @@ -1476,6 +1522,21 @@ __setup_irq(unsigned int irq, struct irq new->irq = irq; + ret = -EINVAL; + /* + * Out-of-band interrupts can be shared but not threaded. We + * silently ignore the OOB setting if interrupt pipelining is + * disabled. + */ + if (!irqs_pipelined()) + new->flags &= ~IRQF_OOB; + else if (new->flags & IRQF_OOB) { + if (new->thread_fn) + goto out_mput; + new->flags |= IRQF_NO_THREAD; + new->flags &= ~IRQF_ONESHOT; + } + /* * If the trigger type is not specified by the caller, * then use the default for this interrupt. @@ -1489,10 +1550,8 @@ __setup_irq(unsigned int irq, struct irq */ nested = irq_settings_is_nested_thread(desc); if (nested) { - if (!new->thread_fn) { - ret = -EINVAL; + if (!new->thread_fn) goto out_mput; - } /* * Replace the primary handler which was provided from * the driver for non nested interrupt handling by the @@ -1576,7 +1635,7 @@ __setup_irq(unsigned int irq, struct irq * the same type (level, edge, polarity). So both flag * fields must have IRQF_SHARED set and the bits which * set the trigger type must match. Also all must - * agree on ONESHOT. + * agree on ONESHOT and OOB. * Interrupt lines used for NMIs cannot be shared. */ unsigned int oldtype; @@ -1601,7 +1660,7 @@ __setup_irq(unsigned int irq, struct irq if (!((old->flags & new->flags) & IRQF_SHARED) || (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || - ((old->flags ^ new->flags) & IRQF_ONESHOT)) + ((old->flags ^ new->flags) & (IRQF_OOB|IRQF_ONESHOT))) goto mismatch; /* All handlers must agree on per-cpuness */ @@ -1726,6 +1785,9 @@ __setup_irq(unsigned int irq, struct irq if (new->flags & IRQF_ONESHOT) desc->istate |= IRQS_ONESHOT; + if (new->flags & IRQF_OOB) + irq_settings_set_oob(desc); + /* Exclude IRQ from balancing if requested */ if (new->flags & IRQF_NOBALANCING) { irq_settings_set_no_balancing(desc); @@ -1880,6 +1942,8 @@ static struct irqaction *__free_irq(stru irq_settings_clr_disable_unlazy(desc); /* Only shutdown. Deactivate after synchronize_hardirq() */ irq_shutdown(desc); + /* Turn off OOB handling (after shutdown). */ + irq_settings_clr_oob(desc); } #ifdef CONFIG_SMP @@ -1916,14 +1980,15 @@ static struct irqaction *__free_irq(stru #ifdef CONFIG_DEBUG_SHIRQ /* - * It's a shared IRQ -- the driver ought to be prepared for an IRQ - * event to happen even now it's being freed, so let's make sure that - * is so by doing an extra call to the handler .... 
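/*
 * A minimal sketch, not part of the patch: two ways a driver can obtain
 * out-of-band handling, based on the IRQF_OOB and irq_switch_oob()
 * support above. Handler and cookie names are hypothetical; oob
 * handlers cannot be threaded.
 */
static irqreturn_t my_oob_handler(int irq, void *dev_id)
{
        /* Runs out-of-band. */
        return IRQ_HANDLED;
}

static int my_attach(unsigned int irq, void *cookie)
{
        /* Request the line directly for oob handling. */
        return request_irq(irq, my_oob_handler, IRQF_OOB, "my-oob", cookie);
}

static int my_promote(unsigned int irq)
{
        /* Or switch an already requested line to oob handling. */
        return irq_switch_oob(irq, true);
}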
+ * It's a shared IRQ (with in-band handler) -- the driver + * ought to be prepared for an IRQ event to happen even now + * it's being freed, so let's make sure that is so by doing an + * extra call to the handler .... * * ( We do this after actually deregistering it, to make sure that a * 'real' IRQ doesn't run in parallel with our fake. ) */ - if (action->flags & IRQF_SHARED) { + if ((action->flags & (IRQF_SHARED|IRQF_OOB)) == IRQF_SHARED) { local_irq_save(flags); action->handler(irq, dev_id); local_irq_restore(flags); @@ -2550,7 +2615,7 @@ int setup_percpu_irq(unsigned int irq, s * __request_percpu_irq - allocate a percpu interrupt line * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs. - * @flags: Interrupt type flags (IRQF_TIMER only) + * @flags: Interrupt type flags (IRQF_TIMER and/or IRQF_OOB only) * @devname: An ascii name for the claiming device * @dev_id: A percpu cookie passed back to the handler function * @@ -2579,7 +2644,7 @@ int __request_percpu_irq(unsigned int ir !irq_settings_is_per_cpu_devid(desc)) return -EINVAL; - if (flags && flags != IRQF_TIMER) + if (flags & ~(IRQF_TIMER|IRQF_OOB)) return -EINVAL; action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/msi.c linux-dovetail-v5.15.y-dovetail/kernel/irq/msi.c --- linux-5.15.26/kernel/irq/msi.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/msi.c 2022-03-10 09:47:50.000000000 +0100 @@ -410,6 +410,9 @@ static void msi_domain_update_chip_ops(s struct irq_chip *chip = info->chip; BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask); + WARN_ONCE(IS_ENABLED(CONFIG_IRQ_PIPELINE) && + (chip->flags & IRQCHIP_PIPELINE_SAFE) == 0, + "MSI domain irqchip %s is not pipeline-safe!", chip->name); if (!chip->irq_set_affinity) chip->irq_set_affinity = msi_domain_set_affinity; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/pipeline.c linux-dovetail-v5.15.y-dovetail/kernel/irq/pipeline.c --- linux-5.15.26/kernel/irq/pipeline.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/pipeline.c 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,1754 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2016 Philippe Gerum . 
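/*
 * A minimal sketch, not part of the patch: an irqchip which has been
 * audited for pipelining advertises it with IRQCHIP_PIPELINE_SAFE,
 * otherwise irq_set_chip() and the MSI setup above complain. The
 * callbacks are hypothetical stubs, patterned after dummy_irq_chip.
 */
static void my_chip_mask(struct irq_data *d) { }
static void my_chip_unmask(struct irq_data *d) { }

static struct irq_chip my_chip = {
        .name           = "MYCHIP",
        .irq_mask       = my_chip_mask,
        .irq_unmask     = my_chip_unmask,
        .flags          = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE,
};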
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internals.h" + +#ifdef CONFIG_DEBUG_IRQ_PIPELINE +#define trace_on_debug +#else +#define trace_on_debug notrace +#endif + +struct irq_stage inband_stage = { + .name = "Linux", +}; +EXPORT_SYMBOL_GPL(inband_stage); + +struct irq_stage oob_stage; +EXPORT_SYMBOL_GPL(oob_stage); + +struct irq_domain *synthetic_irq_domain; +EXPORT_SYMBOL_GPL(synthetic_irq_domain); + +bool irq_pipeline_oopsing; +EXPORT_SYMBOL_GPL(irq_pipeline_oopsing); + +bool irq_pipeline_active; +EXPORT_SYMBOL_GPL(irq_pipeline_active); + +#define IRQ_L1_MAPSZ BITS_PER_LONG +#define IRQ_L2_MAPSZ (BITS_PER_LONG * BITS_PER_LONG) +#define IRQ_FLAT_MAPSZ DIV_ROUND_UP(IRQ_BITMAP_BITS, BITS_PER_LONG) + +#if IRQ_FLAT_MAPSZ > IRQ_L2_MAPSZ +#define __IRQ_STAGE_MAP_LEVELS 4 /* up to 4/16M vectors */ +#elif IRQ_FLAT_MAPSZ > IRQ_L1_MAPSZ +#define __IRQ_STAGE_MAP_LEVELS 3 /* up to 64/256M vectors */ +#else +#define __IRQ_STAGE_MAP_LEVELS 2 /* up to 1024/4096 vectors */ +#endif + +struct irq_event_map { +#if __IRQ_STAGE_MAP_LEVELS >= 3 + unsigned long index_1[IRQ_L1_MAPSZ]; +#if __IRQ_STAGE_MAP_LEVELS >= 4 + unsigned long index_2[IRQ_L2_MAPSZ]; +#endif +#endif + unsigned long flat[IRQ_FLAT_MAPSZ]; +}; + +#ifdef CONFIG_SMP + +static struct irq_event_map bootup_irq_map __initdata; + +static DEFINE_PER_CPU(struct irq_event_map, irq_map_array[2]); + +DEFINE_PER_CPU(struct irq_pipeline_data, irq_pipeline) = { + .stages = { + [0] = { + .log = { + .map = &bootup_irq_map, + }, + .stage = &inband_stage, + }, + }, +}; + +#else /* !CONFIG_SMP */ + +static struct irq_event_map inband_irq_map; + +static struct irq_event_map oob_irq_map; + +DEFINE_PER_CPU(struct irq_pipeline_data, irq_pipeline) = { + .stages = { + [0] = { + .log = { + .map = &inband_irq_map, + }, + .stage = &inband_stage, + }, + [1] = { + .log = { + .map = &oob_irq_map, + }, + }, + }, +}; + +#endif /* !CONFIG_SMP */ + +EXPORT_PER_CPU_SYMBOL(irq_pipeline); + +static void sirq_noop(struct irq_data *data) { } + +/* Virtual interrupt controller for synthetic IRQs. */ +static struct irq_chip sirq_chip = { + .name = "SIRQC", + .irq_enable = sirq_noop, + .irq_disable = sirq_noop, + .flags = IRQCHIP_PIPELINE_SAFE | IRQCHIP_SKIP_SET_WAKE, +}; + +static int sirq_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_percpu_devid(irq); + irq_set_chip_and_handler(irq, &sirq_chip, handle_synthetic_irq); + + return 0; +} + +static struct irq_domain_ops sirq_domain_ops = { + .map = sirq_map, +}; + +#ifdef CONFIG_SPARSE_IRQ +/* + * The performances of the radix tree in sparse mode are really ugly + * under mm stress on some hw, use a local descriptor cache to ease + * the pain. 
+ */ +#define DESC_CACHE_SZ 128 + +static struct irq_desc *desc_cache[DESC_CACHE_SZ] __cacheline_aligned; + +static inline u32 hash_irq(unsigned int irq) +{ + return jhash(&irq, sizeof(irq), irq) % DESC_CACHE_SZ; +} + +static __always_inline +struct irq_desc *irq_to_cached_desc(unsigned int irq) +{ + int hval = hash_irq(irq); + struct irq_desc *desc = desc_cache[hval]; + + if (unlikely(desc == NULL || irq_desc_get_irq(desc) != irq)) { + desc = irq_to_desc(irq); + desc_cache[hval] = desc; + } + + return desc; +} + +void uncache_irq_desc(unsigned int irq) +{ + int hval = hash_irq(irq); + + desc_cache[hval] = NULL; +} + +#else + +static struct irq_desc *irq_to_cached_desc(unsigned int irq) +{ + return irq_to_desc(irq); +} + +#endif + +/** + * handle_synthetic_irq - synthetic irq handler + * @desc: the interrupt description structure for this irq + * + * Handles synthetic interrupts flowing down the IRQ pipeline + * with per-CPU semantics. + * + * CAUTION: synthetic IRQs may be used to map hardware-generated + * events (e.g. IPIs or traps), we must start handling them as + * common interrupts. + */ +void handle_synthetic_irq(struct irq_desc *desc) +{ + unsigned int irq = irq_desc_get_irq(desc); + struct irqaction *action; + irqreturn_t ret; + void *dev_id; + + if (on_pipeline_entry()) { + handle_oob_irq(desc); + return; + } + + action = desc->action; + if (action == NULL) { + if (printk_ratelimit()) + printk(KERN_WARNING + "CPU%d: WARNING: synthetic IRQ%d has no action.\n", + smp_processor_id(), irq); + return; + } + + __kstat_incr_irqs_this_cpu(desc); + trace_irq_handler_entry(irq, action); + dev_id = raw_cpu_ptr(action->percpu_dev_id); + ret = action->handler(irq, dev_id); + trace_irq_handler_exit(irq, action, ret); +} + +void sync_irq_stage(struct irq_stage *top) +{ + struct irq_stage_data *p; + struct irq_stage *stage; + + /* We must enter over the inband stage with hardirqs off. */ + if (irq_pipeline_debug()) { + WARN_ON_ONCE(!hard_irqs_disabled()); + WARN_ON_ONCE(current_irq_stage != &inband_stage); + } + + stage = top; + + for (;;) { + if (stage == &inband_stage) { + if (test_inband_stall()) + break; + } else { + if (test_oob_stall()) + break; + } + + p = this_staged(stage); + if (stage_irqs_pending(p)) { + if (stage == &inband_stage) + sync_current_irq_stage(); + else { + /* Switch to oob before synchronizing. */ + switch_oob(p); + sync_current_irq_stage(); + /* Then back to the inband stage. */ + switch_inband(this_inband_staged()); + } + } + + if (stage == &inband_stage) + break; + + stage = &inband_stage; + } +} + +void synchronize_pipeline(void) /* hardirqs off */ +{ + struct irq_stage *top = &oob_stage; + int stalled = test_oob_stall(); + + if (unlikely(!oob_stage_present())) { + top = &inband_stage; + stalled = test_inband_stall(); + } + + if (current_irq_stage != top) + sync_irq_stage(top); + else if (!stalled) + sync_current_irq_stage(); +} + +static void __inband_irq_enable(void) +{ + struct irq_stage_data *p; + unsigned long flags; + + check_inband_stage(); + + flags = hard_local_irq_save(); + + unstall_inband_nocheck(); + + p = this_inband_staged(); + if (unlikely(stage_irqs_pending(p) && !in_pipeline())) { + sync_current_irq_stage(); + hard_local_irq_restore(flags); + preempt_check_resched(); + } else { + hard_local_irq_restore(flags); + } +} + +/** + * inband_irq_enable - enable interrupts for the inband stage + * + * Enable interrupts for the inband stage, allowing interrupts to + * preempt the in-band code. 
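/*
 * A minimal sketch, not part of the patch: with CONFIG_IRQ_PIPELINE the
 * regular local_irq_*() helpers only act on the virtual in-band state
 * handled above, while hard_local_irq_*() masks the CPU for both
 * stages. The function name is hypothetical.
 */
static void my_critical_sections(void)
{
        unsigned long flags, hwflags;

        local_irq_save(flags);          /* stalls the in-band stage only */
        /* in-band IRQs are deferred, oob IRQs may still preempt */
        local_irq_restore(flags);

        hwflags = hard_local_irq_save(); /* masks interrupts in the CPU */
        /* nothing preempts here, keep it short */
        hard_local_irq_restore(hwflags);
}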
If in-band IRQs are pending for the + * inband stage in the per-CPU log at the time of this call, they + * are played back. + * + * The caller is expected to tell the tracer about the change, by + * calling trace_hardirqs_on(). + */ +notrace void inband_irq_enable(void) +{ + /* + * We are NOT supposed to enter this code with hard IRQs off. + * If we do, then the caller might be wrongly assuming that + * invoking local_irq_enable() implies enabling hard + * interrupts like the legacy I-pipe did, which is not the + * case anymore. Relax this requirement when oopsing, since + * the kernel may be in a weird state. + */ + WARN_ON_ONCE(irq_pipeline_debug() && hard_irqs_disabled()); + __inband_irq_enable(); +} +EXPORT_SYMBOL(inband_irq_enable); + +/** + * inband_irq_disable - disable interrupts for the inband stage + * + * Disable interrupts for the inband stage, disabling in-band + * interrupts. Out-of-band interrupts can still be taken and + * delivered to their respective handlers though. + */ +notrace void inband_irq_disable(void) +{ + check_inband_stage(); + stall_inband_nocheck(); +} +EXPORT_SYMBOL(inband_irq_disable); + +/** + * inband_irqs_disabled - test the virtual interrupt state + * + * Returns non-zero if interrupts are currently disabled for the + * inband stage, zero otherwise. + * + * May be used from the oob stage too (e.g. for tracing + * purpose). + */ +noinstr int inband_irqs_disabled(void) +{ + return test_inband_stall(); +} +EXPORT_SYMBOL(inband_irqs_disabled); + +/** + * inband_irq_save - test and disable (virtual) interrupts + * + * Save the virtual interrupt state then disables interrupts for + * the inband stage. + * + * Returns the original interrupt state. + */ +trace_on_debug unsigned long inband_irq_save(void) +{ + check_inband_stage(); + return test_and_stall_inband_nocheck(); +} +EXPORT_SYMBOL(inband_irq_save); + +/** + * inband_irq_restore - restore the (virtual) interrupt state + * @x: Interrupt state to restore + * + * Restore the virtual interrupt state from x. If the inband + * stage is unstalled as a consequence of this operation, any + * interrupt pending for the inband stage in the per-CPU log is + * played back. + */ +trace_on_debug void inband_irq_restore(unsigned long flags) +{ + if (flags) + inband_irq_disable(); + else + __inband_irq_enable(); +} +EXPORT_SYMBOL(inband_irq_restore); + +/** + * oob_irq_enable - enable interrupts in the CPU + * + * Enable interrupts in the CPU, allowing out-of-band interrupts + * to preempt any code. If out-of-band IRQs are pending in the + * per-CPU log for the oob stage at the time of this call, they + * are played back. + */ +trace_on_debug void oob_irq_enable(void) +{ + struct irq_stage_data *p; + + hard_local_irq_disable(); + + unstall_oob(); + + p = this_oob_staged(); + if (unlikely(stage_irqs_pending(p))) + synchronize_pipeline(); + + hard_local_irq_enable(); +} +EXPORT_SYMBOL(oob_irq_enable); + +/** + * oob_irq_restore - restore the hardware interrupt state + * @x: Interrupt state to restore + * + * Restore the harware interrupt state from x. If the oob stage + * is unstalled as a consequence of this operation, any interrupt + * pending for the oob stage in the per-CPU log is played back + * prior to turning IRQs on. + * + * NOTE: Stalling the oob stage must always be paired with + * disabling hard irqs and conversely when calling + * oob_irq_restore(), otherwise the latter would badly misbehave + * in unbalanced conditions. 
+ */ +trace_on_debug void __oob_irq_restore(unsigned long flags) /* hw interrupt off */ +{ + struct irq_stage_data *p = this_oob_staged(); + + check_hard_irqs_disabled(); + + if (!flags) { + unstall_oob(); + if (unlikely(stage_irqs_pending(p))) + synchronize_pipeline(); + hard_local_irq_enable(); + } +} +EXPORT_SYMBOL(__oob_irq_restore); + +/** + * stage_disabled - test the interrupt state of the current stage + * + * Returns non-zero if interrupts are currently disabled for the + * current interrupt stage, zero otherwise. + * In other words, returns non-zero either if: + * - interrupts are disabled for the OOB context (i.e. hard disabled), + * - the inband stage is current and inband interrupts are disabled. + */ +noinstr bool stage_disabled(void) +{ + bool ret = true; + + if (!hard_irqs_disabled()) { + ret = false; + if (running_inband()) + ret = test_inband_stall(); + } + + return ret; +} +EXPORT_SYMBOL_GPL(stage_disabled); + +/** + * test_and_lock_stage - test and disable interrupts for the current stage + * @irqsoff: Pointer to boolean denoting stage_disabled() + * on entry + * + * Fully disables interrupts for the current stage. When the + * inband stage is current, the stall bit is raised and hardware + * IRQs are masked as well. Only the latter operation is + * performed when the oob stage is current. + * + * Returns the combined interrupt state on entry including the + * real/hardware (in CPU) and virtual (inband stage) states. For + * this reason, [test_and_]lock_stage() must be paired with + * unlock_stage() exclusively. The combined irq state returned by + * the former may NOT be passed to hard_local_irq_restore(). + * + * The interrupt state of the current stage in the return value + * (i.e. stall bit for the inband stage, hardware interrupt bit + * for the oob stage) must be testable using + * arch_irqs_disabled_flags(). + * + * Notice that test_and_lock_stage(), unlock_stage() are raw + * level ops, which substitute to raw_local_irq_save(), + * raw_local_irq_restore() in lockdep code. Therefore, changes to + * the in-band stall bit must not be propagated to the tracing + * core (i.e. no trace_hardirqs_*() annotations). + */ +noinstr unsigned long test_and_lock_stage(int *irqsoff) +{ + unsigned long flags; + int stalled, dummy; + + if (irqsoff == NULL) + irqsoff = &dummy; + + /* + * Combine the hard irq flag and the stall bit into a single + * state word. We need to fill in the stall bit only if the + * inband stage is current, otherwise it is not relevant. + */ + flags = hard_local_irq_save(); + *irqsoff = hard_irqs_disabled_flags(flags); + if (running_inband()) { + stalled = test_and_stall_inband_nocheck(); + flags = irqs_merge_flags(flags, stalled); + if (stalled) + *irqsoff = 1; + } + + /* + * CAUTION: don't ever pass this verbatim to + * hard_local_irq_restore(). Only unlock_stage() knows how to + * decode and use a combined state word. + */ + return flags; +} +EXPORT_SYMBOL_GPL(test_and_lock_stage); + +/** + * unlock_stage - restore interrupts for the current stage + * @flags: Combined interrupt state to restore as received from + * test_and_lock_stage() + * + * Restore the virtual interrupt state if the inband stage is + * current, and the hardware interrupt state unconditionally. + * The per-CPU log is not played for any stage. 
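+ *
+ * Pairing sketch (illustrative only, not part of this patch):
+ *
+ *	int irqsoff;
+ *	unsigned long flags;
+ *
+ *	flags = test_and_lock_stage(&irqsoff);
+ *	... section protected from both stages ...
+ *	unlock_stage(flags);
+ *
+ * with irqsoff telling the caller whether the current stage was
+ * already protected on entry.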
+ */ +noinstr void unlock_stage(unsigned long irqstate) +{ + unsigned long flags = irqstate; + int stalled; + + WARN_ON_ONCE(irq_pipeline_debug_locking() && !hard_irqs_disabled()); + + if (running_inband()) { + flags = irqs_split_flags(irqstate, &stalled); + if (!stalled) + unstall_inband_nocheck(); + } + + /* + * The hardware interrupt bit is the only flag which may be + * present in the combined state at this point, all other + * status bits have been cleared by irqs_merge_flags(), so + * don't ever try to reload the hardware status register with + * such value directly! + */ + if (!hard_irqs_disabled_flags(flags)) + hard_local_irq_enable(); +} +EXPORT_SYMBOL_GPL(unlock_stage); + +/** + * sync_inband_irqs - Synchronize the inband log + * + * Play any deferred interrupt which might have been logged for the + * in-band stage while running with hard irqs on but stalled. + * + * Called from the unstalled in-band stage. Returns with hard irqs off. + */ +void sync_inband_irqs(void) +{ + struct irq_stage_data *p; + + check_inband_stage(); + WARN_ON_ONCE(irq_pipeline_debug() && irqs_disabled()); + + if (!hard_irqs_disabled()) + hard_local_irq_disable(); + + p = this_inband_staged(); + if (unlikely(stage_irqs_pending(p))) { + /* Do not pile up preemption frames. */ + preempt_disable_notrace(); + sync_current_irq_stage(); + preempt_enable_no_resched_notrace(); + } +} + +static inline bool irq_post_check(struct irq_stage *stage, unsigned int irq) +{ + if (irq_pipeline_debug()) { + if (WARN_ONCE(!hard_irqs_disabled(), + "hard irqs on posting IRQ%u to %s\n", + irq, stage->name)) + return true; + if (WARN_ONCE(irq >= IRQ_BITMAP_BITS, + "cannot post invalid IRQ%u to %s\n", + irq, stage->name)) + return true; + } + + return false; +} + +#if __IRQ_STAGE_MAP_LEVELS == 4 + +/* Must be called hard irqs off. 
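+ *
+ * Worked example of the 4-level radix indexing, assuming 64-bit
+ * longs (illustration only): posting IRQ 100 sets bit 100 in the
+ * flat map, bit 1 (100 / 64) in index_2, bit 0 (100 / 4096) in
+ * index_1 and bit 0 (100 / 262144) in index_0.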
*/ +void irq_post_stage(struct irq_stage *stage, unsigned int irq) +{ + struct irq_stage_data *p = this_staged(stage); + int l0b, l1b, l2b; + + if (irq_post_check(stage, irq)) + return; + + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG * BITS_PER_LONG); + l1b = irq / (BITS_PER_LONG * BITS_PER_LONG); + l2b = irq / BITS_PER_LONG; + + __set_bit(irq, p->log.map->flat); + __set_bit(l2b, p->log.map->index_2); + __set_bit(l1b, p->log.map->index_1); + __set_bit(l0b, &p->log.index_0); +} +EXPORT_SYMBOL_GPL(irq_post_stage); + +#define ltob_1(__n) ((__n) * BITS_PER_LONG) +#define ltob_2(__n) (ltob_1(__n) * BITS_PER_LONG) +#define ltob_3(__n) (ltob_2(__n) * BITS_PER_LONG) + +static inline int pull_next_irq(struct irq_stage_data *p) +{ + unsigned long l0m, l1m, l2m, l3m; + int l0b, l1b, l2b, l3b; + unsigned int irq; + + l0m = p->log.index_0; + if (l0m == 0) + return -1; + l0b = __ffs(l0m); + irq = ltob_3(l0b); + + l1m = p->log.map->index_1[l0b]; + if (unlikely(l1m == 0)) { + WARN_ON_ONCE(1); + return -1; + } + l1b = __ffs(l1m); + irq += ltob_2(l1b); + + l2m = p->log.map->index_2[ltob_1(l0b) + l1b]; + if (unlikely(l2m == 0)) { + WARN_ON_ONCE(1); + return -1; + } + l2b = __ffs(l2m); + irq += ltob_1(l2b); + + l3m = p->log.map->flat[ltob_2(l0b) + ltob_1(l1b) + l2b]; + if (unlikely(l3m == 0)) + return -1; + l3b = __ffs(l3m); + irq += l3b; + + __clear_bit(irq, p->log.map->flat); + if (p->log.map->flat[irq / BITS_PER_LONG] == 0) { + __clear_bit(l2b, &p->log.map->index_2[ltob_1(l0b) + l1b]); + if (p->log.map->index_2[ltob_1(l0b) + l1b] == 0) { + __clear_bit(l1b, &p->log.map->index_1[l0b]); + if (p->log.map->index_1[l0b] == 0) + __clear_bit(l0b, &p->log.index_0); + } + } + + return irq; +} + +#elif __IRQ_STAGE_MAP_LEVELS == 3 + +/* Must be called hard irqs off. */ +void irq_post_stage(struct irq_stage *stage, unsigned int irq) +{ + struct irq_stage_data *p = this_staged(stage); + int l0b, l1b; + + if (irq_post_check(stage, irq)) + return; + + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); + l1b = irq / BITS_PER_LONG; + + __set_bit(irq, p->log.map->flat); + __set_bit(l1b, p->log.map->index_1); + __set_bit(l0b, &p->log.index_0); +} +EXPORT_SYMBOL_GPL(irq_post_stage); + +static inline int pull_next_irq(struct irq_stage_data *p) +{ + unsigned long l0m, l1m, l2m; + int l0b, l1b, l2b, irq; + + l0m = p->log.index_0; + if (unlikely(l0m == 0)) + return -1; + + l0b = __ffs(l0m); + l1m = p->log.map->index_1[l0b]; + if (l1m == 0) + return -1; + + l1b = __ffs(l1m) + l0b * BITS_PER_LONG; + l2m = p->log.map->flat[l1b]; + if (unlikely(l2m == 0)) { + WARN_ON_ONCE(1); + return -1; + } + + l2b = __ffs(l2m); + irq = l1b * BITS_PER_LONG + l2b; + + __clear_bit(irq, p->log.map->flat); + if (p->log.map->flat[l1b] == 0) { + __clear_bit(l1b, p->log.map->index_1); + if (p->log.map->index_1[l0b] == 0) + __clear_bit(l0b, &p->log.index_0); + } + + return irq; +} + +#else /* __IRQ_STAGE_MAP_LEVELS == 2 */ + +/* Must be called hard irqs off. 
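+ *
+ * Two-level example, assuming 64-bit longs (illustration only):
+ * posting IRQ 70 sets bit 70 in the flat map (word 1, bit 6) and
+ * bit 1 in index_0; provided no lower-numbered IRQ is pending,
+ * pull_next_irq() then returns 1 * 64 + 6 = 70.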
*/ +void irq_post_stage(struct irq_stage *stage, unsigned int irq) +{ + struct irq_stage_data *p = this_staged(stage); + int l0b = irq / BITS_PER_LONG; + + if (irq_post_check(stage, irq)) + return; + + __set_bit(irq, p->log.map->flat); + __set_bit(l0b, &p->log.index_0); +} +EXPORT_SYMBOL_GPL(irq_post_stage); + +static inline int pull_next_irq(struct irq_stage_data *p) +{ + unsigned long l0m, l1m; + int l0b, l1b; + + l0m = p->log.index_0; + if (l0m == 0) + return -1; + + l0b = __ffs(l0m); + l1m = p->log.map->flat[l0b]; + if (unlikely(l1m == 0)) { + WARN_ON_ONCE(1); + return -1; + } + + l1b = __ffs(l1m); + __clear_bit(l1b, &p->log.map->flat[l0b]); + if (p->log.map->flat[l0b] == 0) + __clear_bit(l0b, &p->log.index_0); + + return l0b * BITS_PER_LONG + l1b; +} + +#endif /* __IRQ_STAGE_MAP_LEVELS == 2 */ + +/** + * hard_preempt_disable - Disable preemption the hard way + * + * Disable hardware interrupts in the CPU, and disable preemption + * if currently running in-band code on the inband stage. + * + * Return the hardware interrupt state. + */ +unsigned long hard_preempt_disable(void) +{ + unsigned long flags = hard_local_irq_save(); + + if (running_inband()) + preempt_disable(); + + return flags; +} +EXPORT_SYMBOL_GPL(hard_preempt_disable); + +/** + * hard_preempt_enable - Enable preemption the hard way + * + * Enable preemption if currently running in-band code on the + * inband stage, restoring the hardware interrupt state in the CPU. + * The per-CPU log is not played for the oob stage. + */ +void hard_preempt_enable(unsigned long flags) +{ + if (running_inband()) { + preempt_enable_no_resched(); + hard_local_irq_restore(flags); + if (!hard_irqs_disabled_flags(flags)) + preempt_check_resched(); + } else + hard_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(hard_preempt_enable); + +static void handle_unexpected_irq(struct irq_desc *desc, irqreturn_t ret) +{ + unsigned int irq = irq_desc_get_irq(desc); + struct irqaction *action; + + /* + * Since IRQ_HANDLED was not received from any handler, we may + * have a problem dealing with an OOB interrupt. The error + * detection logic is as follows: + * + * - check and complain about any bogus return value from a + * out-of-band IRQ handler: we only allow IRQ_HANDLED and + * IRQ_NONE from those routines. + * + * - filter out spurious IRQs which may have been due to bus + * asynchronicity, those tend to happen infrequently and + * should not cause us to pull the break (see + * note_interrupt()). + * + * - otherwise, stop pipelining the IRQ line after a thousand + * consecutive unhandled events. + * + * NOTE: we should already be holding desc->lock for non + * per-cpu IRQs, since we should only get there from the + * pipeline entry context. 
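+ *
+ * For instance, an out-of-band handler returning IRQ_WAKE_THREAD
+ * falls into the first category: only IRQ_HANDLED and IRQ_NONE
+ * make sense for oob handlers, so such value is flagged as bogus.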
+ */ + + WARN_ON_ONCE(irq_pipeline_debug() && + !irq_settings_is_per_cpu(desc) && + !raw_spin_is_locked(&desc->lock)); + + if (ret != IRQ_NONE) { + printk(KERN_ERR "out-of-band irq event %d: bogus return value %x\n", + irq, ret); + for_each_action_of_desc(desc, action) + printk(KERN_ERR "[<%p>] %pf", + action->handler, action->handler); + printk(KERN_CONT "\n"); + return; + } + + if (time_after(jiffies, desc->last_unhandled + HZ/10)) + desc->irqs_unhandled = 0; + else + desc->irqs_unhandled++; + + desc->last_unhandled = jiffies; + + if (unlikely(desc->irqs_unhandled > 1000)) { + printk(KERN_ERR "out-of-band irq %d: stuck or unexpected\n", irq); + irq_settings_clr_oob(desc); + desc->istate |= IRQS_SPURIOUS_DISABLED; + irq_disable(desc); + } +} + +static inline void incr_irq_kstat(struct irq_desc *desc) +{ + if (irq_settings_is_per_cpu_devid(desc)) + __kstat_incr_irqs_this_cpu(desc); + else + kstat_incr_irqs_this_cpu(desc); +} + +/* + * do_oob_irq() - Handles interrupts over the oob stage. Hard irqs + * off. + */ +static void do_oob_irq(struct irq_desc *desc) +{ + bool percpu_devid = irq_settings_is_per_cpu_devid(desc); + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t ret = IRQ_NONE, res; + struct irqaction *action; + void *dev_id; + + action = desc->action; + if (unlikely(action == NULL)) + goto done; + + if (percpu_devid) { + trace_irq_handler_entry(irq, action); + dev_id = raw_cpu_ptr(action->percpu_dev_id); + ret = action->handler(irq, dev_id); + trace_irq_handler_exit(irq, action, ret); + } else { + desc->istate &= ~IRQS_PENDING; + if (unlikely(irqd_irq_disabled(&desc->irq_data))) + return; + irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); + raw_spin_unlock(&desc->lock); + for_each_action_of_desc(desc, action) { + trace_irq_handler_entry(irq, action); + dev_id = action->dev_id; + res = action->handler(irq, dev_id); + trace_irq_handler_exit(irq, action, res); + ret |= res; + } + raw_spin_lock(&desc->lock); + irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); + } +done: + incr_irq_kstat(desc); + + if (likely(ret & IRQ_HANDLED)) { + desc->irqs_unhandled = 0; + return; + } + + handle_unexpected_irq(desc, ret); +} + +/* + * Over the inband stage, IRQs must be dispatched by the arch-specific + * arch_do_IRQ_pipelined() routine. + * + * Entered with hardirqs on, inband stalled. + */ +static inline +void do_inband_irq(struct irq_desc *desc) +{ + arch_do_IRQ_pipelined(desc); + WARN_ON_ONCE(irq_pipeline_debug() && !irqs_disabled()); +} + +static inline bool is_active_edge_event(struct irq_desc *desc) +{ + return (desc->istate & IRQS_PENDING) && + !irqd_irq_disabled(&desc->irq_data); +} + +bool handle_oob_irq(struct irq_desc *desc) /* hardirqs off */ +{ + struct irq_stage_data *oobd = this_oob_staged(); + unsigned int irq = irq_desc_get_irq(desc); + int stalled; + + /* + * Flow handlers of chained interrupts have no business + * running here: they should decode the event, invoking + * generic_handle_irq() for each cascaded IRQ. + */ + if (WARN_ON_ONCE(irq_pipeline_debug() && + irq_settings_is_chained(desc))) + return false; + + /* + * If no oob stage is present, all interrupts must go to the + * inband stage through the interrupt log. Otherwise, + * out-of-band IRQs are immediately delivered to the oob + * stage, while in-band IRQs still go through the inband stage + * log. + * + * This routine returns a boolean status telling the caller + * whether an out-of-band interrupt was delivered. 
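+ *
+ * Caller-side sketch (illustrative only):
+ *
+ *	if (handle_oob_irq(desc))
+ *		... the event was delivered to the oob stage here ...
+ *	else
+ *		... the event now sits in the inband log and is
+ *		    replayed when the inband stage unstalls ...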
+ */ + if (!oob_stage_present() || !irq_settings_is_oob(desc)) { + irq_post_stage(&inband_stage, irq); + return false; + } + + if (WARN_ON_ONCE(irq_pipeline_debug() && running_inband())) + return false; + + stalled = test_and_stall_oob(); + + if (unlikely(desc->istate & IRQS_EDGE)) { + do { + if (is_active_edge_event(desc)) { + if (irqd_irq_masked(&desc->irq_data)) + unmask_irq(desc); + } + do_oob_irq(desc); + } while (is_active_edge_event(desc)); + } else { + do_oob_irq(desc); + } + + /* + * Cascaded interrupts enter handle_oob_irq() on the stalled + * out-of-band stage during the parent invocation. Make sure + * to restore the stall bit accordingly. + */ + if (likely(!stalled)) + unstall_oob(); + + /* + * CPU migration and/or stage switching over the handler are + * NOT allowed. These should take place over + * irq_exit_pipeline(). + */ + if (irq_pipeline_debug()) { + /* No CPU migration allowed. */ + WARN_ON_ONCE(this_oob_staged() != oobd); + /* No stage migration allowed. */ + WARN_ON_ONCE(current_irq_staged != oobd); + } + + return true; +} + +static inline +void copy_timer_regs(struct irq_desc *desc, struct pt_regs *regs) +{ + struct irq_pipeline_data *p; + + if (desc->action == NULL || !(desc->action->flags & __IRQF_TIMER)) + return; + /* + * Given our deferred dispatching model for regular IRQs, we + * record the preempted context registers only for the latest + * timer interrupt, so that the regular tick handler charges + * CPU times properly. It is assumed that no other interrupt + * handler cares for such information. + */ + p = raw_cpu_ptr(&irq_pipeline); + arch_save_timer_regs(&p->tick_regs, regs); +} + +static __always_inline +struct irq_stage_data *switch_stage_on_irq(void) +{ + struct irq_stage_data *prevd = current_irq_staged, *nextd; + + if (oob_stage_present()) { + nextd = this_oob_staged(); + if (prevd != nextd) + switch_oob(nextd); + } + + return prevd; +} + +static __always_inline +void restore_stage_on_irq(struct irq_stage_data *prevd) +{ + /* + * CPU migration and/or stage switching over + * irq_exit_pipeline() are allowed. Our exit logic is as + * follows: + * + * ENTRY EXIT EPILOGUE + * + * oob oob nop + * inband oob switch inband + * oob inband nop + * inband inband nop + */ + if (prevd->stage == &inband_stage && + current_irq_staged == this_oob_staged()) + switch_inband(this_inband_staged()); +} + +/** + * generic_pipeline_irq_desc - Pass an IRQ to the pipeline + * @desc: Descriptor of the IRQ to pass + * @regs: Register file coming from the low-level handling code + * + * Inject an IRQ into the pipeline from a CPU interrupt or trap + * context. A flow handler runs next for this IRQ. + * + * Hard irqs must be off on entry. Caller should have pushed the + * IRQ regs using set_irq_regs(). + */ +void generic_pipeline_irq_desc(struct irq_desc *desc, struct pt_regs *regs) +{ + int irq = irq_desc_get_irq(desc); + + if (irq_pipeline_debug() && !hard_irqs_disabled()) { + hard_local_irq_disable(); + pr_err("IRQ pipeline: interrupts enabled on entry (IRQ%u)\n", irq); + } + + trace_irq_pipeline_entry(irq); + copy_timer_regs(desc, regs); + generic_handle_irq_desc(desc); + trace_irq_pipeline_exit(irq); +} + +struct irq_stage_data *handle_irq_pipelined_prepare(struct pt_regs *regs) +{ + struct irq_stage_data *prevd; + + /* + * Running with the oob stage stalled implies hardirqs off. + * For this reason, if the oob stage is stalled when we + * receive an interrupt from the hardware, something is badly + * broken in our interrupt state. 
Try fixing up, but without + * great hopes. + */ + if (irq_pipeline_debug()) { + if (test_oob_stall()) { + pr_err("IRQ pipeline: out-of-band stage stalled on IRQ entry\n"); + unstall_oob(); + } + WARN_ON(on_pipeline_entry()); + } + + /* + * Switch early on to the out-of-band stage if present, + * anticipating a companion kernel is going to handle the + * incoming event. If not, never mind, we will switch back + * in-band before synchronizing interrupts. + */ + prevd = switch_stage_on_irq(); + + /* Tell the companion core about the entry. */ + irq_enter_pipeline(); + + /* + * Invariant: IRQs may not pile up in the section covered by + * the PIPELINE_OFFSET marker, because: + * + * - out-of-band handlers called from handle_oob_irq() may NOT + * re-enable hard interrupts. Ever. + * + * - synchronizing the in-band log with hard interrupts + * enabled is done outside of this section. + */ + preempt_count_add(PIPELINE_OFFSET); + + /* + * From the standpoint of the in-band context when pipelining + * is in effect, an interrupt entry is unsafe in a similar way + * a NMI is, since it may preempt almost anywhere as IRQs are + * only virtually masked most of the time, including inside + * (virtually) interrupt-free sections. Declare a NMI entry so + * that the low handling code is allowed to enter RCU read + * sides (e.g. handle_domain_irq() needs this to resolve IRQ + * mappings). + */ + rcu_nmi_enter(); + + return prevd; +} + +int handle_irq_pipelined_finish(struct irq_stage_data *prevd, + struct pt_regs *regs) +{ + /* + * Leave the (pseudo-)NMI entry for RCU before the out-of-band + * core might reschedule in irq_exit_pipeline(), and + * interrupts are hard enabled again on this CPU as a result + * of switching context. + */ + rcu_nmi_exit(); + + /* + * Make sure to leave the pipeline entry context before + * allowing the companion core to reschedule, and eventually + * synchronizing interrupts. + */ + preempt_count_sub(PIPELINE_OFFSET); + + /* Allow the companion core to reschedule. */ + irq_exit_pipeline(); + + /* Back to the preempted stage. */ + restore_stage_on_irq(prevd); + + /* + * We have to synchronize interrupts because some might have + * been logged while we were busy handling an out-of-band + * event coming from the hardware: + * + * - as a result of calling an out-of-band handler which in + * turn posted them. + * + * - because we posted them directly for scheduling the + * interrupt to happen from the in-band stage. + */ + synchronize_pipeline_on_irq(); + +#ifdef CONFIG_DOVETAIL + /* + * Sending MAYDAY is in essence a rare case, so prefer test + * then maybe clear over test_and_clear. + */ + if (user_mode(regs) && test_thread_flag(TIF_MAYDAY)) + dovetail_call_mayday(regs); +#endif + + return running_inband() && !irqs_disabled(); +} + +int handle_irq_pipelined(struct pt_regs *regs) +{ + struct irq_stage_data *prevd; + + prevd = handle_irq_pipelined_prepare(regs); + arch_handle_irq_pipelined(regs); + return handle_irq_pipelined_finish(prevd, regs); +} + +/** + * irq_inject_pipeline - Inject a software-generated IRQ into the + * pipeline @irq: IRQ to inject + * + * Inject an IRQ into the pipeline by software as if such + * hardware event had happened on the current CPU. 
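+ *
+ * Usage sketch (illustrative only, not part of this patch), e.g.
+ * for resending a lost event from driver code:
+ *
+ *	ret = irq_inject_pipeline(irq);
+ *	if (ret)
+ *		pr_warn("cannot inject IRQ%u (%d)\n", irq, ret);
+ *
+ * The call may be issued with hard irqs on or off; the hardware
+ * interrupt state is saved and restored internally.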
+ */ +int irq_inject_pipeline(unsigned int irq) +{ + struct irq_stage_data *oobd, *prevd; + struct irq_desc *desc; + unsigned long flags; + + desc = irq_to_cached_desc(irq); + if (desc == NULL) + return -EINVAL; + + flags = hard_local_irq_save(); + + /* + * Handle the case of an IRQ sent to a stalled oob stage here, + * which allows to trap the same condition in handle_oob_irq() + * in a debug check (see comment there). + */ + oobd = this_oob_staged(); + if (oob_stage_present() && + irq_settings_is_oob(desc) && + test_oob_stall()) { + irq_post_stage(&oob_stage, irq); + } else { + prevd = switch_stage_on_irq(); + irq_enter_pipeline(); + handle_oob_irq(desc); + irq_exit_pipeline(); + restore_stage_on_irq(prevd); + synchronize_pipeline_on_irq(); + } + + hard_local_irq_restore(flags); + + return 0; + +} +EXPORT_SYMBOL_GPL(irq_inject_pipeline); + +/* + * sync_current_irq_stage() -- Flush the pending IRQs for the current + * stage (and processor). This routine flushes the interrupt log (see + * "Optimistic interrupt protection" from D. Stodolsky et al. for more + * on the deferred interrupt scheme). Every interrupt which has + * occurred while the pipeline was stalled gets played. + * + * CAUTION: CPU migration may occur over this routine if running over + * the inband stage. + */ +void sync_current_irq_stage(void) /* hard irqs off */ +{ + struct irq_stage_data *p; + struct irq_stage *stage; + struct irq_desc *desc; + int irq; + + WARN_ON_ONCE(irq_pipeline_debug() && on_pipeline_entry()); + check_hard_irqs_disabled(); + + p = current_irq_staged; +respin: + stage = p->stage; + if (stage == &inband_stage) { + /* + * Since we manipulate the stall bit directly, we have + * to open code the IRQ state tracing. + */ + stall_inband_nocheck(); + trace_hardirqs_off(); + } else { + stall_oob(); + } + + for (;;) { + irq = pull_next_irq(p); + if (irq < 0) + break; + /* + * Make sure the compiler does not reorder wrongly, so + * that all updates to maps are done before the + * handler gets called. + */ + barrier(); + + desc = irq_to_cached_desc(irq); + + if (stage == &inband_stage) { + hard_local_irq_enable(); + do_inband_irq(desc); + hard_local_irq_disable(); + } else { + do_oob_irq(desc); + } + + /* + * We might have switched from the oob stage to the + * in-band one on return from the handler, in which + * case we might also have migrated to a different CPU + * (the converse in-band -> oob switch is NOT allowed + * though). Reload the current per-cpu context + * pointer, so that we further pull pending interrupts + * from the proper in-band log. + */ + p = current_irq_staged; + if (p->stage != stage) { + if (WARN_ON_ONCE(irq_pipeline_debug() && + stage == &inband_stage)) + break; + goto respin; + } + } + + if (stage == &inband_stage) { + trace_hardirqs_on(); + unstall_inband_nocheck(); + } else { + unstall_oob(); + } +} + +#ifndef CONFIG_GENERIC_ENTRY + +/* + * These helpers are normally called from the kernel entry/exit code + * in the asm section by architectures which do not use the generic + * kernel entry code, in order to save the interrupt and lockdep + * states for the in-band stage on entry, restoring them when leaving + * the kernel. The per-architecture arch_kentry_set/get_irqstate() + * calls determine where this information should be kept while running + * in kernel context, indexed on the current register frame. 
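+ *
+ * Conceptual sketch only; the kentry_state field below is
+ * hypothetical, each architecture picks its own storage for the
+ * saved bits:
+ *
+ *	#define arch_kentry_set_irqstate(regs, bits)	\
+ *		((regs)->kentry_state = (bits))
+ *	#define arch_kentry_get_irqstate(regs)	\
+ *		((regs)->kentry_state)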
+ */ + +#define KENTRY_STALL_BIT BIT(0) /* Tracks INBAND_STALL_BIT */ +#define KENTRY_LOCKDEP_BIT BIT(1) /* Tracks hardirqs_enabled */ + +asmlinkage __visible noinstr void kentry_enter_pipelined(struct pt_regs *regs) +{ + long irqstate = 0; + + WARN_ON(irq_pipeline_debug() && !hard_irqs_disabled()); + + if (!running_inband()) + return; + + if (lockdep_read_irqs_state()) + irqstate |= KENTRY_LOCKDEP_BIT; + + if (irqs_disabled()) + irqstate |= KENTRY_STALL_BIT; + else + trace_hardirqs_off(); + + arch_kentry_set_irqstate(regs, irqstate); +} + +asmlinkage void __visible noinstr kentry_exit_pipelined(struct pt_regs *regs) +{ + long irqstate; + + WARN_ON(irq_pipeline_debug() && !hard_irqs_disabled()); + + if (!running_inband()) + return; + + /* + * If the in-band stage of the kernel is current but the IRQ + * is not going to be delivered because the latter is stalled, + * keep the tracing logic unaware of the receipt, so that no + * false positive is triggered in lockdep (e.g. IN-HARDIRQ-W + * -> HARDIRQ-ON-W). In this case, we still have to restore + * the lockdep irq state independently, since it might not be + * in sync with the stall bit (e.g. raw_local_irq_disable/save + * do flip the stall bit, but are not tracked by lockdep). + */ + + irqstate = arch_kentry_get_irqstate(regs); + if (!(irqstate & KENTRY_STALL_BIT)) { + stall_inband_nocheck(); + trace_hardirqs_on(); + unstall_inband_nocheck(); + } else { + lockdep_write_irqs_state(!!(irqstate & KENTRY_LOCKDEP_BIT)); + } +} + +#endif /* !CONFIG_GENERIC_ENTRY */ + +/** + * run_oob_call - escalate function call to the oob stage + * @fn: address of routine + * @arg: routine argument + * + * Make the specified function run on the oob stage, switching + * the current stage accordingly if needed. The escalated call is + * allowed to perform a stage migration in the process. + */ +int notrace run_oob_call(int (*fn)(void *arg), void *arg) +{ + struct irq_stage_data *p, *old; + struct irq_stage *oob; + unsigned long flags; + int ret, s; + + flags = hard_local_irq_save(); + + /* Switch to the oob stage if not current. */ + p = this_oob_staged(); + oob = p->stage; + old = current_irq_staged; + if (old != p) + switch_oob(p); + + s = test_and_stall_oob(); + barrier(); + ret = fn(arg); + hard_local_irq_disable(); + if (!s) + unstall_oob(); + + /* + * The exit logic is as follows: + * + * ON-ENTRY AFTER-CALL EPILOGUE + * + * oob oob sync current stage if !stalled + * inband oob switch to inband + sync all stages + * oob inband sync all stages + * inband inband sync all stages + * + * Each path which has stalled the oob stage while running on + * the inband stage at some point during the escalation + * process must synchronize all stages of the pipeline on + * exit. Otherwise, we may restrict the synchronization scope + * to the current stage when the whole sequence ran on the oob + * stage. + */ + p = this_oob_staged(); + if (likely(current_irq_staged == p)) { + if (old->stage == oob) { + if (!s && stage_irqs_pending(p)) + sync_current_irq_stage(); + goto out; + } + switch_inband(this_inband_staged()); + } + + sync_irq_stage(oob); +out: + hard_local_irq_restore(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(run_oob_call); + +int enable_oob_stage(const char *name) +{ + struct irq_event_map *map; + struct irq_stage_data *p; + int cpu, ret; + + if (oob_stage_present()) + return -EBUSY; + + /* Set up the out-of-band interrupt stage on all CPUs. 
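+ *
+ * The per-CPU log storage allocated at boot is reused here: its
+ * pointer is saved across the memset() of the stage descriptor,
+ * then the map contents are cleared before being re-attached.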
*/ + + for_each_possible_cpu(cpu) { + p = &per_cpu(irq_pipeline.stages, cpu)[1]; + map = p->log.map; /* save/restore after memset(). */ + memset(p, 0, sizeof(*p)); + p->stage = &oob_stage; + memset(map, 0, sizeof(struct irq_event_map)); + p->log.map = map; +#ifdef CONFIG_DEBUG_IRQ_PIPELINE + p->cpu = cpu; +#endif + } + + ret = arch_enable_oob_stage(); + if (ret) + return ret; + + oob_stage.name = name; + smp_wmb(); + oob_stage.index = 1; + + pr_info("IRQ pipeline: high-priority %s stage added.\n", name); + + return 0; +} +EXPORT_SYMBOL_GPL(enable_oob_stage); + +void disable_oob_stage(void) +{ + const char *name = oob_stage.name; + + WARN_ON(!running_inband() || !oob_stage_present()); + + oob_stage.index = 0; + smp_wmb(); + + pr_info("IRQ pipeline: %s stage removed.\n", name); +} +EXPORT_SYMBOL_GPL(disable_oob_stage); + +void irq_pipeline_oops(void) +{ + irq_pipeline_oopsing = true; + local_irq_disable_full(); +} + +/* + * Used to save/restore the status bits of the inband stage across runs + * of NMI-triggered code, so that we can restore the original pipeline + * state before leaving NMI context. + */ +static DEFINE_PER_CPU(unsigned long, nmi_saved_stall_bits); + +noinstr void irq_pipeline_nmi_enter(void) +{ + raw_cpu_write(nmi_saved_stall_bits, current->stall_bits); + +} +EXPORT_SYMBOL(irq_pipeline_nmi_enter); + +noinstr void irq_pipeline_nmi_exit(void) +{ + current->stall_bits = raw_cpu_read(nmi_saved_stall_bits); +} +EXPORT_SYMBOL(irq_pipeline_nmi_exit); + +bool __weak irq_cpuidle_control(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + /* + * Allow entering the idle state by default, matching the + * original behavior when CPU_IDLE is turned + * on. irq_cpuidle_control() may be overriden by an + * out-of-band code for determining whether the CPU may + * actually enter the idle state. + */ + return true; +} + +/** + * irq_cpuidle_enter - Prepare for entering the next idle state + * @dev: CPUIDLE device + * @state: CPUIDLE state to be entered + * + * Flush the in-band interrupt log before the caller idles, so + * that no event lingers before we actually wait for the next + * IRQ, in which case we ask the caller to abort the idling + * process altogether. The companion core is also given the + * opportunity to block the idling process by having + * irq_cpuidle_control() return @false. + * + * Returns @true if caller may proceed with idling, @false + * otherwise. The in-band log is guaranteed empty on return, hard + * irqs left off so that no event might sneak in until the caller + * actually idles. + */ +bool irq_cpuidle_enter(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + WARN_ON_ONCE(irq_pipeline_debug() && !irqs_disabled()); + + hard_local_irq_disable(); + + if (stage_irqs_pending(this_inband_staged())) { + unstall_inband_nocheck(); + synchronize_pipeline(); + stall_inband_nocheck(); + trace_hardirqs_off(); + return false; + } + + return irq_cpuidle_control(dev, state); +} + +static unsigned int inband_work_sirq; + +static irqreturn_t inband_work_interrupt(int sirq, void *dev_id) +{ + irq_work_run(); + + return IRQ_HANDLED; +} + +static struct irqaction inband_work = { + .handler = inband_work_interrupt, + .name = "in-band work", + .flags = IRQF_NO_THREAD, +}; + +void irq_local_work_raise(void) +{ + unsigned long flags; + + /* + * irq_work_queue() may be called from the in-band stage too + * in case we want to delay a work until the hard irqs are on + * again, so we may only sync the in-band log when unstalled, + * with hard irqs on. 
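+ *
+ * IOW, the sirq is synced immediately below only when running
+ * in-band with the stall bit clear and hard irqs on; in any other
+ * case it stays logged until the next synchronization point.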
+ */ + flags = hard_local_irq_save(); + irq_post_inband(inband_work_sirq); + if (running_inband() && + !hard_irqs_disabled_flags(flags) && !irqs_disabled()) + sync_current_irq_stage(); + hard_local_irq_restore(flags); +} + +#ifdef CONFIG_DEBUG_IRQ_PIPELINE + +#ifdef CONFIG_LOCKDEP +static inline bool lockdep_on_error(void) +{ + return !debug_locks; +} +#else +static inline bool lockdep_on_error(void) +{ + return false; +} +#endif + +notrace void check_inband_stage(void) +{ + struct irq_stage *this_stage; + unsigned long flags; + + flags = hard_local_irq_save(); + + this_stage = current_irq_stage; + if (likely(this_stage == &inband_stage && !test_oob_stall())) { + hard_local_irq_restore(flags); + return; + } + + if (in_nmi() || irq_pipeline_oopsing || lockdep_on_error()) { + hard_local_irq_restore(flags); + return; + } + + /* + * This will disable all further pipeline debug checks, since + * a wrecked interrupt state is likely to trigger many of + * them, ending up in a terrible mess. IOW, the current + * situation must be fixed prior to investigating any + * subsequent issue that might still exist. + */ + irq_pipeline_oopsing = true; + + hard_local_irq_restore(flags); + + if (this_stage != &inband_stage) + pr_err("IRQ pipeline: some code running in oob context '%s'\n" + " called an in-band only routine\n", + this_stage->name); + else + pr_err("IRQ pipeline: oob stage found stalled while modifying in-band\n" + " interrupt state and/or running sleeping code\n"); + + dump_stack(); +} +EXPORT_SYMBOL(check_inband_stage); + +void check_spinlock_context(void) +{ + WARN_ON_ONCE(in_pipeline() || running_oob()); + +} +EXPORT_SYMBOL(check_spinlock_context); + +#endif /* CONFIG_DEBUG_IRQ_PIPELINE */ + +static inline void fixup_percpu_data(void) +{ +#ifdef CONFIG_SMP + struct irq_pipeline_data *p; + int cpu; + + /* + * A temporary event log is used by the inband stage during the + * early boot up (bootup_irq_map), until the per-cpu areas + * have been set up. + * + * Obviously, this code must run over the boot CPU, before SMP + * operations start, with hard IRQs off so that nothing can + * change under our feet. + */ + WARN_ON(!hard_irqs_disabled()); + + memcpy(&per_cpu(irq_map_array, 0)[0], &bootup_irq_map, + sizeof(struct irq_event_map)); + + for_each_possible_cpu(cpu) { + p = &per_cpu(irq_pipeline, cpu); + p->stages[0].stage = &inband_stage; + p->stages[0].log.map = &per_cpu(irq_map_array, cpu)[0]; + p->stages[1].log.map = &per_cpu(irq_map_array, cpu)[1]; +#ifdef CONFIG_DEBUG_IRQ_PIPELINE + p->stages[0].cpu = cpu; + p->stages[1].cpu = cpu; +#endif + } +#endif +} + +void __init irq_pipeline_init_early(void) +{ + /* + * This is called early from start_kernel(), even before the + * actual number of IRQs is known. We are running on the boot + * CPU, hw interrupts are off, and secondary CPUs are still + * lost in space. Careful. + */ + fixup_percpu_data(); +} + +/** + * irq_pipeline_init - Main pipeline core inits + * + * This is step #2 of the 3-step pipeline initialization, which + * should happen right after init_IRQ() has run. The internal + * service interrupts are created along with the synthetic IRQ + * domain, and the arch-specific init chores are performed too. + * + * Interrupt pipelining should be fully functional when this + * routine returns. 
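+ *
+ * For instance, synthetic IRQs may be allocated and triggered
+ * from this point on (sketch only; my_sirq_work stands for a
+ * hypothetical irqaction, not part of this patch):
+ *
+ *	sirq = irq_create_direct_mapping(synthetic_irq_domain);
+ *	setup_percpu_irq(sirq, &my_sirq_work);
+ *	...
+ *	irq_post_inband(sirq);	... with hard irqs off ...
+ *
+ * which mirrors the in-band work machinery set up in this file.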
+ */ +void __init irq_pipeline_init(void) +{ + WARN_ON(!hard_irqs_disabled()); + + synthetic_irq_domain = irq_domain_add_nomap(NULL, ~0, + &sirq_domain_ops, + NULL); + inband_work_sirq = irq_create_direct_mapping(synthetic_irq_domain); + setup_percpu_irq(inband_work_sirq, &inband_work); + + /* + * We are running on the boot CPU, hw interrupts are off, and + * secondary CPUs are still lost in space. Now we may run + * arch-specific code for enabling the pipeline. + */ + arch_irq_pipeline_init(); + + irq_pipeline_active = true; + + pr_info("IRQ pipeline enabled\n"); +} + +#ifndef CONFIG_SPARSE_IRQ +EXPORT_SYMBOL_GPL(irq_desc); +#endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/proc.c linux-dovetail-v5.15.y-dovetail/kernel/irq/proc.c --- linux-5.15.26/kernel/irq/proc.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/proc.c 2022-03-10 09:47:50.000000000 +0100 @@ -519,6 +519,9 @@ int show_interrupts(struct seq_file *p, #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); #endif +#ifdef CONFIG_IRQ_PIPELINE + seq_printf(p, " %-3s", irq_settings_is_oob(desc) ? "oob" : ""); +#endif if (desc->name) seq_printf(p, "-%-8s", desc->name); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/resend.c linux-dovetail-v5.15.y-dovetail/kernel/irq/resend.c --- linux-5.15.26/kernel/irq/resend.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/resend.c 2022-03-10 09:47:50.000000000 +0100 @@ -16,10 +16,11 @@ #include #include #include +#include #include "internals.h" -#ifdef CONFIG_HARDIRQS_SW_RESEND +#if defined(CONFIG_HARDIRQS_SW_RESEND) && !defined(CONFIG_IRQ_PIPELINE) /* Bitmap to handle software resend of interrupts: */ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS); @@ -82,7 +83,12 @@ static int irq_sw_resend(struct irq_desc #else static int irq_sw_resend(struct irq_desc *desc) { +#if defined(CONFIG_HARDIRQS_SW_RESEND) && defined(CONFIG_IRQ_PIPELINE) + irq_inject_pipeline(irq_desc_get_irq(desc)); + return 0; +#else return -EINVAL; +#endif } #endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq/settings.h linux-dovetail-v5.15.y-dovetail/kernel/irq/settings.h --- linux-5.15.26/kernel/irq/settings.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq/settings.h 2022-03-10 09:47:50.000000000 +0100 @@ -19,6 +19,8 @@ enum { _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, _IRQ_HIDDEN = IRQ_HIDDEN, _IRQ_NO_DEBUG = IRQ_NO_DEBUG, + _IRQ_OOB = IRQ_OOB, + _IRQ_CHAINED = IRQ_CHAINED, _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, }; @@ -35,6 +37,8 @@ enum { #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON #define IRQ_HIDDEN GOT_YOU_MORON #define IRQ_NO_DEBUG GOT_YOU_MORON +#define IRQ_OOB GOT_YOU_MORON +#define IRQ_CHAINED GOT_YOU_MORON #undef IRQF_MODIFY_MASK #define IRQF_MODIFY_MASK GOT_YOU_MORON @@ -186,3 +190,33 @@ static inline bool irq_settings_no_debug { return desc->status_use_accessors & _IRQ_NO_DEBUG; } + +static inline bool irq_settings_is_oob(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_OOB; +} + +static inline void irq_settings_clr_oob(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_OOB; +} + +static inline void irq_settings_set_oob(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_OOB; +} + +static inline bool irq_settings_is_chained(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_CHAINED; +} + +static 
inline void irq_settings_set_chained(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_CHAINED; +} + +static inline void irq_settings_clr_chained(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_CHAINED; +} diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/irq_work.c linux-dovetail-v5.15.y-dovetail/kernel/irq_work.c --- linux-5.15.26/kernel/irq_work.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/irq_work.c 2022-03-10 09:47:50.000000000 +0100 @@ -49,6 +49,11 @@ void __weak arch_irq_work_raise(void) */ } +void __weak irq_local_work_raise(void) +{ + arch_irq_work_raise(); +} + /* Enqueue on current CPU, work must already be claimed and preempt disabled */ static void __irq_work_queue_local(struct irq_work *work) { @@ -56,10 +61,10 @@ static void __irq_work_queue_local(struc if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) { if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) && tick_nohz_tick_stopped()) - arch_irq_work_raise(); + irq_local_work_raise(); } else { if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list))) - arch_irq_work_raise(); + irq_local_work_raise(); } } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/Kconfig.dovetail linux-dovetail-v5.15.y-dovetail/kernel/Kconfig.dovetail --- linux-5.15.26/kernel/Kconfig.dovetail 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/Kconfig.dovetail 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,23 @@ + +# DOVETAIL dual-kernel interface +config HAVE_DOVETAIL + bool + +# Selecting ARCH_WANT_IRQS_OFF_ACTIVATE_MM in this generic Kconfig +# portion is ugly, but the whole ARCH_WANT_IRQS_OFF_ACTIVATE_MM logic +# is a temporary kludge which is meant to disappear anyway. See +# the related comments in exec_mmap() for details. +config DOVETAIL + bool "Dovetail interface" + depends on HAVE_DOVETAIL + select IRQ_PIPELINE + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM + default n + help + Activate this option if you want to enable the interface for + running a secondary kernel side-by-side with Linux (aka + "dual kernel" configuration). 
+ +config DOVETAIL_LEGACY_SYSCALL_RANGE + depends on DOVETAIL + def_bool y diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/kthread.c linux-dovetail-v5.15.y-dovetail/kernel/kthread.c --- linux-5.15.26/kernel/kthread.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/kthread.c 2022-03-10 09:47:50.000000000 +0100 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1346,6 +1347,7 @@ void kthread_use_mm(struct mm_struct *mm { struct mm_struct *active_mm; struct task_struct *tsk = current; + unsigned long flags; WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(tsk->mm); @@ -1354,6 +1356,7 @@ void kthread_use_mm(struct mm_struct *mm /* Hold off tlb flush IPIs while switching mm's */ local_irq_disable(); active_mm = tsk->active_mm; + protect_inband_mm(flags); if (active_mm != mm) { mmgrab(mm); tsk->active_mm = mm; @@ -1361,6 +1364,7 @@ void kthread_use_mm(struct mm_struct *mm tsk->mm = mm; membarrier_update_current_mm(mm); switch_mm_irqs_off(active_mm, mm, tsk); + unprotect_inband_mm(flags); local_irq_enable(); task_unlock(tsk); #ifdef finish_arch_post_lock_switch diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/locking/lockdep.c linux-dovetail-v5.15.y-dovetail/kernel/locking/lockdep.c --- linux-5.15.26/kernel/locking/lockdep.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/locking/lockdep.c 2022-03-10 09:47:50.000000000 +0100 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -105,9 +106,56 @@ static __always_inline bool lockdep_enab static arch_spinlock_t __lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; static struct task_struct *__owner; +static __always_inline bool lockdep_stage_disabled(void) +{ + return stage_disabled(); +} + +#ifdef CONFIG_IRQ_PIPELINE +/* + * If LOCKDEP is enabled, we want irqs to be disabled for both stages + * when traversing the lockdep code for hard and hybrid locks (at the + * expense of massive latency overhead though). + */ +static __always_inline unsigned long lockdep_stage_test_and_disable(int *irqsoff) +{ + return test_and_lock_stage(irqsoff); +} + +static __always_inline unsigned long lockdep_stage_disable(void) +{ + return lockdep_stage_test_and_disable(NULL); +} + +static __always_inline void lockdep_stage_restore(unsigned long flags) +{ + unlock_stage(flags); +} + +#else + +#define lockdep_stage_test_and_disable(__irqsoff) \ + ({ \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + *(__irqsoff) = irqs_disabled_flags(__flags); \ + __flags; \ + }) + +#define lockdep_stage_disable() \ + ({ \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __flags; \ + }) + +#define lockdep_stage_restore(__flags) raw_local_irq_restore(__flags) + +#endif /* !CONFIG_IRQ_PIPELINE */ + static inline void lockdep_lock(void) { - DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + DEBUG_LOCKS_WARN_ON(!lockdep_stage_disabled()); __this_cpu_inc(lockdep_recursion); arch_spin_lock(&__lock); @@ -885,7 +933,7 @@ look_up_lock_class(const struct lockdep_ /* * We do an RCU walk of the hash, see lockdep_free_key_range(). 
*/ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!lockdep_stage_disabled())) return NULL; hlist_for_each_entry_rcu_notrace(class, hash_head, hash_entry) { @@ -1241,7 +1289,7 @@ register_lock_class(struct lockdep_map * struct hlist_head *hash_head; struct lock_class *class; - DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + DEBUG_LOCKS_WARN_ON(!lockdep_stage_disabled()); class = look_up_lock_class(lock, subclass); if (likely(class)) @@ -4258,7 +4306,7 @@ void lockdep_hardirqs_on_prepare(unsigne * already enabled, yet we find the hardware thinks they are in fact * enabled.. someone messed up their IRQ state tracing. */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!lockdep_stage_disabled())) return; /* @@ -4327,7 +4375,7 @@ void noinstr lockdep_hardirqs_on(unsigne * already enabled, yet we find the hardware thinks they are in fact * enabled.. someone messed up their IRQ state tracing. */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!lockdep_stage_disabled())) return; /* @@ -4369,7 +4417,7 @@ void noinstr lockdep_hardirqs_off(unsign * So we're supposed to get called after you mask local IRQs, but for * some reason the hardware doesn't quite think you did a proper job. */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!lockdep_stage_disabled())) return; if (lockdep_hardirqs_enabled()) { @@ -4402,7 +4450,7 @@ void lockdep_softirqs_on(unsigned long i * We fancy IRQs being disabled here, see softirq.c, avoids * funny state and nesting things. */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!lockdep_stage_disabled())) return; if (current->softirqs_enabled) { @@ -4439,7 +4487,7 @@ void lockdep_softirqs_off(unsigned long /* * We fancy IRQs being disabled here, see softirq.c */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!lockdep_stage_disabled())) return; if (current->softirqs_enabled) { @@ -5146,7 +5194,7 @@ static int reacquire_held_locks(struct t struct held_lock *hlock; int first_idx = idx; - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!lockdep_stage_disabled())) return 0; for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) { @@ -5457,7 +5505,13 @@ static void __lock_unpin_lock(struct loc static noinstr void check_flags(unsigned long flags) { #if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) - if (!debug_locks) + /* + * irq_pipeline: we can't and don't want to check the + * consistency of the irq tracer when running the interrupt + * entry prologue or oob stage code, since the inband stall + * bit does not reflect the current irq state in these cases. + */ + if (on_pipeline_entry() || running_oob() || !debug_locks) return; /* Get the warning out.. 
*/ @@ -5592,6 +5646,7 @@ void lock_acquire(struct lockdep_map *lo struct lockdep_map *nest_lock, unsigned long ip) { unsigned long flags; + int irqsoff; trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); @@ -5618,14 +5673,14 @@ void lock_acquire(struct lockdep_map *lo return; } - raw_local_irq_save(flags); + flags = lockdep_stage_test_and_disable(&irqsoff); check_flags(flags); lockdep_recursion_inc(); __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip, 0, 0); + irqsoff, nest_lock, ip, 0, 0); lockdep_recursion_finish(); - raw_local_irq_restore(flags); + lockdep_stage_restore(flags); } EXPORT_SYMBOL_GPL(lock_acquire); @@ -5638,14 +5693,14 @@ void lock_release(struct lockdep_map *lo if (unlikely(!lockdep_enabled())) return; - raw_local_irq_save(flags); + flags = lockdep_stage_disable(); check_flags(flags); lockdep_recursion_inc(); if (__lock_release(lock, ip)) check_chain_key(current); lockdep_recursion_finish(); - raw_local_irq_restore(flags); + lockdep_stage_restore(flags); } EXPORT_SYMBOL_GPL(lock_release); @@ -5661,13 +5716,13 @@ noinstr int lock_is_held_type(const stru if (unlikely(!lockdep_enabled())) return LOCK_STATE_UNKNOWN; - raw_local_irq_save(flags); + flags = lockdep_stage_disable(); check_flags(flags); lockdep_recursion_inc(); ret = __lock_is_held(lock, read); lockdep_recursion_finish(); - raw_local_irq_restore(flags); + lockdep_stage_restore(flags); return ret; } @@ -5854,12 +5909,12 @@ void lock_contended(struct lockdep_map * if (unlikely(!lock_stat || !lockdep_enabled())) return; - raw_local_irq_save(flags); + flags = lockdep_stage_disable(); check_flags(flags); lockdep_recursion_inc(); __lock_contended(lock, ip); lockdep_recursion_finish(); - raw_local_irq_restore(flags); + lockdep_stage_restore(flags); } EXPORT_SYMBOL_GPL(lock_contended); @@ -5872,12 +5927,12 @@ void lock_acquired(struct lockdep_map *l if (unlikely(!lock_stat || !lockdep_enabled())) return; - raw_local_irq_save(flags); + flags = lockdep_stage_disable(); check_flags(flags); lockdep_recursion_inc(); __lock_acquired(lock, ip); lockdep_recursion_finish(); - raw_local_irq_restore(flags); + lockdep_stage_restore(flags); } EXPORT_SYMBOL_GPL(lock_acquired); #endif @@ -6398,7 +6453,7 @@ void debug_check_no_locks_freed(const vo if (unlikely(!debug_locks)) return; - raw_local_irq_save(flags); + flags = lockdep_stage_disable(); for (i = 0; i < curr->lockdep_depth; i++) { hlock = curr->held_locks + i; @@ -6409,7 +6464,7 @@ void debug_check_no_locks_freed(const vo print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock); break; } - raw_local_irq_restore(flags); + lockdep_stage_restore(flags); } EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/locking/lockdep_internals.h linux-dovetail-v5.15.y-dovetail/kernel/locking/lockdep_internals.h --- linux-5.15.26/kernel/locking/lockdep_internals.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/locking/lockdep_internals.h 2022-03-10 09:47:50.000000000 +0100 @@ -211,12 +211,12 @@ extern struct lock_class lock_classes[MA this_cpu_inc(lockdep_stats.ptr); #define debug_atomic_inc(ptr) { \ - WARN_ON_ONCE(!irqs_disabled()); \ + WARN_ON_ONCE(!hard_irqs_disabled() && !irqs_disabled());\ __this_cpu_inc(lockdep_stats.ptr); \ } #define debug_atomic_dec(ptr) { \ - WARN_ON_ONCE(!irqs_disabled()); \ + WARN_ON_ONCE(!hard_irqs_disabled() && !irqs_disabled());\ __this_cpu_dec(lockdep_stats.ptr); \ } diff -uprN -X 
linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/locking/Makefile linux-dovetail-v5.15.y-dovetail/kernel/locking/Makefile --- linux-5.15.26/kernel/locking/Makefile 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/locking/Makefile 2022-03-10 09:47:50.000000000 +0100 @@ -28,6 +28,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex_api. obj-$(CONFIG_PREEMPT_RT) += spinlock_rt.o ww_rt_mutex.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o +obj-$(CONFIG_IRQ_PIPELINE) += pipeline.o obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/locking/pipeline.c linux-dovetail-v5.15.y-dovetail/kernel/locking/pipeline.c --- linux-5.15.26/kernel/locking/pipeline.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/locking/pipeline.c 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,231 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2016 Philippe Gerum . + */ +#include +#include +#include +#include +#include +#include + +/* + * A hybrid spinlock behaves in different ways depending on the + * current interrupt stage on entry. + * + * Such spinlock always leaves hard IRQs disabled once locked. In + * addition, it stalls the in-band stage when protecting a critical + * section there, disabling preemption like regular spinlocks do as + * well. This combination preserves the regular locking logic when + * called from the in-band stage, while fully disabling preemption by + * other interrupt stages. + * + * When taken from the pipeline entry context, a hybrid lock behaves + * like a hard spinlock, assuming that hard IRQs are already disabled. + * + * The irq descriptor lock (struct irq_desc) is a typical example of + * such lock, which properly serializes accesses regardless of the + * calling context. + */ +void __hybrid_spin_lock(struct raw_spinlock *rlock) +{ + struct hybrid_spinlock *lock; + unsigned long __flags; + + if (running_inband()) + preempt_disable(); + + __flags = hard_local_irq_save(); + hard_lock_acquire(rlock, 0, _RET_IP_); + lock = container_of(rlock, struct hybrid_spinlock, rlock); + lock->hwflags = __flags; +} +EXPORT_SYMBOL(__hybrid_spin_lock); + +void __hybrid_spin_lock_nested(struct raw_spinlock *rlock, int subclass) +{ + struct hybrid_spinlock *lock; + unsigned long __flags; + + if (running_inband()) + preempt_disable(); + + __flags = hard_local_irq_save(); + hard_lock_acquire_nested(rlock, subclass, _RET_IP_); + lock = container_of(rlock, struct hybrid_spinlock, rlock); + lock->hwflags = __flags; +} +EXPORT_SYMBOL(__hybrid_spin_lock_nested); + +void __hybrid_spin_unlock(struct raw_spinlock *rlock) +{ + struct hybrid_spinlock *lock; + unsigned long __flags; + + /* Pick the flags before releasing the lock. 
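+ *
+ * They were stashed into lock->hwflags by the matching lock
+ * operation; once the lock is dropped, another CPU may grab it
+ * and overwrite that field, so read it first.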
*/ + lock = container_of(rlock, struct hybrid_spinlock, rlock); + __flags = lock->hwflags; + hard_lock_release(rlock, _RET_IP_); + hard_local_irq_restore(__flags); + + if (running_inband()) + preempt_enable(); +} +EXPORT_SYMBOL(__hybrid_spin_unlock); + +void __hybrid_spin_lock_irq(struct raw_spinlock *rlock) +{ + struct hybrid_spinlock *lock; + unsigned long __flags; + + __flags = hard_local_irq_save(); + + if (running_inband()) { + stall_inband(); + trace_hardirqs_off(); + preempt_disable(); + } + + hard_lock_acquire(rlock, 0, _RET_IP_); + lock = container_of(rlock, struct hybrid_spinlock, rlock); + lock->hwflags = __flags; +} +EXPORT_SYMBOL(__hybrid_spin_lock_irq); + +void __hybrid_spin_unlock_irq(struct raw_spinlock *rlock) +{ + struct hybrid_spinlock *lock; + unsigned long __flags; + + /* Pick the flags before releasing the lock. */ + lock = container_of(rlock, struct hybrid_spinlock, rlock); + __flags = lock->hwflags; + hard_lock_release(rlock, _RET_IP_); + + if (running_inband()) { + trace_hardirqs_on(); + unstall_inband_nocheck(); + hard_local_irq_restore(__flags); + preempt_enable(); + return; + } + + hard_local_irq_restore(__flags); +} +EXPORT_SYMBOL(__hybrid_spin_unlock_irq); + +unsigned long __hybrid_spin_lock_irqsave(struct raw_spinlock *rlock) +{ + struct hybrid_spinlock *lock; + unsigned long __flags, flags; + + __flags = flags = hard_local_irq_save(); + + if (running_inband()) { + flags = test_and_stall_inband(); + trace_hardirqs_off(); + preempt_disable(); + } + + hard_lock_acquire(rlock, 0, _RET_IP_); + lock = container_of(rlock, struct hybrid_spinlock, rlock); + lock->hwflags = __flags; + + return flags; +} +EXPORT_SYMBOL(__hybrid_spin_lock_irqsave); + +void __hybrid_spin_unlock_irqrestore(struct raw_spinlock *rlock, + unsigned long flags) +{ + struct hybrid_spinlock *lock; + unsigned long __flags; + + /* Pick the flags before releasing the lock. 
*/ + lock = container_of(rlock, struct hybrid_spinlock, rlock); + __flags = lock->hwflags; + hard_lock_release(rlock, _RET_IP_); + + if (running_inband()) { + if (!flags) { + trace_hardirqs_on(); + unstall_inband_nocheck(); + } + hard_local_irq_restore(__flags); + preempt_enable(); + return; + } + + hard_local_irq_restore(__flags); +} +EXPORT_SYMBOL(__hybrid_spin_unlock_irqrestore); + +int __hybrid_spin_trylock(struct raw_spinlock *rlock) +{ + struct hybrid_spinlock *lock; + unsigned long __flags; + + if (running_inband()) + preempt_disable(); + + lock = container_of(rlock, struct hybrid_spinlock, rlock); + __flags = hard_local_irq_save(); + + hard_spin_trylock_prepare(rlock); + if (do_raw_spin_trylock(rlock)) { + lock->hwflags = __flags; + hard_trylock_acquire(rlock, 1, _RET_IP_); + return 1; + } + + hard_spin_trylock_fail(rlock); + hard_local_irq_restore(__flags); + + if (running_inband()) + preempt_enable(); + + return 0; +} +EXPORT_SYMBOL(__hybrid_spin_trylock); + +int __hybrid_spin_trylock_irqsave(struct raw_spinlock *rlock, + unsigned long *flags) +{ + struct hybrid_spinlock *lock; + unsigned long __flags; + bool inband; + + inband = running_inband(); + + __flags = *flags = hard_local_irq_save(); + + lock = container_of(rlock, struct hybrid_spinlock, rlock); + if (inband) { + *flags = test_and_stall_inband(); + trace_hardirqs_off(); + preempt_disable(); + } + + hard_spin_trylock_prepare(rlock); + if (do_raw_spin_trylock(rlock)) { + hard_trylock_acquire(rlock, 1, _RET_IP_); + lock->hwflags = __flags; + return 1; + } + + hard_spin_trylock_fail(rlock); + + if (inband && !*flags) { + trace_hardirqs_on(); + unstall_inband_nocheck(); + } + + hard_local_irq_restore(__flags); + + if (inband) + preempt_enable(); + + return 0; +} +EXPORT_SYMBOL(__hybrid_spin_trylock_irqsave); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/locking/spinlock_debug.c linux-dovetail-v5.15.y-dovetail/kernel/locking/spinlock_debug.c --- linux-5.15.26/kernel/locking/spinlock_debug.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/locking/spinlock_debug.c 2022-03-10 09:47:50.000000000 +0100 @@ -116,6 +116,7 @@ void do_raw_spin_lock(raw_spinlock_t *lo mmiowb_spin_lock(); debug_spin_lock_after(lock); } +EXPORT_SYMBOL_GPL(do_raw_spin_lock); int do_raw_spin_trylock(raw_spinlock_t *lock) { @@ -133,6 +134,7 @@ int do_raw_spin_trylock(raw_spinlock_t * #endif return ret; } +EXPORT_SYMBOL_GPL(do_raw_spin_trylock); void do_raw_spin_unlock(raw_spinlock_t *lock) { @@ -140,6 +142,7 @@ void do_raw_spin_unlock(raw_spinlock_t * debug_spin_unlock(lock); arch_spin_unlock(&lock->raw_lock); } +EXPORT_SYMBOL_GPL(do_raw_spin_unlock); #ifndef CONFIG_PREEMPT_RT static void rwlock_bug(rwlock_t *lock, const char *msg) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/Makefile linux-dovetail-v5.15.y-dovetail/kernel/Makefile --- linux-5.15.26/kernel/Makefile 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/Makefile 2022-03-10 09:47:50.000000000 +0100 @@ -109,6 +109,7 @@ obj-$(CONFIG_TRACE_CLOCK) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o +obj-$(CONFIG_DOVETAIL) += dovetail.o obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ obj-$(CONFIG_KCSAN) += kcsan/ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/notifier.c linux-dovetail-v5.15.y-dovetail/kernel/notifier.c --- linux-5.15.26/kernel/notifier.c 2022-03-02 11:48:10.000000000 +0100 
+++ linux-dovetail-v5.15.y-dovetail/kernel/notifier.c 2022-03-10 09:47:50.000000000 +0100 @@ -194,6 +194,9 @@ int atomic_notifier_call_chain(struct at { int ret; + if (!running_inband()) + return notifier_call_chain(&nh->head, val, v, -1, NULL); + rcu_read_lock(); ret = notifier_call_chain(&nh->head, val, v, -1, NULL); rcu_read_unlock(); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/panic.c linux-dovetail-v5.15.y-dovetail/kernel/panic.c --- linux-5.15.26/kernel/panic.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/panic.c 2022-03-10 09:47:50.000000000 +0100 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,7 @@ static unsigned long tainted_mask = IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0; static int pause_on_oops; static int pause_on_oops_flag; -static DEFINE_SPINLOCK(pause_on_oops_lock); +static DEFINE_HARD_SPINLOCK(pause_on_oops_lock); bool crash_kexec_post_notifiers; int panic_on_warn __read_mostly; unsigned long panic_on_taint; @@ -190,8 +191,9 @@ void panic(const char *fmt, ...) * there is nothing to prevent an interrupt handler (that runs * after setting panic_cpu) from invoking panic() again. */ - local_irq_disable(); + hard_local_irq_disable(); preempt_disable_notrace(); + irq_pipeline_oops(); /* * It's possible to come here directly from a panic-assertion and @@ -267,9 +269,12 @@ void panic(const char *fmt, ...) /* * Run any panic handlers, including those that might need to - * add information to the kmsg dump output. + * add information to the kmsg dump output. Skip panic + * handlers if running over the oob stage, as they would most + * likely break. */ - atomic_notifier_call_chain(&panic_notifier_list, 0, buf); + if (running_inband()) + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); kmsg_dump(KMSG_DUMP_PANIC); @@ -472,7 +477,7 @@ static void do_oops_enter_exit(void) if (!pause_on_oops) return; - spin_lock_irqsave(&pause_on_oops_lock, flags); + raw_spin_lock_irqsave(&pause_on_oops_lock, flags); if (pause_on_oops_flag == 0) { /* This CPU may now print the oops message */ pause_on_oops_flag = 1; @@ -482,21 +487,21 @@ static void do_oops_enter_exit(void) /* This CPU gets to do the counting */ spin_counter = pause_on_oops; do { - spin_unlock(&pause_on_oops_lock); + raw_spin_unlock(&pause_on_oops_lock); spin_msec(MSEC_PER_SEC); - spin_lock(&pause_on_oops_lock); + raw_spin_lock(&pause_on_oops_lock); } while (--spin_counter); pause_on_oops_flag = 0; } else { /* This CPU waits for a different one */ while (spin_counter) { - spin_unlock(&pause_on_oops_lock); + raw_spin_unlock(&pause_on_oops_lock); spin_msec(1); - spin_lock(&pause_on_oops_lock); + raw_spin_lock(&pause_on_oops_lock); } } } - spin_unlock_irqrestore(&pause_on_oops_lock, flags); + raw_spin_unlock_irqrestore(&pause_on_oops_lock, flags); } /* @@ -526,6 +531,7 @@ void oops_enter(void) { tracing_off(); /* can't trust the integrity of the kernel anymore: */ + irq_pipeline_oops(); debug_locks_off(); do_oops_enter_exit(); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/power/hibernate.c linux-dovetail-v5.15.y-dovetail/kernel/power/hibernate.c --- linux-5.15.26/kernel/power/hibernate.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/power/hibernate.c 2022-03-10 09:47:50.000000000 +0100 @@ -305,6 +305,7 @@ static int create_image(int platform_mod goto Enable_cpus; local_irq_disable(); + hard_cond_local_irq_disable(); system_state = 
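The pause_on_oops_lock conversion above illustrates the general recipe for a lock that must also keep out-of-band code at bay: switch the definition to DEFINE_HARD_SPINLOCK() and take it through the raw_spin_* API. A minimal sketch, with my_hard_lock and my_state as stand-in names:

static DEFINE_HARD_SPINLOCK(my_hard_lock);	/* was: DEFINE_SPINLOCK(my_hard_lock); */
static unsigned long my_state;

static void touch_state(void)
{
	unsigned long flags;

	/*
	 * A hard lock really masks interrupts in the CPU while held, so
	 * this section cannot be preempted by the out-of-band stage.
	 */
	raw_spin_lock_irqsave(&my_hard_lock, flags);
	my_state++;
	raw_spin_unlock_irqrestore(&my_hard_lock, flags);
}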
SYSTEM_SUSPEND; @@ -472,6 +473,7 @@ static int resume_target_kernel(bool pla local_irq_disable(); system_state = SYSTEM_SUSPEND; + hard_cond_local_irq_disable(); error = syscore_suspend(); if (error) @@ -593,6 +595,7 @@ int hibernation_platform_enter(void) local_irq_disable(); system_state = SYSTEM_SUSPEND; + hard_cond_local_irq_disable(); syscore_suspend(); if (pm_wakeup_pending()) { error = -EAGAIN; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/printk/internal.h linux-dovetail-v5.15.y-dovetail/kernel/printk/internal.h --- linux-5.15.26/kernel/printk/internal.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/printk/internal.h 2022-03-10 09:47:50.000000000 +0100 @@ -38,6 +38,26 @@ void defer_console_output(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); + +#ifdef CONFIG_IRQ_PIPELINE + +extern bool irq_pipeline_active; + +static inline bool printk_stage_safe(void) +{ + return running_inband() && + (!hard_irqs_disabled() || !irq_pipeline_active); +} + +#else + +static inline bool printk_stage_safe(void) +{ + return true; +} + +#endif + #else /* diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/printk/printk.c linux-dovetail-v5.15.y-dovetail/kernel/printk/printk.c --- linux-5.15.26/kernel/printk/printk.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/printk/printk.c 2022-03-10 09:47:50.000000000 +0100 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -1982,10 +1983,10 @@ static u8 *__printk_recursion_counter(vo bool success = true; \ \ typecheck(u8 *, recursion_ptr); \ - local_irq_save(flags); \ + prb_irq_save(flags); \ (recursion_ptr) = __printk_recursion_counter(); \ if (*(recursion_ptr) > PRINTK_MAX_RECURSION) { \ - local_irq_restore(flags); \ + prb_irq_restore(flags); \ success = false; \ } else { \ (*(recursion_ptr))++; \ @@ -1998,7 +1999,7 @@ static u8 *__printk_recursion_counter(vo do { \ typecheck(u8 *, recursion_ptr); \ (*(recursion_ptr))--; \ - local_irq_restore(flags); \ + prb_irq_restore(flags); \ } while (0) int printk_delay_msec __read_mostly; @@ -2121,9 +2122,6 @@ int vprintk_store(int facility, int leve */ ts_nsec = local_clock(); - if (!printk_enter_irqsave(recursion_ptr, irqflags)) - return 0; - /* * The sprintf needs to come first since the syslog prefix might be * passed in as a parameter. An extra byte must be reserved so that @@ -2147,6 +2145,10 @@ int vprintk_store(int facility, int leve if (dev_info) flags |= LOG_NEWLINE; + /* Disable interrupts as late as possible. 
*/ + if (!printk_enter_irqsave(recursion_ptr, irqflags)) + return 0; + if (flags & LOG_CONT) { prb_rec_init_wr(&r, reserve_size); if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { @@ -2217,6 +2219,12 @@ asmlinkage int vprintk_emit(int facility if (unlikely(suppress_printk)) return 0; + if (unlikely(!printk_stage_safe())) { + printed_len = vprintk_store(facility, level, dev_info, fmt, args); + defer_console_output(); + return printed_len; + } + if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; in_sched = true; @@ -2323,6 +2331,73 @@ asmlinkage __visible void early_printk(c } #endif +#ifdef CONFIG_RAW_PRINTK +static struct console *raw_console; +static DEFINE_RAW_SPINLOCK(raw_console_lock); + +void raw_puts(const char *s, size_t len) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&raw_console_lock, flags); + if (raw_console) + raw_console->write_raw(raw_console, s, len); + raw_spin_unlock_irqrestore(&raw_console_lock, flags); +} +EXPORT_SYMBOL(raw_puts); + +void raw_vprintk(const char *fmt, va_list ap) +{ + char buf[256]; + size_t n; + + if (raw_console == NULL || console_suspended) + return; + + touch_nmi_watchdog(); + n = vscnprintf(buf, sizeof(buf), fmt, ap); + raw_puts(buf, n); +} +EXPORT_SYMBOL(raw_vprintk); + +asmlinkage __visible void raw_printk(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + raw_vprintk(fmt, ap); + va_end(ap); +} +EXPORT_SYMBOL(raw_printk); + +static inline void register_raw_console(struct console *newcon) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&raw_console_lock, flags); + if (newcon->write_raw) + raw_console = newcon; + raw_spin_unlock_irqrestore(&raw_console_lock, flags); +} + +static inline void unregister_raw_console(struct console *oldcon) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&raw_console_lock, flags); + if (oldcon == raw_console) + raw_console = NULL; + raw_spin_unlock_irqrestore(&raw_console_lock, flags); +} + +#else + +static inline void register_raw_console(struct console *newcon) { } + +static inline void unregister_raw_console(struct console *oldcon) { } + +#endif + static int __add_preferred_console(char *name, int idx, char *options, char *brl_options, bool user_specified) { @@ -2987,6 +3062,9 @@ void register_console(struct console *ne if (err || newcon->flags & CON_BRL) return; + /* The latest raw console to register is current. */ + register_raw_console(newcon); + /* * If we have a bootconsole, and are switching to a real console, * don't print everything out again, since when the boot console, and @@ -3072,6 +3150,8 @@ int unregister_console(struct console *c (console->flags & CON_BOOT) ? "boot" : "" , console->name, console->index); + unregister_raw_console(console); + res = _braille_unregister_console(console); if (res < 0) return res; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/printk/printk_ringbuffer.c linux-dovetail-v5.15.y-dovetail/kernel/printk/printk_ringbuffer.c --- linux-5.15.26/kernel/printk/printk_ringbuffer.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/printk/printk_ringbuffer.c 2022-03-10 09:47:50.000000000 +0100 @@ -1353,12 +1353,12 @@ bool prb_reserve_in_last(struct prb_rese struct prb_desc *d; unsigned long id; - local_irq_save(e->irqflags); + prb_irq_save(e->irqflags); /* Transition the newest descriptor back to the reserved state. 
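For contexts where printk() is not stage-safe and therefore gets stored and deferred by the code above, the raw console offers a synchronous escape hatch for debugging. A short sketch; my_oob_debug_dump is hypothetical:

/*
 * Writes directly through the most recently registered console that
 * provides a write_raw() handler, bypassing the printk machinery
 * entirely, so it may be called from out-of-band handlers.
 */
static void my_oob_debug_dump(int irq, u64 timestamp)
{
	raw_printk("oob irq=%d ts=%llu\n", irq, (unsigned long long)timestamp);
}

Regular printk() remains usable from such contexts, but per the vprintk_emit() change above its output only reaches the consoles once the in-band stage resumes and flushes the deferred log.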
*/ d = desc_reopen_last(desc_ring, caller_id, &id); if (!d) { - local_irq_restore(e->irqflags); + prb_irq_restore(e->irqflags); goto fail_reopen; } @@ -1494,12 +1494,12 @@ bool prb_reserve(struct prb_reserved_ent * interrupts during the reserve/commit window in order to minimize * the likelihood of this happening. */ - local_irq_save(e->irqflags); + prb_irq_save(e->irqflags); if (!desc_reserve(rb, &id)) { /* Descriptor reservation failures are tracked. */ atomic_long_inc(&rb->fail); - local_irq_restore(e->irqflags); + prb_irq_restore(e->irqflags); goto fail; } @@ -1604,7 +1604,7 @@ static void _prb_commit(struct prb_reser } /* Restore interrupts, the reserve/commit window is finished. */ - local_irq_restore(e->irqflags); + prb_irq_restore(e->irqflags); } /** diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/printk/printk_ringbuffer.h linux-dovetail-v5.15.y-dovetail/kernel/printk/printk_ringbuffer.h --- linux-5.15.26/kernel/printk/printk_ringbuffer.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/printk/printk_ringbuffer.h 2022-03-10 09:47:50.000000000 +0100 @@ -222,6 +222,18 @@ enum desc_state { #define DESC0_SV(ct_bits) DESC_SV(DESC0_ID(ct_bits), desc_reusable) /* + * When interrupt pipelining is enabled, we want the critical sections + * to be protected against preemption by out-of-band code. + */ +#ifdef CONFIG_IRQ_PIPELINE +#define prb_irq_save(__flags) do { (__flags) = hard_local_irq_save(); } while (0) +#define prb_irq_restore(__flags) hard_local_irq_restore(__flags) +#else +#define prb_irq_save(__flags) local_irq_save(__flags) +#define prb_irq_restore(__flags) local_irq_restore(__flags) +#endif + +/* * Define a ringbuffer with an external text data buffer. The same as * DEFINE_PRINTKRB() but requires specifying an external buffer for the * text data. diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/printk/printk_safe.c linux-dovetail-v5.15.y-dovetail/kernel/printk/printk_safe.c --- linux-5.15.26/kernel/printk/printk_safe.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/printk/printk_safe.c 2022-03-10 09:47:50.000000000 +0100 @@ -9,6 +9,7 @@ #include #include #include +#include #include "internal.h" @@ -38,7 +39,7 @@ asmlinkage int vprintk(const char *fmt, * Use the main logbuf even in NMI. But avoid calling console * drivers that might have their own locks. 
*/ - if (this_cpu_read(printk_context) || in_nmi()) { + if (this_cpu_read(printk_context) || !printk_stage_safe() || in_nmi()) { int len; len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/ptrace.c linux-dovetail-v5.15.y-dovetail/kernel/ptrace.c --- linux-5.15.26/kernel/ptrace.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/ptrace.c 2022-03-10 09:47:50.000000000 +0100 @@ -856,10 +856,12 @@ static int ptrace_resume(struct task_str if (unlikely(!arch_has_block_step())) return -EIO; user_enable_block_step(child); + inband_ptstep_notify(child); } else if (is_singlestep(request) || is_sysemu_singlestep(request)) { if (unlikely(!arch_has_single_step())) return -EIO; user_enable_single_step(child); + inband_ptstep_notify(child); } else { user_disable_single_step(child); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/rcu/tree.c linux-dovetail-v5.15.y-dovetail/kernel/rcu/tree.c --- linux-5.15.26/kernel/rcu/tree.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/rcu/tree.c 2022-03-10 09:47:50.000000000 +0100 @@ -244,6 +244,11 @@ static long rcu_get_n_cbs_cpu(int cpu) return 0; } +static inline bool rcu_in_nonmaskable(void) +{ + return on_pipeline_entry() || in_nmi(); +} + void rcu_softirq_qs(void) { rcu_qs(); @@ -768,7 +773,7 @@ noinstr void rcu_nmi_exit(void) trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ - if (!in_nmi()) + if (!rcu_in_nonmaskable()) rcu_prepare_for_idle(); // instrumentation for the noinstr rcu_dynticks_eqs_enter() @@ -779,7 +784,7 @@ noinstr void rcu_nmi_exit(void) rcu_dynticks_eqs_enter(); // ... but is no longer watching here. - if (!in_nmi()) + if (!rcu_in_nonmaskable()) rcu_dynticks_task_enter(); } @@ -946,7 +951,7 @@ void __rcu_irq_enter_check_tick(void) struct rcu_data *rdp = this_cpu_ptr(&rcu_data); // If we're here from NMI there's nothing to do. - if (in_nmi()) + if (rcu_in_nonmaskable()) return; RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(), @@ -1007,14 +1012,14 @@ noinstr void rcu_nmi_enter(void) */ if (rcu_dynticks_curr_cpu_in_eqs()) { - if (!in_nmi()) + if (!rcu_in_nonmaskable()) rcu_dynticks_task_exit(); // RCU is not watching here ... rcu_dynticks_eqs_exit(); // ... but is watching here. - if (!in_nmi()) { + if (!rcu_in_nonmaskable()) { instrumentation_begin(); rcu_cleanup_after_idle(); instrumentation_end(); @@ -1027,7 +1032,7 @@ noinstr void rcu_nmi_enter(void) instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); incby = 1; - } else if (!in_nmi()) { + } else if (!rcu_in_nonmaskable()) { instrumentation_begin(); rcu_irq_enter_check_tick(); } else { @@ -1105,10 +1110,11 @@ static void rcu_disable_urgency_upon_qs( /** * rcu_is_watching - see if RCU thinks that the current CPU is not idle * - * Return true if RCU is watching the running CPU, which means that this - * CPU can safely enter RCU read-side critical sections. In other words, - * if the current CPU is not in its idle loop or is in an interrupt or - * NMI handler, return true. + * Return true if RCU is watching the running CPU, which means that + * this CPU can safely enter RCU read-side critical sections. In + * other words, if the current CPU is not in its idle loop or is in an + * interrupt, a NMI handler or entering the interrupt pipeline, return + * true. 
* * Make notrace because it can be called by the internal functions of * ftrace, and making this notrace removes unnecessary recursion calls. @@ -1117,6 +1123,12 @@ notrace bool rcu_is_watching(void) { bool ret; + if (on_pipeline_entry()) + return true; + + if (WARN_ON_ONCE(irq_pipeline_debug() && running_oob())) + return false; + preempt_disable_notrace(); ret = !rcu_dynticks_curr_cpu_in_eqs(); preempt_enable_notrace(); @@ -1163,7 +1175,7 @@ bool rcu_lockdep_current_cpu_online(void struct rcu_node *rnp; bool ret = false; - if (in_nmi() || !rcu_scheduler_fully_active) + if (rcu_in_nonmaskable() || !rcu_scheduler_fully_active) return true; preempt_disable_notrace(); rdp = this_cpu_ptr(&rcu_data); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/rcu/tree_plugin.h linux-dovetail-v5.15.y-dovetail/kernel/rcu/tree_plugin.h --- linux-5.15.26/kernel/rcu/tree_plugin.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/rcu/tree_plugin.h 2022-03-10 09:47:50.000000000 +0100 @@ -625,8 +625,9 @@ static void rcu_read_unlock_special(stru bool preempt_bh_were_disabled = !!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)); - /* NMI handlers cannot block and cannot safely manipulate state. */ - if (in_nmi()) + /* Neither NMI handlers nor pipeline entry code can either + block or safely manipulate state. */ + if (rcu_in_nonmaskable()) return; local_irq_save(flags); @@ -815,7 +816,7 @@ void rcu_read_unlock_strict(void) struct rcu_data *rdp; if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) || - irqs_disabled() || preempt_count() || !rcu_state.gp_kthread) + on_pipeline_entry() || irqs_disabled() || preempt_count() || !rcu_state.gp_kthread) return; rdp = this_cpu_ptr(&rcu_data); rcu_report_qs_rdp(rdp); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/rcu/update.c linux-dovetail-v5.15.y-dovetail/kernel/rcu/update.c --- linux-5.15.26/kernel/rcu/update.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/rcu/update.c 2022-03-10 09:47:50.000000000 +0100 @@ -101,6 +101,11 @@ module_param(rcu_normal_after_boot, int, */ static bool rcu_read_lock_held_common(bool *ret) { + if (irqs_pipelined() && + (hard_irqs_disabled() || running_oob())) { + *ret = true; + return true; + } if (!debug_lockdep_rcu_enabled()) { *ret = true; return true; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/sched/core.c linux-dovetail-v5.15.y-dovetail/kernel/sched/core.c --- linux-5.15.26/kernel/sched/core.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/sched/core.c 2022-03-10 09:47:50.000000000 +0100 @@ -2817,6 +2817,8 @@ static int __set_cpus_allowed_ptr_locked if (flags & SCA_USER) user_mask = clear_user_cpus_ptr(p); + inband_migration_notify(p, dest_cpu); + ret = affine_move_task(rq, p, rf, dest_cpu, flags); kfree(user_mask); @@ -3966,7 +3968,7 @@ try_to_wake_up(struct task_struct *p, un * - we're serialized against set_special_state() by virtue of * it disabling IRQs (this allows not taking ->pi_lock). 
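Conversely, code that may be entered from either stage should keep RCU read-side critical sections on the in-band side, which is what the rcu_is_watching() debug check above guards against. A hedged sketch; my_do_lookup() and my_defer_to_inband() are placeholders:

static void my_defer_to_inband(void);	/* placeholder, e.g. posts in-band work */
static void my_do_lookup(void);		/* placeholder for the RCU-protected walk */

static void my_lookup_or_defer(void)
{
	if (!running_inband()) {
		/* Out-of-band caller: punt the RCU-protected work. */
		my_defer_to_inband();
		return;
	}

	rcu_read_lock();
	my_do_lookup();
	rcu_read_unlock();
}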
*/ - if (!ttwu_state_match(p, state, &success)) + if (!ttwu_state_match(p, state, &success) || task_is_off_stage(p)) goto out; trace_sched_waking(p); @@ -3983,7 +3985,7 @@ try_to_wake_up(struct task_struct *p, un */ raw_spin_lock_irqsave(&p->pi_lock, flags); smp_mb__after_spinlock(); - if (!ttwu_state_match(p, state, &success)) + if (!ttwu_state_match(p, state, &success) || task_is_off_stage(p)) goto unlock; trace_sched_waking(p); @@ -4225,6 +4227,9 @@ static void __sched_fork(unsigned long c p->wake_entry.u_flags = CSD_TYPE_TTWU; p->migration_pending = NULL; #endif +#ifdef CONFIG_IRQ_PIPELINE + init_task_stall_bits(p); +#endif } DEFINE_STATIC_KEY_FALSE(sched_numa_balancing); @@ -4750,6 +4755,13 @@ prepare_task_switch(struct rq *rq, struc fire_sched_out_preempt_notifiers(prev, next); kmap_local_sched_out(); prepare_task(next); + prepare_inband_switch(next); + /* + * Do not fold the following hard irqs disabling into + * prepare_inband_switch(), this is required when pipelining + * interrupts, not only by alternate scheduling. + */ + hard_cond_local_irq_disable(); prepare_arch_switch(next); } @@ -4875,8 +4887,19 @@ asmlinkage __visible void schedule_tail( * finish_task_switch() will drop rq->lock() and lower preempt_count * and the preempt_enable() will end up enabling preemption (on * PREEMPT_COUNT kernels). + * + * If interrupts are pipelined, we may enable hard irqs since + * the in-band stage is stalled. If dovetailing is enabled + * too, schedule_tail() is the place where transitions of + * tasks from the in-band to the oob stage completes. The + * companion core is notified that 'prev' is now suspended in + * the in-band stage, and can be safely resumed in the oob + * stage. */ + WARN_ON_ONCE(irq_pipeline_debug() && !irqs_disabled()); + hard_cond_local_irq_enable(); + oob_trampoline(); finish_task_switch(prev); preempt_enable(); @@ -4929,6 +4952,20 @@ context_switch(struct rq *rq, struct tas */ switch_mm_irqs_off(prev->active_mm, next->mm, next); + /* + * If dovetail is enabled, insert a short window of + * opportunity for preemption by out-of-band IRQs + * before finalizing the context switch. + * dovetail_context_switch() can deal with preempting + * partially switched in-band contexts. + */ + if (dovetailing()) { + struct mm_struct *oldmm = prev->active_mm; + prev->active_mm = next->mm; + hard_local_irq_sync(); + prev->active_mm = oldmm; + } + if (!prev->mm) { // from kernel /* will mmdrop() in finish_task_switch(). */ rq->prev_mm = prev->active_mm; @@ -4944,6 +4981,15 @@ context_switch(struct rq *rq, struct tas switch_to(prev, next, prev); barrier(); + /* + * If 'next' is on its way to the oob stage, don't run the + * context switch epilogue just yet. We will do that at some + * point later, when the task switches back to the in-band + * stage. + */ + if (unlikely(inband_switch_tail())) + return NULL; + return finish_task_switch(prev); } @@ -5501,6 +5547,8 @@ static inline void schedule_debug(struct panic("corrupted shadow stack detected inside scheduler\n"); #endif + check_inband_stage(); + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP if (!preempt && READ_ONCE(prev->__state) && prev->non_block_count) { printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", @@ -6166,7 +6214,7 @@ pick_next_task(struct rq *rq, struct tas * * WARNING: must be called with preemption disabled! 
*/ -static void __sched notrace __schedule(unsigned int sched_mode) +static int __sched notrace __schedule(unsigned int sched_mode) { struct task_struct *prev, *next; unsigned long *switch_count; @@ -6289,6 +6337,9 @@ static void __sched notrace __schedule(u /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); + if (dovetailing() && rq == NULL) + /* Task moved to the oob stage. */ + return 1; } else { rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); @@ -6296,6 +6347,8 @@ static void __sched notrace __schedule(u __balance_callbacks(rq); raw_spin_rq_unlock_irq(rq); } + + return 0; } void __noreturn do_task_dead(void) @@ -6367,7 +6420,8 @@ asmlinkage __visible void __sched schedu sched_submit_work(tsk); do { preempt_disable(); - __schedule(SM_NONE); + if (__schedule(SM_NONE)) + return; sched_preempt_enable_no_resched(); } while (need_resched()); sched_update_worker(tsk); @@ -6460,7 +6514,8 @@ static void __sched notrace preempt_sche */ preempt_disable_notrace(); preempt_latency_start(1); - __schedule(SM_PREEMPT); + if (__schedule(SM_PREEMPT)) + return; preempt_latency_stop(1); preempt_enable_no_resched_notrace(); @@ -6482,7 +6537,7 @@ asmlinkage __visible void __sched notrac * If there is a non-zero preempt_count or interrupts are disabled, * we do not want to preempt the current task. Just return.. */ - if (likely(!preemptible())) + if (likely(!running_inband() || !preemptible())) return; preempt_schedule_common(); @@ -6514,7 +6569,7 @@ asmlinkage __visible void __sched notrac { enum ctx_state prev_ctx; - if (likely(!preemptible())) + if (likely(!running_inband() || !preemptible())) return; do { @@ -6675,23 +6730,41 @@ __setup("preempt=", setup_preempt_mode); * off of irq context. * Note, that this is called and return with irqs disabled. This will * protect us against recursive calling from irq. + * + * IRQ pipeline: we are called with hard irqs off, synchronize the + * pipeline then return the same way, so that the in-band log is + * guaranteed empty and further interrupt delivery is postponed by the + * hardware until have exited the kernel. */ asmlinkage __visible void __sched preempt_schedule_irq(void) { enum ctx_state prev_state; + if (irq_pipeline_debug()) { + /* Catch any weirdness in pipelined entry code. */ + if (WARN_ON_ONCE(!running_inband())) + return; + WARN_ON_ONCE(!hard_irqs_disabled()); + } + + hard_cond_local_irq_enable(); + /* Catch callers which need to be fixed */ BUG_ON(preempt_count() || !irqs_disabled()); prev_state = exception_enter(); - do { + for (;;) { preempt_disable(); local_irq_enable(); __schedule(SM_PREEMPT); + sync_inband_irqs(); local_irq_disable(); sched_preempt_enable_no_resched(); - } while (need_resched()); + if (!need_resched()) + break; + hard_cond_local_irq_enable(); + } exception_exit(prev_state); } @@ -8151,6 +8224,8 @@ SYSCALL_DEFINE0(sched_yield) #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) int __sched __cond_resched(void) { + check_inband_stage(); + if (should_resched(0)) { preempt_schedule_common(); return 1; @@ -10812,6 +10887,231 @@ struct cgroup_subsys cpu_cgrp_subsys = { #endif /* CONFIG_CGROUP_SCHED */ +#ifdef CONFIG_DOVETAIL + +int dovetail_leave_inband(void) +{ + struct task_struct *p = current; + struct irq_pipeline_data *pd; + unsigned long flags; + + preempt_disable(); + + pd = raw_cpu_ptr(&irq_pipeline); + + if (WARN_ON_ONCE(dovetail_debug() && pd->task_inflight)) + goto out; /* Paranoid. 
*/ + + raw_spin_lock_irqsave(&p->pi_lock, flags); + pd->task_inflight = p; + /* + * The scope of the off-stage state is broader than _TLF_OOB, + * in that it includes the transition path from the in-band + * context to the oob stage. + */ + set_thread_local_flags(_TLF_OFFSTAGE); + set_current_state(TASK_INTERRUPTIBLE); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + sched_submit_work(p); + /* + * The current task is scheduled out from the inband stage, + * before resuming on the oob stage. Since this code stands + * for the scheduling tail of the oob scheduler, + * arch_dovetail_switch_finish() is called to perform + * architecture-specific fixups (e.g. fpu context reload). + */ + if (likely(__schedule(SM_NONE))) { + arch_dovetail_switch_finish(false); + return 0; + } + + clear_thread_local_flags(_TLF_OFFSTAGE); + pd->task_inflight = NULL; +out: + preempt_enable(); + + return -ERESTARTSYS; +} +EXPORT_SYMBOL_GPL(dovetail_leave_inband); + +void dovetail_resume_inband(void) +{ + struct task_struct *p; + + p = __this_cpu_read(irq_pipeline.rqlock_owner); + if (WARN_ON_ONCE(dovetail_debug() && p == NULL)) + return; + + if (WARN_ON_ONCE(dovetail_debug() && (preempt_count() & STAGE_MASK))) + return; + + finish_task_switch(p); + preempt_enable(); + oob_trampoline(); +} +EXPORT_SYMBOL_GPL(dovetail_resume_inband); + +#ifdef CONFIG_KVM + +#include + +static inline void notify_guest_preempt(void) +{ + struct kvm_oob_notifier *nfy; + struct irq_pipeline_data *p; + + p = raw_cpu_ptr(&irq_pipeline); + nfy = p->vcpu_notify; + if (unlikely(nfy)) + nfy->handler(nfy); +} +#else +static inline void notify_guest_preempt(void) +{ } +#endif + +bool dovetail_context_switch(struct dovetail_altsched_context *out, + struct dovetail_altsched_context *in, + bool leave_inband) +{ + unsigned long pc __maybe_unused, lockdep_irqs; + struct task_struct *next, *prev, *last; + struct mm_struct *prev_mm, *next_mm; + bool inband_tail = false; + + WARN_ON_ONCE(dovetail_debug() && on_pipeline_entry()); + + if (leave_inband) { + struct task_struct *tsk = current; + /* + * We are about to leave the current inband context + * for switching to an out-of-band task, save the + * preempted context information. + */ + out->task = tsk; + out->active_mm = tsk->active_mm; + /* + * Switching out-of-band may require some housekeeping + * from a kernel VM which might currently run guest + * code, notify it about the upcoming preemption. + */ + notify_guest_preempt(); + } + + arch_dovetail_switch_prepare(leave_inband); + + next = in->task; + prev = out->task; + prev_mm = out->active_mm; + next_mm = in->active_mm; + + if (next_mm == NULL) { + in->active_mm = prev_mm; + in->borrowed_mm = true; + enter_lazy_tlb(prev_mm, next); + } else { + switch_oob_mm(prev_mm, next_mm, next); + /* + * We might be switching back to the inband context + * which we preempted earlier, shortly after "current" + * dropped its mm context in the do_exit() path + * (next->mm == NULL). In such a case, a lazy TLB + * state is expected when leaving the mm. + */ + if (next->mm == NULL) + enter_lazy_tlb(prev_mm, next); + } + + if (out->borrowed_mm) { + out->borrowed_mm = false; + out->active_mm = NULL; + } + + /* + * Tasks running out-of-band may alter the (in-band) + * preemption count as long as they don't trigger an in-band + * rescheduling, which Dovetail properly blocks. 
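In terms of calling convention, a companion core would typically drive the in-band-to-oob transition of the current task as sketched below; the surrounding my_core_* code is hypothetical:

/*
 * Hand the current task over to the out-of-band scheduler. A zero
 * return means current is now running on the oob stage; a negative
 * value means the transition was aborted (e.g. a pending signal woke
 * the task up in-band first) and we are still fully in-band.
 */
static int my_core_switch_oob(void)
{
	int ret;

	ret = dovetail_leave_inband();
	if (ret)
		return ret;

	/* From this point, current runs under the companion core. */
	return 0;
}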
+ * + * If the preemption count is not stack-based but a global + * per-cpu variable instead, changing it has a globally + * visible side-effect though, which is a problem if the + * out-of-band task is preempted and schedules away before the + * change is rolled back: this may cause the in-band context + * to later resume with a broken preemption count. + * + * For this reason, the preemption count of any context which + * blocks from the out-of-band stage is carried over and + * restored across switches, emulating a stack-based + * storage. + * + * Eventually, the count is reset to FORK_PREEMPT_COUNT upon + * transition from out-of-band to in-band stage, reinstating + * the value in effect when the converse transition happened + * at some point before. + */ + if (IS_ENABLED(CONFIG_HAVE_PERCPU_PREEMPT_COUNT)) + pc = preempt_count(); + + /* + * Like the preemption count and for the same reason, the irq + * state maintained by lockdep must be preserved across + * switches. + */ + lockdep_irqs = lockdep_read_irqs_state(); + + switch_to(prev, next, last); + barrier(); + + if (check_hard_irqs_disabled()) + hard_local_irq_disable(); + + /* + * If we entered this routine for switching to an out-of-band + * task but don't have _TLF_OOB set for the current context + * when resuming, this portion of code is the switch tail of + * the inband schedule() routine, finalizing a transition to + * the inband stage for the current task. Update the stage + * level as/if required. + */ + if (unlikely(!leave_inband && !test_thread_local_flags(_TLF_OOB))) { + if (IS_ENABLED(CONFIG_HAVE_PERCPU_PREEMPT_COUNT)) + preempt_count_set(FORK_PREEMPT_COUNT); + else if (unlikely(dovetail_debug() && + !(preempt_count() & STAGE_MASK))) + WARN_ON_ONCE(1); + else + preempt_count_sub(STAGE_OFFSET); + + lockdep_write_irqs_state(lockdep_irqs); + + /* + * Fixup the interrupt state conversely to what + * inband_switch_tail() does for the opposite stage + * switching direction. + */ + stall_inband(); + trace_hardirqs_off(); + inband_tail = true; + } else { + if (IS_ENABLED(CONFIG_HAVE_PERCPU_PREEMPT_COUNT)) + preempt_count_set(pc); + + lockdep_write_irqs_state(lockdep_irqs); + } + + arch_dovetail_switch_finish(leave_inband); + + /* + * inband_tail is true whenever we are finalizing a transition + * to the inband stage from the oob context for current. See + * above. + */ + return inband_tail; +} +EXPORT_SYMBOL_GPL(dovetail_context_switch); + +#endif /* CONFIG_DOVETAIL */ + void dump_cpu_task(int cpu) { pr_info("Task dump for CPU %d:\n", cpu); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/sched/idle.c linux-dovetail-v5.15.y-dovetail/kernel/sched/idle.c --- linux-5.15.26/kernel/sched/idle.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/sched/idle.c 2022-03-10 09:47:50.000000000 +0100 @@ -78,6 +78,7 @@ void __weak arch_cpu_idle_dead(void) { } void __weak arch_cpu_idle(void) { cpu_idle_force_poll = 1; + hard_local_irq_enable(); raw_local_irq_enable(); } @@ -85,13 +86,18 @@ void __weak arch_cpu_idle(void) * default_idle_call - Default CPU idle routine. * * To use when the cpuidle framework cannot be used. + * + * When interrupts are pipelined, this call is entered with hard irqs + * on and the in-band stage is stalled. Returns with hard irqs on, + * in-band stage stalled. irq_cpuidle_enter() then turns off hard irqs + * before synchronizing irqs, making sure we have no event lingering + * in the interrupt log as we go for a nap. 
*/ void __cpuidle default_idle_call(void) { if (current_clr_polling_and_test()) { - local_irq_enable(); - } else { - + local_irq_enable_full(); + } else if (irq_cpuidle_enter(NULL, NULL)) { /* hard irqs off now */ trace_cpu_idle(1, smp_processor_id()); stop_critical_timings(); @@ -125,6 +131,8 @@ void __cpuidle default_idle_call(void) start_critical_timings(); trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); + } else { + local_irq_enable_full(); } } @@ -247,6 +255,13 @@ exit_idle: __current_set_polling(); /* + * Catch mishandling of the CPU's interrupt disable flag when + * pipelining IRQs. + */ + if (WARN_ON_ONCE(irq_pipeline_debug() && hard_irqs_disabled())) + hard_local_irq_enable(); + + /* * It is up to the idle functions to reenable local interrupts */ if (WARN_ON_ONCE(irqs_disabled())) @@ -304,6 +319,7 @@ static void do_idle(void) cpu_idle_poll(); } else { cpuidle_idle_call(); + WARN_ON_ONCE(irq_pipeline_debug() && hard_irqs_disabled()); } arch_cpu_idle_exit(); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/sched/sched.h linux-dovetail-v5.15.y-dovetail/kernel/sched/sched.h --- linux-5.15.26/kernel/sched/sched.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/sched/sched.h 2022-03-10 09:47:50.000000000 +0100 @@ -53,6 +53,8 @@ #include #include #include +#include +#include #include #include #include diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/sched/wait.c linux-dovetail-v5.15.y-dovetail/kernel/sched/wait.c --- linux-5.15.26/kernel/sched/wait.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/sched/wait.c 2022-03-10 09:47:50.000000000 +0100 @@ -85,6 +85,8 @@ static int __wake_up_common(struct wait_ wait_queue_entry_t *curr, *next; int cnt = 0; + check_inband_stage(); + lockdep_assert_held(&wq_head->lock); if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/signal.c linux-dovetail-v5.15.y-dovetail/kernel/signal.c --- linux-5.15.26/kernel/signal.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/signal.c 2022-03-10 09:47:50.000000000 +0100 @@ -760,6 +760,10 @@ still_pending: void signal_wake_up_state(struct task_struct *t, unsigned int state) { set_tsk_thread_flag(t, TIF_SIGPENDING); + + /* TIF_SIGPENDING must be set prior to notifying. */ + inband_signal_notify(t); + /* * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. We don't check t->state here because there is a race with it @@ -981,8 +985,11 @@ static inline bool wants_signal(int sig, if (sig == SIGKILL) return true; - if (task_is_stopped_or_traced(p)) + if (task_is_stopped_or_traced(p)) { + if (!signal_pending(p)) + inband_signal_notify(p); return false; + } return task_curr(p) || !task_sigpending(p); } @@ -2237,6 +2244,7 @@ static void ptrace_stop(int exit_code, i * schedule() will not sleep if there is a pending signal that * can awaken the task. */ + inband_ptstop_notify(); set_special_state(TASK_TRACED); /* @@ -2330,6 +2338,8 @@ static void ptrace_stop(int exit_code, i read_unlock(&tasklist_lock); } + inband_ptcont_notify(); + /* * We are back. 
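The check_inband_stage() assertions added to the wakeup and resched paths suggest the same guard for any service that depends on the in-band scheduler; a trivial sketch around a hypothetical helper:

/*
 * In-band only, like __wake_up_common() above: catch misuse from the
 * out-of-band stage before any scheduler state is touched.
 */
static void my_inband_only_kick(struct wait_queue_head *wq)
{
	check_inband_stage();
	wake_up(wq);
}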
Now reacquire the siglock before touching * last_siginfo, so that we are sure to have synchronized with diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/stop_machine.c linux-dovetail-v5.15.y-dovetail/kernel/stop_machine.c --- linux-5.15.26/kernel/stop_machine.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/stop_machine.c 2022-03-10 09:47:50.000000000 +0100 @@ -232,8 +232,8 @@ static int multi_cpu_stop(void *data) curstate = newstate; switch (curstate) { case MULTI_STOP_DISABLE_IRQ: - local_irq_disable(); hard_irq_disable(); + local_irq_disable(); break; case MULTI_STOP_RUN: if (is_active) @@ -254,6 +254,7 @@ static int multi_cpu_stop(void *data) rcu_momentary_dyntick_idle(); } while (curstate != MULTI_STOP_EXIT); + hard_irq_enable(); local_irq_restore(flags); return err; } @@ -611,6 +612,7 @@ int stop_machine_cpuslocked(cpu_stop_fn_ local_irq_save(flags); hard_irq_disable(); ret = (*fn)(data); + hard_irq_enable(); local_irq_restore(flags); return ret; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/time/clockevents.c linux-dovetail-v5.15.y-dovetail/kernel/time/clockevents.c --- linux-5.15.26/kernel/time/clockevents.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/time/clockevents.c 2022-03-10 09:47:50.000000000 +0100 @@ -97,6 +97,7 @@ static int __clockevents_switch_state(st /* Transition with new state-specific callbacks */ switch (state) { case CLOCK_EVT_STATE_DETACHED: + case CLOCK_EVT_STATE_RESERVED: /* The clockevent device is getting replaced. Shut it down. */ case CLOCK_EVT_STATE_SHUTDOWN: @@ -436,6 +437,69 @@ int clockevents_unbind_device(struct clo } EXPORT_SYMBOL_GPL(clockevents_unbind_device); +#ifdef CONFIG_IRQ_PIPELINE + +/** + * clockevents_register_proxy - register a proxy device on the current CPU + * @dev: proxy to register + */ +int clockevents_register_proxy(struct clock_proxy_device *dev) +{ + struct clock_event_device *proxy_dev, *real_dev; + unsigned long flags; + u32 freq; + int ret; + + raw_spin_lock_irqsave(&clockevents_lock, flags); + + ret = tick_setup_proxy(dev); + if (ret) { + raw_spin_unlock_irqrestore(&clockevents_lock, flags); + return ret; + } + + proxy_dev = &dev->proxy_device; + clockevent_set_state(proxy_dev, CLOCK_EVT_STATE_DETACHED); + + list_add(&proxy_dev->list, &clockevent_devices); + tick_check_new_device(proxy_dev); + clockevents_notify_released(); + + raw_spin_unlock_irqrestore(&clockevents_lock, flags); + + real_dev = dev->real_device; + freq = (1000000000ULL * real_dev->mult) >> real_dev->shift; + printk(KERN_INFO "CPU%d: proxy tick device registered (%u.%02uMHz)\n", + smp_processor_id(), freq / 1000000, (freq / 10000) % 100); + + return ret; +} + +void clockevents_unregister_proxy(struct clock_proxy_device *dev) +{ + unsigned long flags; + int ret; + + clockevents_register_device(dev->real_device); + clockevents_switch_state(dev->real_device, CLOCK_EVT_STATE_DETACHED); + + /* + * Pop the proxy device, do not give it back to the + * framework. 
+ */ + raw_spin_lock_irqsave(&clockevents_lock, flags); + ret = clockevents_replace(&dev->proxy_device); + raw_spin_unlock_irqrestore(&clockevents_lock, flags); + + if (WARN_ON(ret)) + return; + + printk(KERN_INFO "CPU%d: proxy tick device unregistered\n", + smp_processor_id()); +} + +#endif + /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -574,8 +638,17 @@ void clockevents_exchange_device(struct */ if (old) { module_put(old->owner); - clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED); - list_move(&old->list, &clockevents_released); + /* + * Do not move a proxy tick device to the release + * list, keep it around but mark it as reserved. + */ + if (new && new->features & CLOCK_EVT_FEAT_PROXY) { + list_del(&old->list); + clockevents_switch_state(old, CLOCK_EVT_STATE_RESERVED); + } else { + clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED); + list_move(&old->list, &clockevents_released); + } } if (new) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/time/clocksource.c linux-dovetail-v5.15.y-dovetail/kernel/time/clocksource.c --- linux-5.15.26/kernel/time/clocksource.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/time/clocksource.c 2022-03-10 09:47:50.000000000 +0100 @@ -1131,8 +1131,8 @@ void __clocksource_update_freq_scale(str clocksource_update_max_deferment(cs); - pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", - cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns); + pr_info("%s: freq: %Lu Hz, mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", + cs->name, (u64)freq * scale, cs->mask, cs->max_cycles, cs->max_idle_ns); } EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale); @@ -1401,10 +1401,36 @@ static ssize_t available_clocksource_sho } static DEVICE_ATTR_RO(available_clocksource); +/** + * vdso_clocksource_show - sysfs interface for vDSO type of + * current clocksource + * @dev: unused + * @attr: unused + * @buf: char buffer to be filled with vDSO type + */ +static ssize_t vdso_clocksource_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t count = 0, type; + + mutex_lock(&clocksource_mutex); + type = curr_clocksource->vdso_type; + count = snprintf(buf, PAGE_SIZE, "%s\n", + type == CLOCKSOURCE_VDSO_NONE ? "none" : + type == CLOCKSOURCE_VDSO_ARCHITECTED ? 
"architected" : + "mmio"); + mutex_unlock(&clocksource_mutex); + + return count; +} +static DEVICE_ATTR_RO(vdso_clocksource); + static struct attribute *clocksource_attrs[] = { &dev_attr_current_clocksource.attr, &dev_attr_unbind_clocksource.attr, &dev_attr_available_clocksource.attr, + &dev_attr_vdso_clocksource.attr, NULL }; ATTRIBUTE_GROUPS(clocksource); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/time/hrtimer.c linux-dovetail-v5.15.y-dovetail/kernel/time/hrtimer.c --- linux-5.15.26/kernel/time/hrtimer.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/time/hrtimer.c 2022-03-10 09:47:50.000000000 +0100 @@ -978,6 +978,7 @@ void clock_was_set(unsigned int bases) out_timerfd: timerfd_clock_was_set(); + inband_clock_was_set(); } static void clock_was_set_work(struct work_struct *work) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/time/Makefile linux-dovetail-v5.15.y-dovetail/kernel/time/Makefile --- linux-5.15.26/kernel/time/Makefile 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/time/Makefile 2022-03-10 09:47:50.000000000 +0100 @@ -16,6 +16,7 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROAD endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o +obj-$(CONFIG_IRQ_PIPELINE) += tick-proxy.o obj-$(CONFIG_LEGACY_TIMER_TICK) += tick-legacy.o obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/time/tick-broadcast.c linux-dovetail-v5.15.y-dovetail/kernel/time/tick-broadcast.c --- linux-5.15.26/kernel/time/tick-broadcast.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/time/tick-broadcast.c 2022-03-10 09:47:50.000000000 +0100 @@ -796,6 +796,14 @@ static int ___tick_broadcast_oneshot_con int ret = 0; ktime_t now; + /* + * If proxying the hardware timer for high-precision tick + * delivery to the out-of-band stage, the whole broadcast + * dance is a no-go. Deny entering deep idle. + */ + if (dev->features & CLOCK_EVT_FEAT_PROXY) + return -EBUSY; + raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/time/tick-common.c linux-dovetail-v5.15.y-dovetail/kernel/time/tick-common.c --- linux-5.15.26/kernel/time/tick-common.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/time/tick-common.c 2022-03-10 09:47:50.000000000 +0100 @@ -246,7 +246,8 @@ static void tick_setup_device(struct tic } else { handler = td->evtdev->event_handler; next_event = td->evtdev->next_event; - td->evtdev->event_handler = clockevents_handle_noop; + if (!clockevent_state_reserved(td->evtdev)) + td->evtdev->event_handler = clockevents_handle_noop; } td->evtdev = newdev; @@ -321,6 +322,17 @@ static bool tick_check_preferred(struct !cpumask_equal(curdev->cpumask, newdev->cpumask); } +static bool tick_check_is_proxy(struct clock_event_device *curdev) +{ + if (!irqs_pipelined()) + return false; + + /* + * Never replace an active proxy except when unregistering it. + */ + return curdev && curdev->features & CLOCK_EVT_FEAT_PROXY; +} + /* * Check whether the new device is a better fit than curdev. curdev * can be NULL ! 
@@ -328,6 +340,9 @@ static bool tick_check_preferred(struct bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev) { + if (tick_check_is_proxy(curdev)) + return false; + if (!tick_check_percpu(curdev, newdev, smp_processor_id())) return false; @@ -348,6 +363,9 @@ void tick_check_new_device(struct clock_ td = &per_cpu(tick_cpu_device, cpu); curdev = td->evtdev; + if (tick_check_is_proxy(curdev)) + goto out_bc; + if (!tick_check_replacement(curdev, newdev)) goto out_bc; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/time/tick-internal.h linux-dovetail-v5.15.y-dovetail/kernel/time/tick-internal.h --- linux-5.15.26/kernel/time/tick-internal.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/time/tick-internal.h 2022-03-10 09:47:50.000000000 +0100 @@ -50,12 +50,15 @@ static inline void clockevent_set_state( extern void clockevents_shutdown(struct clock_event_device *dev); extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); -extern void clockevents_switch_state(struct clock_event_device *dev, - enum clock_event_state state); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, bool force); extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); +#ifdef CONFIG_IRQ_PIPELINE +int clockevents_register_proxy(struct clock_proxy_device *dev); +extern void clockevents_unregister_proxy(struct clock_proxy_device *dev); +int tick_setup_proxy(struct clock_proxy_device *dev); +#endif extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); /* Broadcasting support */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/time/tick-proxy.c linux-dovetail-v5.15.y-dovetail/kernel/time/tick-proxy.c --- linux-5.15.26/kernel/time/tick-proxy.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/time/tick-proxy.c 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,466 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2017 Philippe Gerum . 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "tick-internal.h" + +static unsigned int proxy_tick_irq; + +static DEFINE_MUTEX(proxy_mutex); + +static DEFINE_PER_CPU(struct clock_proxy_device, proxy_tick_device); + +static inline struct clock_event_device * +get_real_tick_device(struct clock_event_device *proxy_dev) +{ + return container_of(proxy_dev, struct clock_proxy_device, proxy_device)->real_device; +} + +static void proxy_event_handler(struct clock_event_device *real_dev) +{ + struct clock_proxy_device *dev = raw_cpu_ptr(&proxy_tick_device); + struct clock_event_device *proxy_dev = &dev->proxy_device; + + proxy_dev->event_handler(proxy_dev); +} + +static int proxy_set_state_oneshot(struct clock_event_device *dev) +{ + struct clock_event_device *real_dev = get_real_tick_device(dev); + unsigned long flags; + int ret; + + flags = hard_local_irq_save(); + ret = real_dev->set_state_oneshot(real_dev); + hard_local_irq_restore(flags); + + return ret; +} + +static int proxy_set_state_periodic(struct clock_event_device *dev) +{ + struct clock_event_device *real_dev = get_real_tick_device(dev); + unsigned long flags; + int ret; + + flags = hard_local_irq_save(); + ret = real_dev->set_state_periodic(real_dev); + hard_local_irq_restore(flags); + + return ret; +} + +static int proxy_set_state_oneshot_stopped(struct clock_event_device *dev) +{ + struct clock_event_device *real_dev = get_real_tick_device(dev); + unsigned long flags; + int ret; + + flags = hard_local_irq_save(); + ret = real_dev->set_state_oneshot_stopped(real_dev); + hard_local_irq_restore(flags); + + return ret; +} + +static int proxy_set_state_shutdown(struct clock_event_device *dev) +{ + struct clock_event_device *real_dev = get_real_tick_device(dev); + unsigned long flags; + int ret; + + flags = hard_local_irq_save(); + ret = real_dev->set_state_shutdown(real_dev); + hard_local_irq_restore(flags); + + return ret; +} + +static void proxy_suspend(struct clock_event_device *dev) +{ + struct clock_event_device *real_dev = get_real_tick_device(dev); + unsigned long flags; + + flags = hard_local_irq_save(); + real_dev->suspend(real_dev); + hard_local_irq_restore(flags); +} + +static void proxy_resume(struct clock_event_device *dev) +{ + struct clock_event_device *real_dev = get_real_tick_device(dev); + unsigned long flags; + + flags = hard_local_irq_save(); + real_dev->resume(real_dev); + hard_local_irq_restore(flags); +} + +static int proxy_tick_resume(struct clock_event_device *dev) +{ + struct clock_event_device *real_dev = get_real_tick_device(dev); + unsigned long flags; + int ret; + + flags = hard_local_irq_save(); + ret = real_dev->tick_resume(real_dev); + hard_local_irq_restore(flags); + + return ret; +} + +static void proxy_broadcast(const struct cpumask *mask) +{ + struct clock_proxy_device *dev = raw_cpu_ptr(&proxy_tick_device); + struct clock_event_device *real_dev = dev->real_device; + unsigned long flags; + + flags = hard_local_irq_save(); + real_dev->broadcast(mask); + hard_local_irq_restore(flags); +} + +static int proxy_set_next_event(unsigned long delay, + struct clock_event_device *dev) +{ + struct clock_event_device *real_dev = get_real_tick_device(dev); + unsigned long flags; + int ret; + + flags = hard_local_irq_save(); + ret = real_dev->set_next_event(delay, real_dev); + hard_local_irq_restore(flags); + + return ret; +} + +static int proxy_set_next_ktime(ktime_t expires, + struct clock_event_device *dev) +{ + 
struct clock_event_device *real_dev = get_real_tick_device(dev); + unsigned long flags; + int ret; + + flags = hard_local_irq_save(); + ret = real_dev->set_next_ktime(expires, real_dev); + hard_local_irq_restore(flags); + + return ret; +} + +static irqreturn_t proxy_irq_handler(int sirq, void *dev_id) +{ + struct clock_event_device *evt; + + /* + * Tricky: we may end up running this in-band IRQ handler + * because tick_notify_proxy() was posted either: + * + * - from the out-of-band stage via ->handle_oob_event() for + * emulating an in-band tick. In this case, the active tick + * device for the in-band timing core is the proxy device, + * whose event handler is still the same than the real tick + * device's. + * + * - directly by the clock chip driver on the local CPU via + * clockevents_handle_event(), for propagating a tick to the + * in-band stage nobody from the out-of-band stage is + * interested on i.e. no proxy device was registered on the + * receiving CPU, which was excluded from @cpumask in the call + * to tick_install_proxy(). In this case, the active tick + * device for the in-band timing core is a real clock event + * device. + * + * In both cases, we are running on the in-band stage, and we + * should fire the event handler of the currently active tick + * device for the in-band timing core. + */ + evt = raw_cpu_ptr(&tick_cpu_device)->evtdev; + evt->event_handler(evt); + + return IRQ_HANDLED; +} + +#define interpose_proxy_handler(__proxy, __real, __h) \ + do { \ + if ((__real)->__h) \ + (__proxy)->__h = proxy_ ## __h; \ + } while (0) + +/* + * Setup a proxy which is about to override the tick device on the + * current CPU. Called with clockevents_lock held and irqs off so that + * the tick device does not change under our feet. + */ +int tick_setup_proxy(struct clock_proxy_device *dev) +{ + struct clock_event_device *proxy_dev, *real_dev; + + real_dev = raw_cpu_ptr(&tick_cpu_device)->evtdev; + if ((real_dev->features & + (CLOCK_EVT_FEAT_PIPELINE|CLOCK_EVT_FEAT_ONESHOT)) + != (CLOCK_EVT_FEAT_PIPELINE|CLOCK_EVT_FEAT_ONESHOT)) { + WARN(1, "cannot use clockevent device %s in proxy mode!", + real_dev->name); + return -ENODEV; + } + + /* + * The assumption is that neither us nor clockevents_register_proxy() + * can fail afterwards, so this is ok to advertise the new proxy as + * built by setting dev->real_device early. + */ + dev->real_device = real_dev; + dev->__original_handler = real_dev->event_handler; + + /* + * Inherit the feature bits since the proxy device has the + * same capabilities than the real one we are overriding + * (including CLOCK_EVT_FEAT_C3STOP if present). + */ + proxy_dev = &dev->proxy_device; + memset(proxy_dev, 0, sizeof(*proxy_dev)); + proxy_dev->features = real_dev->features | + CLOCK_EVT_FEAT_PERCPU | CLOCK_EVT_FEAT_PROXY; + proxy_dev->name = "proxy"; + proxy_dev->irq = real_dev->irq; + proxy_dev->bound_on = -1; + proxy_dev->cpumask = cpumask_of(smp_processor_id()); + proxy_dev->rating = real_dev->rating + 1; + proxy_dev->mult = real_dev->mult; + proxy_dev->shift = real_dev->shift; + proxy_dev->max_delta_ticks = real_dev->max_delta_ticks; + proxy_dev->min_delta_ticks = real_dev->min_delta_ticks; + proxy_dev->max_delta_ns = real_dev->max_delta_ns; + proxy_dev->min_delta_ns = real_dev->min_delta_ns; + /* + * Interpose default handlers which are safe wrt preemption by + * the out-of-band stage. 
+ */ + interpose_proxy_handler(proxy_dev, real_dev, set_state_oneshot); + interpose_proxy_handler(proxy_dev, real_dev, set_state_oneshot_stopped); + interpose_proxy_handler(proxy_dev, real_dev, set_state_periodic); + interpose_proxy_handler(proxy_dev, real_dev, set_state_shutdown); + interpose_proxy_handler(proxy_dev, real_dev, suspend); + interpose_proxy_handler(proxy_dev, real_dev, resume); + interpose_proxy_handler(proxy_dev, real_dev, tick_resume); + interpose_proxy_handler(proxy_dev, real_dev, broadcast); + interpose_proxy_handler(proxy_dev, real_dev, set_next_event); + interpose_proxy_handler(proxy_dev, real_dev, set_next_ktime); + + dev->__setup_handler(dev); + + return 0; +} + +static int enable_oob_timer(void *arg) /* hard_irqs_disabled() */ +{ + struct clock_proxy_device *dev = raw_cpu_ptr(&proxy_tick_device); + struct clock_event_device *real_dev; + + /* + * Install the out-of-band handler on this CPU's real clock + * device, then turn on out-of-band mode for the associated + * IRQ (duplicates are silently ignored if the IRQ is common + * to multiple CPUs). + */ + real_dev = dev->real_device; + real_dev->event_handler = dev->handle_oob_event; + real_dev->features |= CLOCK_EVT_FEAT_OOB; + barrier(); + + /* + * irq_switch_oob() grabs the IRQ descriptor lock which is + * mutable, so that is fine to invoke this routine with hard + * IRQs off. + */ + irq_switch_oob(real_dev->irq, true); + + return 0; +} + +struct proxy_install_arg { + void (*setup_proxy)(struct clock_proxy_device *dev); + int result; +}; + +static void register_proxy_device(void *arg) /* irqs_disabled() */ +{ + struct clock_proxy_device *dev = raw_cpu_ptr(&proxy_tick_device); + struct proxy_install_arg *req = arg; + int ret; + + dev->__setup_handler = req->setup_proxy; + ret = clockevents_register_proxy(dev); + if (ret) { + if (!req->result) + req->result = ret; + } else { + dev->real_device->event_handler = proxy_event_handler; + } +} + +int tick_install_proxy(void (*setup_proxy)(struct clock_proxy_device *dev), + const struct cpumask *cpumask) +{ + struct proxy_install_arg arg; + int ret, sirq; + + mutex_lock(&proxy_mutex); + + ret = -EAGAIN; + if (proxy_tick_irq) + goto out; + + sirq = irq_create_direct_mapping(synthetic_irq_domain); + if (WARN_ON(sirq == 0)) + goto out; + + ret = __request_percpu_irq(sirq, proxy_irq_handler, + IRQF_NO_THREAD, /* no IRQF_TIMER here. */ + "proxy tick", + &proxy_tick_device); + if (WARN_ON(ret)) { + irq_dispose_mapping(sirq); + goto out; + } + + proxy_tick_irq = sirq; + barrier(); + + /* + * Install a proxy tick device on each CPU. As the proxy + * device is picked, the previous (real) tick device is + * switched to reserved state by the clockevent core. + * Immediately after, the proxy device starts controlling the + * real device under the hood to carry out the timing requests + * it receives. + * + * For a short period of time, after the proxy device is + * installed, and until the real device IRQ is switched to + * out-of-band mode, the flow is as follows: + * + * [inband timing request] + * proxy_dev->set_next_event(proxy_dev) + * oob_program_event(proxy_dev) + * real_dev->set_next_event(real_dev) + * ... 
+ * + * original_timer_handler() [in-band stage] + * clockevents_handle_event(real_dev) + * proxy_event_handler(real_dev) + * inband_event_handler(proxy_dev) + * + * Eventually, we substitute the original (in-band) clock + * event handler with the out-of-band handler for the real + * clock event device, then turn on out-of-band mode for the + * timer IRQ associated to the latter. These two steps are + * performed over a stop_machine() context, so that no tick + * can race with this code while we swap handlers. + * + * Once the hand over is complete, the flow is as follows: + * + * [inband timing request] + * proxy_dev->set_next_event(proxy_dev) + * oob_program_event(proxy_dev) + * real_dev->set_next_event(real_dev) + * ... + * + * inband_event_handler() [out-of-band stage] + * clockevents_handle_event(real_dev) + * handle_oob_event(proxy_dev) + * ...(inband tick emulation)... + * tick_notify_proxy() + * ... + * proxy_irq_handler(proxy_dev) [in-band stage] + * clockevents_handle_event(proxy_dev) + * inband_event_handler(proxy_dev) + */ + arg.setup_proxy = setup_proxy; + arg.result = 0; + on_each_cpu_mask(cpumask, register_proxy_device, &arg, true); + if (arg.result) { + tick_uninstall_proxy(cpumask); + return arg.result; + } + + /* + * Start ticking from the out-of-band interrupt stage upon + * receipt of out-of-band timer events. + */ + stop_machine(enable_oob_timer, NULL, cpumask); +out: + mutex_unlock(&proxy_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(tick_install_proxy); + +static int disable_oob_timer(void *arg) /* hard_irqs_disabled() */ +{ + struct clock_proxy_device *dev = raw_cpu_ptr(&proxy_tick_device); + struct clock_event_device *real_dev; + + dev = raw_cpu_ptr(&proxy_tick_device); + real_dev = dev->real_device; + real_dev->event_handler = dev->__original_handler; + real_dev->features &= ~CLOCK_EVT_FEAT_OOB; + barrier(); + + irq_switch_oob(real_dev->irq, false); + + return 0; +} + +static void unregister_proxy_device(void *arg) /* irqs_disabled() */ +{ + struct clock_proxy_device *dev = raw_cpu_ptr(&proxy_tick_device); + + if (dev->real_device) { + clockevents_unregister_proxy(dev); + dev->real_device = NULL; + } +} + +void tick_uninstall_proxy(const struct cpumask *cpumask) +{ + /* + * Undo all we did in tick_install_proxy(), handing over + * control of the tick device back to the inband code. + */ + mutex_lock(&proxy_mutex); + stop_machine(disable_oob_timer, NULL, cpu_online_mask); + on_each_cpu_mask(cpumask, unregister_proxy_device, NULL, true); + free_percpu_irq(proxy_tick_irq, &proxy_tick_device); + irq_dispose_mapping(proxy_tick_irq); + proxy_tick_irq = 0; + mutex_unlock(&proxy_mutex); +} +EXPORT_SYMBOL_GPL(tick_uninstall_proxy); + +void tick_notify_proxy(void) +{ + /* + * Schedule a tick on the proxy device to occur from the + * in-band stage, which will trigger proxy_irq_handler() at + * some point (i.e. when the in-band stage is back in control + * and not stalled). Note that we might be called from the + * in-band stage in some cases (see proxy_irq_handler()). 
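Putting the flow above into code, a companion core would provide a setup callback that installs its out-of-band tick handler, then proxy the tick on the CPUs it manages. A hedged sketch, with the my_* names standing in for core-specific logic:

/*
 * Runs from the out-of-band stage on each hardware tick once the proxy
 * is active; re-post an in-band tick so the regular timing core keeps
 * running.
 */
static void my_oob_tick_handler(struct clock_event_device *real_dev)
{
	/* ... serve the core's own timer queue here ... */
	tick_notify_proxy();
}

static void my_setup_proxy(struct clock_proxy_device *dev)
{
	dev->handle_oob_event = my_oob_tick_handler;
}

static int my_core_enable_tick(void)
{
	return tick_install_proxy(my_setup_proxy, cpu_online_mask);
}

static void my_core_disable_tick(void)
{
	tick_uninstall_proxy(cpu_online_mask);
}

Relaying every tick as shown keeps the in-band timing core ticking exactly as before; a real core would typically only call tick_notify_proxy() when its emulated in-band timer is actually due.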
+ */ + irq_post_inband(proxy_tick_irq); +} +EXPORT_SYMBOL_GPL(tick_notify_proxy); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/time/vsyscall.c linux-dovetail-v5.15.y-dovetail/kernel/time/vsyscall.c --- linux-5.15.26/kernel/time/vsyscall.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/time/vsyscall.c 2022-03-10 09:47:50.000000000 +0100 @@ -69,16 +69,42 @@ static inline void update_vdso_data(stru vdso_ts->nsec = tk->tkr_mono.xtime_nsec; } +static void update_generic_mmio(struct vdso_data *vdata, struct timekeeper *tk) +{ +#ifdef CONFIG_GENERIC_CLOCKSOURCE_VDSO + const struct clocksource *cs = tk->tkr_mono.clock; + u16 seq; + + if (cs->vdso_type == (vdata->cs_type_seq >> 16)) + return; + + seq = vdata->cs_type_seq; + if (++seq == 0) + seq = 1; + + vdata->cs_type_seq = cs->vdso_type << 16 | seq; + + if (cs->vdso_type >= CLOCKSOURCE_VDSO_MMIO) + snprintf(vdata->cs_mmdev, sizeof(vdata->cs_mmdev), + "/dev/ucs/%u", cs->vdso_type - CLOCKSOURCE_VDSO_MMIO); +#endif +} + void update_vsyscall(struct timekeeper *tk) { struct vdso_data *vdata = __arch_get_k_vdso_data(); struct vdso_timestamp *vdso_ts; + unsigned long flags; s32 clock_mode; u64 nsec; + flags = hard_cond_local_irq_save(); + /* copy vsyscall data */ vdso_write_begin(vdata); + update_generic_mmio(vdata, tk); + clock_mode = tk->tkr_mono.clock->vdso_clock_mode; vdata[CS_HRES_COARSE].clock_mode = clock_mode; vdata[CS_RAW].clock_mode = clock_mode; @@ -110,13 +136,16 @@ void update_vsyscall(struct timekeeper * * If the current clocksource is not VDSO capable, then spare the * update of the high resolution parts. */ - if (clock_mode != VDSO_CLOCKMODE_NONE) + if (IS_ENABLED(CONFIG_GENERIC_CLOCKSOURCE_VDSO) || + clock_mode != VDSO_CLOCKMODE_NONE) update_vdso_data(vdata, tk); __arch_update_vsyscall(vdata, tk); vdso_write_end(vdata); + hard_cond_local_irq_restore(flags); + __arch_sync_vdso_data(vdata); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/ftrace.c linux-dovetail-v5.15.y-dovetail/kernel/trace/ftrace.c --- linux-5.15.26/kernel/trace/ftrace.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/ftrace.c 2022-03-10 09:47:50.000000000 +0100 @@ -6262,10 +6262,10 @@ static int ftrace_process_locs(struct mo * reason to cause large interrupt latencies while we do it. */ if (!mod) - local_irq_save(flags); + flags = hard_local_irq_save(); ftrace_update_code(mod, start_pg); if (!mod) - local_irq_restore(flags); + hard_local_irq_restore(flags); ret = 0; out: mutex_unlock(&ftrace_lock); @@ -6854,9 +6854,9 @@ void __init ftrace_init(void) unsigned long count, flags; int ret; - local_irq_save(flags); + flags = hard_local_irq_save(); ret = ftrace_dyn_arch_init(); - local_irq_restore(flags); + hard_local_irq_restore(flags); if (ret) goto failed; @@ -7011,7 +7011,15 @@ __ftrace_ops_list_func(unsigned long ip, } } while_for_each_ftrace_op(op); out: - preempt_enable_notrace(); + if (irqs_pipelined() && (hard_irqs_disabled() || !running_inband())) + /* + * Nothing urgent to schedule here. At latest the + * timer tick will pick up whatever the tracing + * functions kicked off. 
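
Putting the proxy tick interface above together, here is a minimal sketch of how a companion core might take over the tick on every online CPU: the setup callback only has to provide an out-of-band event handler, and tick_notify_proxy() relays a tick back to the in-band stage when one is due. This is an illustration only, not code from the patch; all my_* names are hypothetical and the declarations are assumed to land in <linux/tick.h>.

#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/clockchips.h>
#include <linux/tick.h>         /* assumed home of struct clock_proxy_device */

/* Runs from the out-of-band stage for every hardware timer event. */
static void my_handle_oob_tick(struct clock_event_device *real_dev)
{
        /* ... out-of-band timer bookkeeping goes here ... */

        /*
         * Relay a tick to the in-band stage; a real core would only do
         * this when the emulated in-band timer has actually elapsed.
         */
        tick_notify_proxy();
}

static void my_setup_proxy(struct clock_proxy_device *dev)
{
        dev->handle_oob_event = my_handle_oob_tick;
}

static int __init my_timer_init(void)
{
        return tick_install_proxy(my_setup_proxy, cpu_online_mask);
}

static void __exit my_timer_exit(void)
{
        tick_uninstall_proxy(cpu_online_mask);
}

module_init(my_timer_init);
module_exit(my_timer_exit);
MODULE_LICENSE("GPL");
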
+ */ + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); trace_clear_recursion(bit); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/ring_buffer.c linux-dovetail-v5.15.y-dovetail/kernel/trace/ring_buffer.c --- linux-5.15.26/kernel/trace/ring_buffer.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/ring_buffer.c 2022-03-10 09:47:50.000000000 +0100 @@ -3166,8 +3166,8 @@ rb_wakeups(struct trace_buffer *buffer, static __always_inline int trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { - unsigned int val = cpu_buffer->current_context; - unsigned long pc = preempt_count(); + unsigned int val; + unsigned long pc = preempt_count(), flags; int bit; if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) @@ -3176,6 +3176,10 @@ trace_recursive_lock(struct ring_buffer_ bit = pc & NMI_MASK ? RB_CTX_NMI : pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; + flags = hard_cond_local_irq_save(); + + val = cpu_buffer->current_context; + if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) { /* * It is possible that this was called by transitioning @@ -3185,6 +3189,7 @@ trace_recursive_lock(struct ring_buffer_ bit = RB_CTX_TRANSITION; if (val & (1 << (bit + cpu_buffer->nest))) { do_ring_buffer_record_recursion(); + hard_cond_local_irq_restore(flags); return 1; } } @@ -3192,14 +3197,20 @@ trace_recursive_lock(struct ring_buffer_ val |= (1 << (bit + cpu_buffer->nest)); cpu_buffer->current_context = val; + hard_cond_local_irq_restore(flags); + return 0; } static __always_inline void trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) { + unsigned long flags; + + flags = hard_cond_local_irq_save(); cpu_buffer->current_context &= cpu_buffer->current_context - (1 << cpu_buffer->nest); + hard_cond_local_irq_restore(flags); } /* The recursive locking above uses 5 bits */ diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/trace_branch.c linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_branch.c --- linux-5.15.26/kernel/trace/trace_branch.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_branch.c 2022-03-10 09:47:50.000000000 +0100 @@ -53,7 +53,7 @@ probe_likely_condition(struct ftrace_lik if (unlikely(!tr)) return; - raw_local_irq_save(flags); + flags = hard_local_irq_save(); current->trace_recursion |= TRACE_BRANCH_BIT; data = this_cpu_ptr(tr->array_buffer.data); if (atomic_read(&data->disabled)) @@ -87,7 +87,7 @@ probe_likely_condition(struct ftrace_lik out: current->trace_recursion &= ~TRACE_BRANCH_BIT; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/trace.c linux-dovetail-v5.15.y-dovetail/kernel/trace/trace.c --- linux-5.15.26/kernel/trace/trace.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/trace.c 2022-03-10 09:47:50.000000000 +0100 @@ -1134,9 +1134,9 @@ static void tracing_snapshot_instance_co return; } - local_irq_save(flags); + flags = hard_local_irq_save(); update_max_tr(tr, current, smp_processor_id(), cond_data); - local_irq_restore(flags); + hard_local_irq_restore(flags); } void tracing_snapshot_instance(struct trace_array *tr) @@ -1818,7 +1818,7 @@ update_max_tr(struct trace_array *tr, st if (tr->stop_count) return; - WARN_ON_ONCE(!irqs_disabled()); + WARN_ON_ONCE(!hard_irqs_disabled()); if (!tr->allocated_snapshot) { /* Only the nop tracer should hit this when disabling 
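
The hard_cond_local_irq_save()/hard_cond_local_irq_restore() pairs added to the ring buffer code above follow a recurring pattern in this patch: they really mask interrupts in the CPU when CONFIG_IRQ_PIPELINE is enabled, so out-of-band code cannot preempt the section, and they are expected to reduce to no-ops on non-pipelined builds. A minimal sketch of the pattern, with hypothetical names:

#include <linux/irqflags.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned int, my_nesting);

/* May be entered from both the in-band and out-of-band stages. */
static void my_enter_context(void)
{
        unsigned long flags;

        flags = hard_cond_local_irq_save();
        __this_cpu_inc(my_nesting);     /* cannot interleave with oob users */
        hard_cond_local_irq_restore(flags);
}
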
*/ @@ -1862,7 +1862,7 @@ update_max_tr_single(struct trace_array if (tr->stop_count) return; - WARN_ON_ONCE(!irqs_disabled()); + WARN_ON_ONCE(!hard_irqs_disabled()); if (!tr->allocated_snapshot) { /* Only the nop tracer should hit this when disabling */ WARN_ON_ONCE(tr->current_trace != &nop_trace); @@ -2629,6 +2629,10 @@ unsigned int tracing_gen_ctx_irq_test(un trace_flags |= TRACE_FLAG_HARDIRQ; if (in_serving_softirq()) trace_flags |= TRACE_FLAG_SOFTIRQ; + if (running_oob()) + trace_flags |= TRACE_FLAG_OOB_STAGE; + if (irqs_pipelined() && hard_irqs_disabled()) + trace_flags |= TRACE_FLAG_IRQS_HARDOFF; if (tif_need_resched()) trace_flags |= TRACE_FLAG_NEED_RESCHED; @@ -7460,13 +7464,13 @@ tracing_snapshot_write(struct file *filp ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) break; - local_irq_disable(); + hard_local_irq_disable(); /* Now, we're going to swap */ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) update_max_tr(tr, current, smp_processor_id(), NULL); else update_max_tr_single(tr, current, iter->cpu_file); - local_irq_enable(); + hard_local_irq_enable(); break; default: if (tr->allocated_snapshot) { diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/trace_clock.c linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_clock.c --- linux-5.15.26/kernel/trace/trace_clock.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_clock.c 2022-03-10 09:47:50.000000000 +0100 @@ -97,7 +97,7 @@ u64 notrace trace_clock_global(void) int this_cpu; u64 now, prev_time; - raw_local_irq_save(flags); + flags = hard_local_irq_save(); this_cpu = raw_smp_processor_id(); @@ -139,7 +139,7 @@ u64 notrace trace_clock_global(void) arch_spin_unlock(&trace_clock_struct.lock); } out: - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return now; } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/trace_functions.c linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_functions.c --- linux-5.15.26/kernel/trace/trace_functions.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_functions.c 2022-03-10 09:47:50.000000000 +0100 @@ -233,7 +233,7 @@ function_stack_trace_call(unsigned long * Need to use raw, since this must be called before the * recursive protection is performed. 
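
The extra trace flags added above record two states a non-pipelined kernel conflates: whether the in-band stage is stalled (irqs_disabled()) and whether interrupts are really masked in the CPU (hard_irqs_disabled()), plus which stage the event was logged from. A small debug sketch of those predicates; the header placement of running_oob()/running_inband() is assumed here:

#include <linux/irqflags.h>
#include <linux/irqstage.h>     /* assumed home of running_oob() & friends */
#include <linux/printk.h>

static void my_dump_irq_state(const char *who)
{
        pr_info("%s: stage=%s inband-stalled=%d hard-irqs-off=%d\n",
                who,
                running_oob() ? "oob" : "in-band",
                irqs_disabled(),        /* virtual: in-band stall bit */
                hard_irqs_disabled());  /* real: CPU interrupt mask */
}
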
*/ - local_irq_save(flags); + flags = hard_local_irq_save(); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); @@ -245,7 +245,7 @@ function_stack_trace_call(unsigned long } atomic_dec(&data->disabled); - local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline bool is_repeat_check(struct trace_array *tr, diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/trace_functions_graph.c linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_functions_graph.c --- linux-5.15.26/kernel/trace/trace_functions_graph.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_functions_graph.c 2022-03-10 09:47:50.000000000 +0100 @@ -168,7 +168,7 @@ int trace_graph_entry(struct ftrace_grap if (tracing_thresh) return 1; - local_irq_save(flags); + flags = hard_local_irq_save(); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); @@ -180,7 +180,7 @@ int trace_graph_entry(struct ftrace_grap } atomic_dec(&data->disabled); - local_irq_restore(flags); + hard_local_irq_restore(flags); return ret; } @@ -248,7 +248,7 @@ void trace_graph_return(struct ftrace_gr return; } - local_irq_save(flags); + flags = hard_local_irq_save(); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); @@ -257,7 +257,7 @@ void trace_graph_return(struct ftrace_gr __trace_graph_return(tr, trace, trace_ctx); } atomic_dec(&data->disabled); - local_irq_restore(flags); + hard_local_irq_restore(flags); } void set_graph_array(struct trace_array *tr) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/trace_irqsoff.c linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_irqsoff.c --- linux-5.15.26/kernel/trace/trace_irqsoff.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_irqsoff.c 2022-03-10 09:47:50.000000000 +0100 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "trace.h" @@ -26,7 +27,7 @@ static int tracer_enabled __read_most static DEFINE_PER_CPU(int, tracing_cpu); -static DEFINE_RAW_SPINLOCK(max_trace_lock); +static DEFINE_HARD_SPINLOCK(max_trace_lock); enum { TRACER_IRQS_OFF = (1 << 1), @@ -44,7 +45,7 @@ static int start_irqsoff_tracer(struct t static inline int preempt_trace(int pc) { - return ((trace_type & TRACER_PREEMPT_OFF) && pc); + return (running_inband() && (trace_type & TRACER_PREEMPT_OFF) && pc); } #else # define preempt_trace(pc) (0) @@ -55,7 +56,7 @@ static inline int irq_trace(void) { return ((trace_type & TRACER_IRQS_OFF) && - irqs_disabled()); + (hard_irqs_disabled() || (running_inband() && irqs_disabled()))); } #else # define irq_trace() (0) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/trace_output.c linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_output.c --- linux-5.15.26/kernel/trace/trace_output.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_output.c 2022-03-10 09:47:50.000000000 +0100 @@ -455,14 +455,19 @@ int trace_print_lat_fmt(struct trace_seq int hardirq; int softirq; int nmi; + int oob; nmi = entry->flags & TRACE_FLAG_NMI; hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + oob = irqs_pipelined() && (entry->flags & TRACE_FLAG_OOB_STAGE); irqs_off = + (entry->flags & (TRACE_FLAG_IRQS_OFF|TRACE_FLAG_IRQS_HARDOFF)) == + 
(TRACE_FLAG_IRQS_OFF|TRACE_FLAG_IRQS_HARDOFF) ? '*' : + (entry->flags & TRACE_FLAG_IRQS_HARDOFF) ? 'D' : (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : + !irqs_pipelined() && (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.'; switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | @@ -482,6 +487,8 @@ int trace_print_lat_fmt(struct trace_seq } hardsoft_irq = + (nmi && oob) ? '#' : + oob ? '~' : (nmi && hardirq) ? 'Z' : nmi ? 'z' : (hardirq && softirq) ? 'H' : diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/trace_preemptirq.c linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_preemptirq.c --- linux-5.15.26/kernel/trace/trace_preemptirq.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_preemptirq.c 2022-03-10 09:47:50.000000000 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include #include "trace.h" #define CREATE_TRACE_POINTS @@ -113,6 +114,57 @@ __visible void trace_hardirqs_off_caller } EXPORT_SYMBOL(trace_hardirqs_off_caller); NOKPROBE_SYMBOL(trace_hardirqs_off_caller); + +#ifdef CONFIG_IRQ_PIPELINE + +void trace_hardirqs_off_pipelined(void) +{ + WARN_ON(irq_pipeline_debug() && !hard_irqs_disabled()); + + if (running_inband()) + trace_hardirqs_off(); +} +EXPORT_SYMBOL(trace_hardirqs_off_pipelined); +NOKPROBE_SYMBOL(trace_hardirqs_off_pipelined); + +void trace_hardirqs_on_pipelined(void) +{ + WARN_ON(irq_pipeline_debug() && !hard_irqs_disabled()); + + /* + * If the in-band stage of the kernel is current but the IRQ + * was not delivered because the latter is stalled, keep the + * tracing logic unaware of the receipt, so that no false + * positive is triggered in lockdep (e.g. IN-HARDIRQ-W -> + * HARDIRQ-ON-W). + */ + if (running_inband() && !irqs_disabled()) { + stall_inband(); + trace_hardirqs_on(); + unstall_inband_nocheck(); + } +} +EXPORT_SYMBOL(trace_hardirqs_on_pipelined); +NOKPROBE_SYMBOL(trace_hardirqs_on_pipelined); + +#else + +void trace_hardirqs_off_pipelined(void) +{ + trace_hardirqs_off(); +} +EXPORT_SYMBOL(trace_hardirqs_off_pipelined); +NOKPROBE_SYMBOL(trace_hardirqs_off_pipelined); + +void trace_hardirqs_on_pipelined(void) +{ + trace_hardirqs_on(); +} +EXPORT_SYMBOL(trace_hardirqs_on_pipelined); +NOKPROBE_SYMBOL(trace_hardirqs_on_pipelined); + +#endif + #endif /* CONFIG_TRACE_IRQFLAGS */ #ifdef CONFIG_TRACE_PREEMPT_TOGGLE diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/trace_sched_wakeup.c linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_sched_wakeup.c --- linux-5.15.26/kernel/trace/trace_sched_wakeup.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_sched_wakeup.c 2022-03-10 09:47:50.000000000 +0100 @@ -483,7 +483,9 @@ probe_wakeup_sched_switch(void *ignore, if (likely(!is_tracing_stopped())) { wakeup_trace->max_latency = delta; + hard_local_irq_disable(); update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu, NULL); + hard_local_irq_enable(); } out_unlock: diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/kernel/trace/trace_stack.c linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_stack.c --- linux-5.15.26/kernel/trace/trace_stack.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/kernel/trace/trace_stack.c 2022-03-10 09:47:50.000000000 +0100 @@ -171,8 +171,9 @@ static void check_stack(unsigned long ip if (!object_is_on_stack(stack)) return; - /* Can't do this from NMI context (can cause deadlocks) */ - if (in_nmi()) + /* Can't do 
this from NMI or oob stage contexts (can cause + deadlocks) */ + if (in_nmi() || !running_inband()) return; local_irq_save(flags); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/lib/atomic64.c linux-dovetail-v5.15.y-dovetail/lib/atomic64.c --- linux-5.15.26/lib/atomic64.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/lib/atomic64.c 2022-03-10 09:47:50.000000000 +0100 @@ -25,15 +25,15 @@ * Ensure each lock is in a separate cacheline. */ static union { - raw_spinlock_t lock; + hard_spinlock_t lock; char pad[L1_CACHE_BYTES]; } atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = { [0 ... (NR_LOCKS - 1)] = { - .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock), + .lock = __HARD_SPIN_LOCK_INITIALIZER(atomic64_lock.lock), }, }; -static inline raw_spinlock_t *lock_addr(const atomic64_t *v) +static inline hard_spinlock_t *lock_addr(const atomic64_t *v) { unsigned long addr = (unsigned long) v; @@ -45,7 +45,7 @@ static inline raw_spinlock_t *lock_addr( s64 generic_atomic64_read(const atomic64_t *v) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + hard_spinlock_t *lock = lock_addr(v); s64 val; raw_spin_lock_irqsave(lock, flags); @@ -58,7 +58,7 @@ EXPORT_SYMBOL(generic_atomic64_read); void generic_atomic64_set(atomic64_t *v, s64 i) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + hard_spinlock_t *lock = lock_addr(v); raw_spin_lock_irqsave(lock, flags); v->counter = i; @@ -70,7 +70,7 @@ EXPORT_SYMBOL(generic_atomic64_set); void generic_atomic64_##op(s64 a, atomic64_t *v) \ { \ unsigned long flags; \ - raw_spinlock_t *lock = lock_addr(v); \ + hard_spinlock_t *lock = lock_addr(v); \ \ raw_spin_lock_irqsave(lock, flags); \ v->counter c_op a; \ @@ -82,7 +82,7 @@ EXPORT_SYMBOL(generic_atomic64_##op); s64 generic_atomic64_##op##_return(s64 a, atomic64_t *v) \ { \ unsigned long flags; \ - raw_spinlock_t *lock = lock_addr(v); \ + hard_spinlock_t *lock = lock_addr(v); \ s64 val; \ \ raw_spin_lock_irqsave(lock, flags); \ @@ -96,7 +96,7 @@ EXPORT_SYMBOL(generic_atomic64_##op##_re s64 generic_atomic64_fetch_##op(s64 a, atomic64_t *v) \ { \ unsigned long flags; \ - raw_spinlock_t *lock = lock_addr(v); \ + hard_spinlock_t *lock = lock_addr(v); \ s64 val; \ \ raw_spin_lock_irqsave(lock, flags); \ @@ -133,7 +133,7 @@ ATOMIC64_OPS(xor, ^=) s64 generic_atomic64_dec_if_positive(atomic64_t *v) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + hard_spinlock_t *lock = lock_addr(v); s64 val; raw_spin_lock_irqsave(lock, flags); @@ -148,7 +148,7 @@ EXPORT_SYMBOL(generic_atomic64_dec_if_po s64 generic_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + hard_spinlock_t *lock = lock_addr(v); s64 val; raw_spin_lock_irqsave(lock, flags); @@ -163,7 +163,7 @@ EXPORT_SYMBOL(generic_atomic64_cmpxchg); s64 generic_atomic64_xchg(atomic64_t *v, s64 new) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + hard_spinlock_t *lock = lock_addr(v); s64 val; raw_spin_lock_irqsave(lock, flags); @@ -177,7 +177,7 @@ EXPORT_SYMBOL(generic_atomic64_xchg); s64 generic_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + hard_spinlock_t *lock = lock_addr(v); s64 val; raw_spin_lock_irqsave(lock, flags); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/lib/dump_stack.c linux-dovetail-v5.15.y-dovetail/lib/dump_stack.c --- linux-5.15.26/lib/dump_stack.c 2022-03-02 11:48:10.000000000 +0100 +++ 
linux-dovetail-v5.15.y-dovetail/lib/dump_stack.c 2022-03-10 09:47:50.000000000 +0100 @@ -10,7 +10,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -66,6 +68,11 @@ void dump_stack_print_info(const char *l printk("%sHardware name: %s\n", log_lvl, dump_stack_arch_desc_str); +#ifdef CONFIG_IRQ_PIPELINE + printk("%sIRQ stage: %s\n", + log_lvl, current_irq_stage->name); +#endif + print_worker_info(log_lvl, current); print_stop_info(log_lvl, current); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/lib/Kconfig.debug linux-dovetail-v5.15.y-dovetail/lib/Kconfig.debug --- linux-5.15.26/lib/Kconfig.debug 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/lib/Kconfig.debug 2022-03-10 09:47:50.000000000 +0100 @@ -973,6 +973,38 @@ config DEBUG_SHIRQ is currently disabled). Drivers need to handle this correctly. Some don't and need to be caught. +config DEBUG_IRQ_PIPELINE + bool "Debug IRQ pipeline" + depends on IRQ_PIPELINE && DEBUG_KERNEL + help + Turn on this option for enabling debug checks related to + interrupt pipelining, like interrupt state consistency and + proper context isolation between the in-band and oob stages. + + If unsure, say N. + +config IRQ_PIPELINE_TORTURE_TEST + bool "Torture tests for IRQ pipeline" + depends on DEBUG_IRQ_PIPELINE + select TORTURE_TEST + default n + help + This option provides a kernel module that runs torture tests + on the IRQ pipeline mechanism. + + Say Y here if you want the IRQ pipeline torture tests to run + when the kernel starts. Say N if you are unsure. + +config DEBUG_DOVETAIL + bool "Debug Dovetail interface" + depends on DOVETAIL && DEBUG_KERNEL + select DEBUG_IRQ_PIPELINE + help + Turn on this option for enabling debug checks related to + running a dual kernel configuration, aka dovetailing. This + option implicitly enables the interrupt pipeline debugging + features. + menu "Debug Oops, Lockups and Hangs" config PANIC_ON_OOPS @@ -1381,6 +1413,27 @@ config DEBUG_LOCK_ALLOC spin_lock_init()/mutex_init()/etc., or whether there is any lock held during task exit. +config DEBUG_HARD_LOCKS + bool "Debug hard spinlocks" + depends on DEBUG_IRQ_PIPELINE && LOCKDEP && EXPERIMENTAL + help + Turn on this option for enabling LOCKDEP for hard spinlock + types used in interrupt pipelining. + + Keep in mind that enabling such feature will ruin the + latency figures for any out-of-band code, this is merely + useful for proving the correctness of the locking scheme of + such code without any consideration for real-time + guarantees. You have been warned. + + If unsure, say N. + +if DEBUG_HARD_LOCKS +comment "WARNING! DEBUG_HARD_LOCKS induces **massive** latency" +comment "overhead for the code running on the out-of-band" +comment "interrupt stage." 
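
For reference, the hard spinlock type that DEBUG_HARD_LOCKS instruments is used exactly like a raw spinlock; the difference is that acquiring it masks interrupts in the CPU, so it also serializes against out-of-band code, which is why lib/atomic64.c, mm/kasan/report.c and kernel/trace/trace_irqsoff.c switch to it in this patch. A minimal declaration/usage sketch with hypothetical names:

#include <linux/spinlock.h>

static DEFINE_HARD_SPINLOCK(my_lock);
static u64 my_shared_state;

/* Safe to call from either stage; keep the critical section short. */
static void my_update(u64 val)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&my_lock, flags); /* hard IRQs off while held */
        my_shared_state = val;
        raw_spin_unlock_irqrestore(&my_lock, flags);
}
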
+endif + config LOCKDEP bool depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/lib/smp_processor_id.c linux-dovetail-v5.15.y-dovetail/lib/smp_processor_id.c --- linux-5.15.26/lib/smp_processor_id.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/lib/smp_processor_id.c 2022-03-10 09:47:50.000000000 +0100 @@ -7,12 +7,16 @@ #include #include #include +#include noinstr static unsigned int check_preemption_disabled(const char *what1, const char *what2) { int this_cpu = raw_smp_processor_id(); + if (hard_irqs_disabled() || !running_inband()) + goto out; + if (likely(preempt_count())) goto out; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/lib/vdso/gettimeofday.c linux-dovetail-v5.15.y-dovetail/lib/vdso/gettimeofday.c --- linux-5.15.26/lib/vdso/gettimeofday.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/lib/vdso/gettimeofday.c 2022-03-10 09:47:50.000000000 +0100 @@ -5,6 +5,245 @@ #include #include +static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts); + +#ifndef vdso_clocksource_ok +static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +{ + return vd->clock_mode != VDSO_CLOCKMODE_NONE; +} +#endif + +#ifndef vdso_cycles_ok +static inline bool vdso_cycles_ok(u64 cycles) +{ + return true; +} +#endif + +#ifdef CONFIG_GENERIC_CLOCKSOURCE_VDSO + +#include +#include +#include +#include + +static notrace u64 readl_mmio_up(const struct clksrc_info *vinfo) +{ + const struct clksrc_user_mmio_info *info = &vinfo->mmio; + return readl_relaxed(info->reg_lower); +} + +static notrace u64 readl_mmio_down(const struct clksrc_info *vinfo) +{ + const struct clksrc_user_mmio_info *info = &vinfo->mmio; + return ~(u64)readl_relaxed(info->reg_lower) & info->mask_lower; +} + +static notrace u64 readw_mmio_up(const struct clksrc_info *vinfo) +{ + const struct clksrc_user_mmio_info *info = &vinfo->mmio; + return readw_relaxed(info->reg_lower); +} + +static notrace u64 readw_mmio_down(const struct clksrc_info *vinfo) +{ + const struct clksrc_user_mmio_info *info = &vinfo->mmio; + return ~(u64)readl_relaxed(info->reg_lower) & info->mask_lower; +} + +static notrace u64 readl_dmmio_up(const struct clksrc_info *vinfo) +{ + const struct clksrc_user_mmio_info *info = &vinfo->mmio; + void __iomem *reg_lower, *reg_upper; + u32 upper, old_upper, lower; + + reg_lower = info->reg_lower; + reg_upper = info->reg_upper; + + upper = readl_relaxed(reg_upper); + do { + old_upper = upper; + lower = readl_relaxed(reg_lower); + upper = readl_relaxed(reg_upper); + } while (upper != old_upper); + + return (((u64)upper) << info->bits_lower) | lower; +} + +static notrace u64 readw_dmmio_up(const struct clksrc_info *vinfo) +{ + const struct clksrc_user_mmio_info *info = &vinfo->mmio; + void __iomem *reg_lower, *reg_upper; + u16 upper, old_upper, lower; + + reg_lower = info->reg_lower; + reg_upper = info->reg_upper; + + upper = readw_relaxed(reg_upper); + do { + old_upper = upper; + lower = readw_relaxed(reg_lower); + upper = readw_relaxed(reg_upper); + } while (upper != old_upper); + + return (((u64)upper) << info->bits_lower) | lower; +} + +static notrace __cold vdso_read_cycles_t *get_mmio_read_cycles(unsigned int type) +{ + switch (type) { + case CLKSRC_MMIO_L_UP: + return &readl_mmio_up; + case CLKSRC_MMIO_L_DOWN: + return &readl_mmio_down; + case CLKSRC_MMIO_W_UP: + return &readw_mmio_up; + case CLKSRC_MMIO_W_DOWN: + return &readw_mmio_down; + case 
CLKSRC_DMMIO_L_UP: + return &readl_dmmio_up; + case CLKSRC_DMMIO_W_UP: + return &readw_dmmio_up; + default: + return NULL; + } +} + +static __always_inline u16 to_cs_type(u32 cs_type_seq) +{ + return cs_type_seq >> 16; +} + +static __always_inline u16 to_seq(u32 cs_type_seq) +{ + return cs_type_seq; +} + +static __always_inline u32 to_cs_type_seq(u16 type, u16 seq) +{ + return (u32)type << 16U | seq; +} + +static notrace noinline __cold +void map_clocksource(const struct vdso_data *vd, struct vdso_priv *vp, + u32 seq, u32 new_cs_type_seq) +{ + vdso_read_cycles_t *read_cycles = NULL; + u32 new_cs_seq, new_cs_type; + struct clksrc_info *info; + int fd, ret; + + new_cs_seq = to_seq(new_cs_type_seq); + new_cs_type = to_cs_type(new_cs_type_seq); + info = &vp->clksrc_info[new_cs_type]; + + if (new_cs_type < CLOCKSOURCE_VDSO_MMIO) + goto done; + + fd = clock_open_device(vd->cs_mmdev, O_RDONLY); + if (fd < 0) + goto fallback_to_syscall; + + if (vdso_read_retry(vd, seq)) { + vdso_read_begin(vd); + if (to_seq(vd->cs_type_seq) != new_cs_seq) { + /* + * cs_mmdev no longer corresponds to + * vd->cs_type_seq. + */ + clock_close_device(fd); + return; + } + } + + ret = clock_ioctl_device(fd, CLKSRC_USER_MMIO_MAP, (long)&info->mmio); + clock_close_device(fd); + if (ret < 0) + goto fallback_to_syscall; + + read_cycles = get_mmio_read_cycles(info->mmio.type); + if (read_cycles == NULL) /* Mmhf, misconfigured. */ + goto fallback_to_syscall; +done: + info->read_cycles = read_cycles; + smp_wmb(); + new_cs_type_seq = to_cs_type_seq(new_cs_type, new_cs_seq); + WRITE_ONCE(vp->current_cs_type_seq, new_cs_type_seq); + + return; + +fallback_to_syscall: + new_cs_type = CLOCKSOURCE_VDSO_NONE; + info = &vp->clksrc_info[new_cs_type]; + goto done; +} + +static inline notrace +bool get_hw_counter(const struct vdso_data *vd, u32 *r_seq, u64 *cycles) +{ + const struct clksrc_info *info; + struct vdso_priv *vp; + u32 seq, cs_type_seq; + unsigned int cs; + + vp = __arch_get_vdso_priv(); + + for (;;) { + seq = vdso_read_begin(vd); + cs_type_seq = READ_ONCE(vp->current_cs_type_seq); + if (likely(to_seq(cs_type_seq) == to_seq(vd->cs_type_seq))) + break; + + map_clocksource(vd, vp, seq, vd->cs_type_seq); + } + + switch (to_cs_type(cs_type_seq)) { + case CLOCKSOURCE_VDSO_NONE: + return false; /* Use fallback. */ + case CLOCKSOURCE_VDSO_ARCHITECTED: + if (unlikely(!vdso_clocksource_ok(vd))) + return false; + *cycles = __arch_get_hw_counter(vd->clock_mode, vd); + if (unlikely(!vdso_cycles_ok(*cycles))) + return false; + break; + default: + cs = to_cs_type(READ_ONCE(cs_type_seq)); + info = &vp->clksrc_info[cs]; + *cycles = info->read_cycles(info); + break; + } + + *r_seq = seq; + + return true; +} + +#else + +static inline notrace +bool get_hw_counter(const struct vdso_data *vd, u32 *r_seq, u64 *cycles) +{ + *r_seq = vdso_read_begin(vd); + + /* + * CAUTION: checking the clocksource mode must happen inside + * the seqlocked section. + */ + if (unlikely(!vdso_clocksource_ok(vd))) + return false; + + *cycles = __arch_get_hw_counter(vd->clock_mode, vd); + if (unlikely(!vdso_cycles_ok(*cycles))) + return false; + + return true; +} + +#endif /* CONFIG_GENERIC_CLOCKSOURCE_VDSO */ + #ifndef vdso_calc_delta /* * Default implementation which works for all sane clocksources. 
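
The cs_type_seq word handled above packs the clocksource type in the upper 16 bits and a generation counter in the lower 16 bits: update_vsyscall() bumps the generation whenever the current clocksource changes, and the vDSO reader must remap before trusting its cached read_cycles pointer. The following is merely a restatement of the check get_hw_counter() performs, written as a hypothetical helper that would live alongside the code above:

static __always_inline bool
my_mapping_is_current(const struct vdso_data *vd, const struct vdso_priv *vp)
{
        u32 published = READ_ONCE(vd->cs_type_seq);      /* update_generic_mmio() */
        u32 mapped = READ_ONCE(vp->current_cs_type_seq); /* map_clocksource() */

        return to_seq(published) == to_seq(mapped);
}
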
That @@ -31,20 +270,6 @@ static inline bool __arch_vdso_hres_capa } #endif -#ifndef vdso_clocksource_ok -static inline bool vdso_clocksource_ok(const struct vdso_data *vd) -{ - return vd->clock_mode != VDSO_CLOCKMODE_NONE; -} -#endif - -#ifndef vdso_cycles_ok -static inline bool vdso_cycles_ok(u64 cycles) -{ - return true; -} -#endif - #ifdef CONFIG_TIME_NS static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, struct __kernel_timespec *ts) @@ -65,13 +290,7 @@ static __always_inline int do_hres_timen vdso_ts = &vd->basetime[clk]; do { - seq = vdso_read_begin(vd); - - if (unlikely(!vdso_clocksource_ok(vd))) - return -1; - - cycles = __arch_get_hw_counter(vd->clock_mode, vd); - if (unlikely(!vdso_cycles_ok(cycles))) + if (!get_hw_counter(vd, &seq, &cycles)) return -1; ns = vdso_ts->nsec; last = vd->cycle_last; @@ -120,30 +339,29 @@ static __always_inline int do_hres(const do { /* - * Open coded to handle VDSO_CLOCKMODE_TIMENS. Time namespace - * enabled tasks have a special VVAR page installed which - * has vd->seq set to 1 and vd->clock_mode set to - * VDSO_CLOCKMODE_TIMENS. For non time namespace affected tasks - * this does not affect performance because if vd->seq is - * odd, i.e. a concurrent update is in progress the extra + * Open coded to handle VDSO_CLOCKMODE_TIMENS. Time + * namespace enabled tasks have a special VVAR page + * installed which has vd->seq set to 1 and + * vd->clock_mode set to VDSO_CLOCKMODE_TIMENS. For + * non time namespace affected tasks this does not + * affect performance because if vd->seq is odd, + * i.e. a concurrent update is in progress the extra * check for vd->clock_mode is just a few extra - * instructions while spin waiting for vd->seq to become - * even again. + * instructions while spin waiting for vd->seq to + * become even again. */ while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) { if (IS_ENABLED(CONFIG_TIME_NS) && - vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - return do_hres_timens(vd, clk, ts); + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) + return !do_hres_timens(vd, clk, ts); cpu_relax(); } + smp_rmb(); - if (unlikely(!vdso_clocksource_ok(vd))) + if (!get_hw_counter(vd, &seq, &cycles)) return -1; - cycles = __arch_get_hw_counter(vd->clock_mode, vd); - if (unlikely(!vdso_cycles_ok(cycles))) - return -1; ns = vdso_ts->nsec; last = vd->cycle_last; ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/lib/vdso/Kconfig linux-dovetail-v5.15.y-dovetail/lib/vdso/Kconfig --- linux-5.15.26/lib/vdso/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/lib/vdso/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -30,4 +30,11 @@ config GENERIC_VDSO_TIME_NS Selected by architectures which support time namespaces in the VDSO +config GENERIC_CLOCKSOURCE_VDSO + select CLKSRC_MMIO + bool + help + Enables access to clocksources via the vDSO based on + generic MMIO operations. + endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/mm/huge_memory.c linux-dovetail-v5.15.y-dovetail/mm/huge_memory.c --- linux-5.15.26/mm/huge_memory.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/mm/huge_memory.c 2022-03-10 09:47:50.000000000 +0100 @@ -1104,7 +1104,7 @@ int copy_huge_pmd(struct mm_struct *dst_ * best effort that the pinned pages won't be replaced by another * random page during the coming copy-on-write. 
*/ - if (unlikely(page_needs_cow_for_dma(src_vma, src_page))) { + if (unlikely(page_needs_cow(src_vma, src_page))) { pte_free(dst_mm, pgtable); spin_unlock(src_ptl); spin_unlock(dst_ptl); @@ -1218,7 +1218,7 @@ int copy_huge_pud(struct mm_struct *dst_ } /* Please refer to comments in copy_huge_pmd() */ - if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud)))) { + if (unlikely(page_needs_cow(vma, pud_page(pud)))) { spin_unlock(src_ptl); spin_unlock(dst_ptl); __split_huge_pud(vma, src_pud, addr); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/mm/hugetlb.c linux-dovetail-v5.15.y-dovetail/mm/hugetlb.c --- linux-5.15.26/mm/hugetlb.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/mm/hugetlb.c 2022-03-10 09:47:50.000000000 +0100 @@ -4363,7 +4363,7 @@ again: * need to be without the pgtable locks since we could * sleep during the process. */ - if (unlikely(page_needs_cow_for_dma(vma, ptepage))) { + if (unlikely(page_needs_cow(vma, ptepage))) { pte_t src_pte_old = entry; struct page *new; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/mm/kasan/report.c linux-dovetail-v5.15.y-dovetail/mm/kasan/report.c --- linux-5.15.26/mm/kasan/report.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/mm/kasan/report.c 2022-03-10 09:47:50.000000000 +0100 @@ -98,7 +98,7 @@ static void print_error_description(stru info->access_addr, current->comm, task_pid_nr(current)); } -static DEFINE_SPINLOCK(report_lock); +static DEFINE_HARD_SPINLOCK(report_lock); static void start_report(unsigned long *flags) { @@ -106,7 +106,7 @@ static void start_report(unsigned long * * Make sure we don't end up in loop. */ kasan_disable_current(); - spin_lock_irqsave(&report_lock, *flags); + raw_spin_lock_irqsave(&report_lock, *flags); pr_err("==================================================================\n"); } @@ -116,7 +116,7 @@ static void end_report(unsigned long *fl trace_error_report_end(ERROR_DETECTOR_KASAN, addr); pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); - spin_unlock_irqrestore(&report_lock, *flags); + raw_spin_unlock_irqrestore(&report_lock, *flags); if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) { /* * This thread may hit another WARN() in the panic path. diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/mm/memory.c linux-dovetail-v5.15.y-dovetail/mm/memory.c --- linux-5.15.26/mm/memory.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/mm/memory.c 2022-03-10 09:47:50.000000000 +0100 @@ -900,7 +900,7 @@ copy_present_page(struct vm_area_struct * the page count. That might give false positives for * for pinning, but it will work correctly. */ - if (likely(!page_needs_cow_for_dma(src_vma, page))) + if (likely(!page_needs_cow(src_vma, page))) return 1; new_page = *prealloc; @@ -5256,6 +5256,15 @@ void print_vma_addr(char *prefix, unsign void __might_fault(const char *file, int line) { /* + * When running over the oob stage (e.g. some co-kernel's own + * thread), we should only make sure to run with hw IRQs + * enabled before accessing the memory. 
+ */ + if (running_oob()) { + WARN_ON_ONCE(hard_irqs_disabled()); + return; + } + /* * Some code (nfs/sunrpc) uses socket ops on kernel memory while * holding the mmap_lock, this is safe because kernel memory doesn't * get paged out, therefore we'll never actually fault, and the diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/mm/mprotect.c linux-dovetail-v5.15.y-dovetail/mm/mprotect.c --- linux-5.15.26/mm/mprotect.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/mm/mprotect.c 2022-03-10 09:47:50.000000000 +0100 @@ -41,7 +41,7 @@ static unsigned long change_pte_range(st { pte_t *pte, oldpte; spinlock_t *ptl; - unsigned long pages = 0; + unsigned long pages = 0, flags; int target_node = NUMA_NO_NODE; bool dirty_accountable = cp_flags & MM_CP_DIRTY_ACCT; bool prot_numa = cp_flags & MM_CP_PROT_NUMA; @@ -113,6 +113,7 @@ static unsigned long change_pte_range(st continue; } + flags = hard_local_irq_save(); oldpte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_modify(oldpte, newprot); if (preserve_write) @@ -138,6 +139,7 @@ static unsigned long change_pte_range(st ptent = pte_mkwrite(ptent); } ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent); + hard_local_irq_restore(flags); pages++; } else if (is_swap_pte(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/mm/vmalloc.c linux-dovetail-v5.15.y-dovetail/mm/vmalloc.c --- linux-5.15.26/mm/vmalloc.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/mm/vmalloc.c 2022-03-10 09:47:50.000000000 +0100 @@ -305,6 +305,8 @@ static int vmap_range_noflush(unsigned l break; } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); + arch_advertise_page_mapping(start, end); + if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, end); @@ -538,6 +540,10 @@ static int vmap_pages_p4d_range(pgd_t *p return 0; } +void __weak arch_advertise_page_mapping(unsigned long start, unsigned long end) +{ +} + static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages) { @@ -559,6 +565,8 @@ static int vmap_small_pages_range_noflus return err; } while (pgd++, addr = next, addr != end); + arch_advertise_page_mapping(start, end); + if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, end); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/net/core/dev.c linux-dovetail-v5.15.y-dovetail/net/core/dev.c --- linux-5.15.26/net/core/dev.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/net/core/dev.c 2022-03-10 09:47:50.000000000 +0100 @@ -3098,6 +3098,10 @@ void __dev_kfree_skb_irq(struct sk_buff } else if (likely(!refcount_dec_and_test(&skb->users))) { return; } + + if (recycle_oob_skb(skb)) + return; + get_kfree_skb_cb(skb)->reason = reason; local_irq_save(flags); skb->next = __this_cpu_read(softnet_data.completion_queue); @@ -3575,7 +3579,12 @@ static int xmit_one(struct sk_buff *skb, unsigned int len; int rc; - if (dev_nit_active(dev)) + /* + * Clone-relay outgoing packet to listening taps. Network taps + * interested in out-of-band traffic should be handled by the + * companion core. 
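
The change_pte_range() hunk above shows the conversion rule applied throughout this patch: a section the vanilla kernel made atomic on the local CPU with local_irq_save() must use the hard variant once interrupts are pipelined, because local_irq_save() then only stalls the in-band stage and no longer prevents out-of-band preemption. In schematic form (illustration only):

#include <linux/irqflags.h>

static void my_short_atomic_section(void)
{
        unsigned long flags;

        /* Before: local_irq_save(flags) -- virtual, in-band only. */
        flags = hard_local_irq_save();  /* really masks IRQs in the CPU */

        /* ... brief work neither in-band nor oob interrupts may observe ... */

        hard_local_irq_restore(flags);
}
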
+ */ + if (dev_nit_active(dev) && !skb_is_oob(skb)) dev_queue_xmit_nit(skb, dev); len = skb->len; @@ -4883,6 +4892,81 @@ out_redir: } EXPORT_SYMBOL_GPL(do_xdp_generic); +#ifdef CONFIG_NET_OOB + +__weak bool netif_oob_deliver(struct sk_buff *skb) +{ + return false; +} + +__weak int netif_xmit_oob(struct sk_buff *skb) +{ + return NET_XMIT_DROP; +} + +static bool netif_receive_oob(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + + if (dev && netif_oob_diversion(dev)) + return netif_oob_deliver(skb); + + return false; +} + +static bool netif_receive_oob_list(struct list_head *head) +{ + struct sk_buff *skb, *next; + struct net_device *dev; + + if (list_empty(head)) + return false; + + dev = list_first_entry(head, struct sk_buff, list)->dev; + if (!dev || !netif_oob_diversion(dev)) + return false; + + /* Callee dequeues every skb it consumes. */ + list_for_each_entry_safe(skb, next, head, list) + netif_oob_deliver(skb); + + return list_empty(head); +} + +__weak void netif_oob_run(struct net_device *dev) +{ } + +static void napi_complete_oob(struct napi_struct *n) +{ + struct net_device *dev = n->dev; + + if (netif_oob_diversion(dev)) + netif_oob_run(dev); +} + +__weak void skb_inband_xmit_backlog(void) +{ } + +#else + +static inline bool netif_receive_oob(struct sk_buff *skb) +{ + return false; +} + +static inline bool netif_receive_oob_list(struct list_head *head) +{ + return false; +} + +static inline void napi_complete_oob(struct napi_struct *n) +{ } + +static inline void skb_inband_xmit_backlog(void) +{ } + +#endif + static int netif_rx_internal(struct sk_buff *skb) { int ret; @@ -4982,6 +5066,8 @@ static __latent_entropy void net_tx_acti { struct softnet_data *sd = this_cpu_ptr(&softnet_data); + skb_inband_xmit_backlog(); + if (sd->completion_queue) { struct sk_buff *clist; @@ -5702,6 +5788,9 @@ int netif_receive_skb(struct sk_buff *sk { int ret; + if (netif_receive_oob(skb)) + return NET_RX_SUCCESS; + trace_netif_receive_skb_entry(skb); ret = netif_receive_skb_internal(skb); @@ -5725,6 +5814,8 @@ void netif_receive_skb_list(struct list_ { struct sk_buff *skb; + if (netif_receive_oob_list(head)) + return; if (list_empty(head)) return; if (trace_netif_receive_skb_list_entry_enabled()) { @@ -6208,6 +6299,9 @@ gro_result_t napi_gro_receive(struct nap { gro_result_t ret; + if (netif_receive_oob(skb)) + return GRO_NORMAL; + skb_mark_napi_id(skb, napi); trace_napi_gro_receive_entry(skb); @@ -6542,6 +6636,8 @@ bool napi_complete_done(struct napi_stru unsigned long flags, val, new, timeout = 0; bool ret = true; + napi_complete_oob(n); + /* * 1) Don't let napi dequeue from the cpu poll list * just in case its running on a different cpu. 
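
The __weak hooks above form the contract between the in-band stack and a companion network core: once diversion is turned on for a device, ingress packets are offered to netif_oob_deliver() first, and napi_complete_done() kicks netif_oob_run(). A sketch of the strong definitions such a core (built into the kernel image, since weak symbols resolve at link time) might provide; the my_* helpers are hypothetical:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical companion-core entry points. */
bool my_oob_input(struct sk_buff *skb);         /* true if consumed */
void my_oob_poll(struct net_device *dev);

bool netif_oob_deliver(struct sk_buff *skb)
{
        /* Return false to hand the packet back to the in-band stack. */
        return my_oob_input(skb);
}

void netif_oob_run(struct net_device *dev)
{
        /* Called from napi_complete_done() while diversion is active. */
        my_oob_poll(dev);
}
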
diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/net/core/net-sysfs.c linux-dovetail-v5.15.y-dovetail/net/core/net-sysfs.c --- linux-5.15.26/net/core/net-sysfs.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/net/core/net-sysfs.c 2022-03-10 09:47:50.000000000 +0100 @@ -386,6 +386,54 @@ static ssize_t tx_queue_len_store(struct } NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec); +#ifdef CONFIG_NET_OOB + +__weak int netif_oob_switch_port(struct net_device *dev, bool enabled) +{ + return 0; +} + +__weak bool netif_oob_get_port(struct net_device *dev) +{ + return false; +} + +__weak ssize_t netif_oob_query_pool(struct net_device *dev, char *buf) +{ + return -EIO; +} + +static int switch_oob_port(struct net_device *dev, unsigned long enable) +{ + return netif_oob_switch_port(dev, (bool)enable); +} + +static ssize_t oob_port_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, switch_oob_port); +} + +static ssize_t oob_port_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + + return sprintf(buf, fmt_dec, netif_oob_get_port(netdev)); +} +static DEVICE_ATTR_RW(oob_port); + +static ssize_t oob_pool_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + + return netif_oob_query_pool(netdev, buf); +} +static DEVICE_ATTR_RO(oob_pool); + +#endif + static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) { WRITE_ONCE(dev->gro_flush_timeout, val); @@ -659,6 +707,10 @@ static struct attribute *net_class_attrs &dev_attr_carrier_up_count.attr, &dev_attr_carrier_down_count.attr, &dev_attr_threaded.attr, +#ifdef CONFIG_NET_OOB + &dev_attr_oob_port.attr, + &dev_attr_oob_pool.attr, +#endif NULL, }; ATTRIBUTE_GROUPS(net_class); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/net/core/skbuff.c linux-dovetail-v5.15.y-dovetail/net/core/skbuff.c --- linux-5.15.26/net/core/skbuff.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/net/core/skbuff.c 2022-03-10 09:47:50.000000000 +0100 @@ -215,6 +215,117 @@ static void __build_skb_around(struct sk skb_set_kcov_handle(skb, kcov_common_handle()); } +#ifdef CONFIG_NET_OOB + +struct sk_buff *__netdev_alloc_oob_skb(struct net_device *dev, size_t len, + gfp_t gfp_mask) +{ + struct sk_buff *skb = __alloc_skb(len, gfp_mask, 0, NUMA_NO_NODE); + + if (!skb) + return NULL; + + skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + skb->oob = true; + + return skb; +} +EXPORT_SYMBOL_GPL(__netdev_alloc_oob_skb); + +void __netdev_free_oob_skb(struct net_device *dev, struct sk_buff *skb) +{ + skb->oob = false; + skb->oob_clone = false; + dev_kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(__netdev_free_oob_skb); + +void netdev_reset_oob_skb(struct net_device *dev, struct sk_buff *skb) +{ + struct skb_shared_info *shinfo; + bool head_frag = skb->head_frag; + bool pfmemalloc = skb->pfmemalloc; + + if (WARN_ON_ONCE(!skb->oob || skb->oob_clone)) + return; + + memset(skb, 0, offsetof(struct sk_buff, tail)); + /* Out-of-band skbs are guaranteed to have linear storage. 
*/ + skb->data = skb->head; + skb_reset_tail_pointer(skb); + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; + skb->head_frag = head_frag; + skb->pfmemalloc = pfmemalloc; + shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); + atomic_set(&shinfo->dataref, 1); + skb_reserve(skb, NET_SKB_PAD); + + refcount_set(&skb->users, 1); + skb->dev = dev; + skb->oob = true; + skb_set_kcov_handle(skb, kcov_common_handle()); +} +EXPORT_SYMBOL_GPL(netdev_reset_oob_skb); + +struct sk_buff *skb_alloc_oob_head(gfp_t gfp_mask) +{ + struct sk_buff *skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + + if (!skb) + return NULL; + + /* + * skb heads allocated for out-of-band traffic should be + * reserved for clones, so memset is extraneous in the sense + * that skb_morph_oob() should follow the allocation. + */ + memset(skb, 0, offsetof(struct sk_buff, tail)); + refcount_set(&skb->users, 1); + skb->oob_clone = true; + skb_set_kcov_handle(skb, kcov_common_handle()); + + return skb; +} +EXPORT_SYMBOL_GPL(skb_alloc_oob_head); + +static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb); + +void skb_morph_oob_skb(struct sk_buff *n, struct sk_buff *skb) +{ + __skb_clone(n, skb); + n->oob = true; + n->oob_clone = true; + skb->oob_cloned = true; +} +EXPORT_SYMBOL_GPL(skb_morph_oob_skb); + +bool skb_release_oob_skb(struct sk_buff *skb, int *dref) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (!skb_unref(skb)) + return false; + + /* + * ->nohdr is never set for oob shells, so we always refcount + * the full data (header + payload) when cloned. + */ + *dref = skb->cloned ? atomic_sub_return(1, &shinfo->dataref) : 0; + + return true; +} +EXPORT_SYMBOL_GPL(skb_release_oob_skb); + +__weak bool skb_oob_recycle(struct sk_buff *skb) +{ + return false; +} + +#endif /* CONFIG_NET_OOB */ + /** * __build_skb - build a network buffer * @data: data buffer provided by caller @@ -753,6 +864,9 @@ static void skb_release_all(struct sk_bu void __kfree_skb(struct sk_buff *skb) { + if (recycle_oob_skb(skb)) + return; + skb_release_all(skb); kfree_skbmem(skb); } @@ -949,12 +1063,18 @@ static void napi_skb_cache_put(struct sk void __kfree_skb_defer(struct sk_buff *skb) { + if (recycle_oob_skb(skb)) + return; + skb_release_all(skb); napi_skb_cache_put(skb); } void napi_skb_free_stolen_head(struct sk_buff *skb) { + if (recycle_oob_skb(skb)) + return; + if (unlikely(skb->slow_gro)) { nf_reset_ct(skb); skb_dst_drop(skb); @@ -987,6 +1107,9 @@ void napi_consume_skb(struct sk_buff *sk return; } + if (recycle_oob_skb(skb)) + return; + skb_release_all(skb); napi_skb_cache_put(skb); } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/net/Kconfig linux-dovetail-v5.15.y-dovetail/net/Kconfig --- linux-5.15.26/net/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/net/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -58,6 +58,9 @@ config NET_REDIRECT config SKB_EXTENSIONS bool +config NET_OOB + bool + menu "Networking options" source "net/packet/Kconfig" diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/net/packet/af_packet.c linux-dovetail-v5.15.y-dovetail/net/packet/af_packet.c --- linux-5.15.26/net/packet/af_packet.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/net/packet/af_packet.c 2022-03-10 09:47:50.000000000 +0100 @@ -3302,6 +3302,7 @@ static int packet_create(struct net *net po = pkt_sk(sk); init_completion(&po->skb_completion); 
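
Out-of-band skbs carry the oob/oob_clone markers set by the helpers above, and the skb free paths patched in this file now call recycle_oob_skb() first, so the owning core can reclaim its buffers instead of letting them fall back into the regular caches. A sketch of a strong skb_oob_recycle() override; the my_pool_* helper is hypothetical and real code would also deal with clone/data refcounts via skb_release_oob_skb():

#include <linux/skbuff.h>

void my_pool_push(struct sk_buff *skb);         /* hypothetical free list */

bool skb_oob_recycle(struct sk_buff *skb)
{
        /* Only take back buffers we allocated; clones follow the normal path. */
        if (!skb->oob || skb->oob_clone)
                return false;

        my_pool_push(skb);
        return true;
}
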
sk->sk_family = PF_PACKET; + sk->sk_protocol = protocol; po->num = proto; po->xmit = dev_queue_xmit; diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/net/sched/Kconfig linux-dovetail-v5.15.y-dovetail/net/sched/Kconfig --- linux-5.15.26/net/sched/Kconfig 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/net/sched/Kconfig 2022-03-10 09:47:50.000000000 +0100 @@ -117,6 +117,29 @@ config NET_SCH_MULTIQ To compile this code as a module, choose M here: the module will be called sch_multiq. +config NET_SCH_OOB + tristate "Out-of-band packet queuing (OOB)" + depends on NET_OOB + help + Say Y here if you want to use a Dovetail-aware packet + scheduler for prioritizing egress traffic between the + regular (in-band) network stack and a companion core. This + scheduler helps in two cases: + + - for sending high priority packets originating from the + out-of-band stage to NICs which cannot handle outgoing + packets from that stage directly. In this case, these + packets take precedence over regular traffic for + transmission. + + - for sharing an out-of-band capable interface between the + in-band and out-of-band network stacks, proxying regular + traffic originating from the in-band stage to NICs which + will be processing all packets from the out-of-band stage. + + To compile this code as a module, choose M here: the + module will be called sch_oob. + config NET_SCH_RED tristate "Random Early Detection (RED)" help diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/net/sched/Makefile linux-dovetail-v5.15.y-dovetail/net/sched/Makefile --- linux-5.15.26/net/sched/Makefile 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/net/sched/Makefile 2022-03-10 09:47:50.000000000 +0100 @@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o +obj-$(CONFIG_NET_SCH_OOB) += sch_oob.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/net/sched/sch_oob.c linux-dovetail-v5.15.y-dovetail/net/sched/sch_oob.c --- linux-5.15.26/net/sched/sch_oob.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/net/sched/sch_oob.c 2022-03-10 09:47:50.000000000 +0100 @@ -0,0 +1,294 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2020 Philippe Gerum + */ + +#include +#include +#include +#include + +/* + * With Qdisc[2], 0=oob_fallback and 1=inband. User can graft whatever + * qdisc on these slots; both preset to pfifo_ops. skb->oob is checked + * to determine which qdisc should handle the packet eventually. + */ + +struct oob_qdisc_priv { + struct Qdisc *qdisc[2]; /* 0=oob_fallback, 1=in-band */ + struct tcf_proto __rcu *filter_list; + struct tcf_block *block; +}; + +static int oob_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + struct net_device *dev = skb->dev; + struct Qdisc *qdisc; + int ret; + + /* + * If the device accepts oob traffic and can handle it + * directly from the oob stage, pass the outgoing packet to + * the transmit handler of the oob stack. This makes sure that + * all traffic, including the in-band one, flows through the + * oob stack which may implement its own queuing discipline. 
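
On the egress side, the "oob" qdisc introduced above separates traffic by looking at skb->oob; an oob-capable driver can make the same distinction in its transmit path to steer out-of-band packets onto a low-latency ring. A sketch with hypothetical driver helpers:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

netdev_tx_t my_xmit_oob_ring(struct net_device *dev, struct sk_buff *skb);
netdev_tx_t my_xmit_inband_ring(struct net_device *dev, struct sk_buff *skb);

static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        if (skb_is_oob(skb))    /* same test as the xmit_one() change above */
                return my_xmit_oob_ring(dev, skb);

        return my_xmit_inband_ring(dev, skb);
}
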
+ * + * netif_xmit_oob() might fail handling the packet, in which + * case we leave it to the in-band packet scheduler, applying + * a best-effort strategy by giving higher priority to oob + * packets over mere in-band traffic. + */ + if (dev && netif_oob_diversion(dev) && netdev_is_oob_capable(dev)) { + ret = netif_xmit_oob(skb); + if (ret == NET_XMIT_SUCCESS) + return NET_XMIT_SUCCESS; + } + + /* + * Out-of-band fast lane is closed. Best effort: use a special + * 'high priority' queue for oob packets we handle from + * in-band context the usual way through the common stack. + */ + qdisc = skb->oob ? p->qdisc[0] : p->qdisc[1]; + ret = qdisc_enqueue(skb, qdisc, to_free); + if (ret == NET_XMIT_SUCCESS) { + sch->q.qlen++; + return NET_XMIT_SUCCESS; + } + + if (net_xmit_drop_count(ret)) + qdisc_qstats_drop(sch); + + return ret; +} + +static struct sk_buff *oob_dequeue(struct Qdisc *sch) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + struct sk_buff *skb; + struct Qdisc *qdisc; + int band; + + /* + * First try to dequeue pending out-of-band packets. If none, + * then check for in-band traffic. + */ + for (band = 0; band < 2; band++) { + qdisc = p->qdisc[band]; + skb = qdisc->dequeue(qdisc); + if (skb) { + qdisc_bstats_update(sch, skb); + sch->q.qlen--; + return skb; + } + } + + return NULL; +} + +static struct sk_buff *oob_peek(struct Qdisc *sch) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + struct sk_buff *skb; + struct Qdisc *qdisc; + int band; + + for (band = 0; band < 2; band++) { + qdisc = p->qdisc[band]; + skb = qdisc->ops->peek(qdisc); + if (skb) + return skb; + } + + return NULL; +} + +static int oob_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + int ret; + + ret = tcf_block_get(&p->block, &p->filter_list, sch, extack); + if (ret) + return ret; + + p->qdisc[0] = qdisc_create_dflt(sch->dev_queue, + &pfifo_qdisc_ops, sch->handle, + extack); + p->qdisc[1] = qdisc_create_dflt(sch->dev_queue, + &pfifo_fast_ops, sch->handle, + extack); + + return 0; +} + +static void oob_reset(struct Qdisc *sch) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + + qdisc_reset(p->qdisc[0]); + qdisc_reset(p->qdisc[1]); + sch->q.qlen = 0; +} + +static void oob_destroy(struct Qdisc *sch) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + + tcf_block_put(p->block); + qdisc_put(p->qdisc[0]); + qdisc_put(p->qdisc[1]); +} + +static int oob_tune(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int oob_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + return skb->len; +} + +static int oob_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old, struct netlink_ext_ack *extack) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + unsigned long band = arg - 1; + + if (new == NULL) + new = &noop_qdisc; + + *old = qdisc_replace(sch, new, &p->qdisc[band]); + + return 0; +} + +static struct Qdisc * +oob_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + unsigned long band = arg - 1; + + return p->qdisc[band]; +} + +static unsigned long oob_find(struct Qdisc *sch, u32 classid) +{ + unsigned long band = TC_H_MIN(classid); + + return band - 1 >= 2 ? 
0 : band; +} + +static int oob_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + + tcm->tcm_handle |= TC_H_MIN(cl); + tcm->tcm_info = p->qdisc[cl - 1]->handle; + + return 0; +} + +static int oob_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + struct Qdisc *cl_q = p->qdisc[cl - 1]; + + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || + qdisc_qstats_copy(d, cl_q) < 0) + return -1; + + return 0; +} + +static void oob_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + int band; + + if (arg->stop) + return; + + for (band = 0; band < 2; band++) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, band + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static unsigned long oob_tcf_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return oob_find(sch, classid); +} + +static void oob_tcf_unbind(struct Qdisc *q, unsigned long cl) +{ +} + +static struct tcf_block *oob_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) +{ + struct oob_qdisc_priv *p = qdisc_priv(sch); + + if (cl) + return NULL; + + return p->block; +} + +static const struct Qdisc_class_ops oob_class_ops = { + .graft = oob_graft, + .leaf = oob_leaf, + .find = oob_find, + .walk = oob_walk, + .dump = oob_dump_class, + .dump_stats = oob_dump_class_stats, + .tcf_block = oob_tcf_block, + .bind_tcf = oob_tcf_bind, + .unbind_tcf = oob_tcf_unbind, +}; + +static struct Qdisc_ops oob_qdisc_ops __read_mostly = { + .cl_ops = &oob_class_ops, + .id = "oob", + .priv_size = sizeof(struct oob_qdisc_priv), + .enqueue = oob_enqueue, + .dequeue = oob_dequeue, + .peek = oob_peek, + .init = oob_init, + .reset = oob_reset, + .destroy = oob_destroy, + .change = oob_tune, + .dump = oob_dump, + .owner = THIS_MODULE, +}; + +static int __init oob_module_init(void) +{ + return register_qdisc(&oob_qdisc_ops); +} + +static void __exit oob_module_exit(void) +{ + unregister_qdisc(&oob_qdisc_ops); +} + +module_init(oob_module_init) +module_exit(oob_module_exit) + +MODULE_LICENSE("GPL"); diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/net/socket.c linux-dovetail-v5.15.y-dovetail/net/socket.c --- linux-5.15.26/net/socket.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/net/socket.c 2022-03-10 09:47:50.000000000 +0100 @@ -142,6 +142,95 @@ static void sock_show_fdinfo(struct seq_ #define sock_show_fdinfo NULL #endif +#ifdef CONFIG_NET_OOB + +static inline bool sock_oob_capable(struct socket *sock) +{ + return sock->sk && sock->sk->oob_data; +} + +int __weak sock_oob_attach(struct socket *sock) +{ + return 0; +} + +void __weak sock_oob_detach(struct socket *sock) +{ +} + +int __weak sock_oob_bind(struct socket *sock, struct sockaddr *addr, int len) +{ + return 0; +} + +long __weak sock_inband_ioctl_redirect(struct socket *sock, + unsigned int cmd, unsigned long arg) +{ + return -ENOTTY; +} + +long __weak sock_oob_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + return -ENOTTY; +} + +ssize_t __weak sock_oob_write(struct file *filp, + const char __user *u_buf, size_t count) +{ + return -EOPNOTSUPP; +} + +ssize_t __weak sock_oob_read(struct file *filp, + char __user *u_buf, size_t count) +{ + return -EOPNOTSUPP; +} + +__poll_t __weak sock_oob_poll(struct file *filp, + struct oob_poll_wait 
*wait) +{ + return -EOPNOTSUPP; +} + +#define compat_sock_oob_ioctl compat_ptr_oob_ioctl + +#else /* !CONFIG_NET_OOB */ + +static inline bool sock_oob_capable(struct socket *sock) +{ + return false; +} + +static inline int sock_oob_attach(struct socket *sock) +{ + return 0; +} + +static inline void sock_oob_detach(struct socket *sock) +{ +} + +static int sock_oob_bind(struct socket *sock, + struct sockaddr *addr, int len) +{ + return 0; +} + +static inline long sock_inband_ioctl_redirect(struct socket *sock, + unsigned int cmd, unsigned long arg) +{ + return -ENOTTY; +} + +#define sock_oob_ioctl NULL +#define sock_oob_write NULL +#define sock_oob_read NULL +#define sock_oob_poll NULL +#define compat_sock_oob_ioctl NULL + +#endif /* !CONFIG_NET_OOB */ + /* * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear * in the operation structures but are done directly via the socketcall() multiplexor. @@ -154,8 +243,13 @@ static const struct file_operations sock .write_iter = sock_write_iter, .poll = sock_poll, .unlocked_ioctl = sock_ioctl, + .oob_ioctl = sock_oob_ioctl, + .oob_write = sock_oob_write, + .oob_read = sock_oob_read, + .oob_poll = sock_oob_poll, #ifdef CONFIG_COMPAT .compat_ioctl = compat_sock_ioctl, + .compat_oob_ioctl = compat_sock_oob_ioctl, #endif .mmap = sock_mmap, .release = sock_close, @@ -213,6 +307,7 @@ static const char * const pf_family_name [PF_SMC] = "PF_SMC", [PF_XDP] = "PF_XDP", [PF_MCTP] = "PF_MCTP", + [PF_OOB] = "PF_OOB", }; /* @@ -477,7 +572,7 @@ EXPORT_SYMBOL(sock_alloc_file); static int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = get_unused_fd_flags(flags); + int fd = get_unused_fd_flags(flags), ret; if (unlikely(fd < 0)) { sock_release(sock); return fd; @@ -485,6 +580,14 @@ static int sock_map_fd(struct socket *so newfile = sock_alloc_file(sock, flags, NULL); if (!IS_ERR(newfile)) { + if (IS_ENABLED(CONFIG_NET_OOB) && (flags & SOCK_OOB)) { + ret = sock_oob_attach(sock); + if (ret < 0) { + put_unused_fd(fd); + sock_release(sock); + return ret; + } + } fd_install(fd, newfile); return fd; } @@ -641,6 +744,9 @@ EXPORT_SYMBOL(sock_alloc); static void __sock_release(struct socket *sock, struct inode *inode) { + if (sock_oob_capable(sock)) + sock_oob_detach(sock); + if (sock->ops) { struct module *owner = sock->ops->owner; @@ -1235,6 +1341,11 @@ static long sock_ioctl(struct file *file break; default: + if (sock_oob_capable(sock)) { + err = sock_inband_ioctl_redirect(sock, cmd, arg); + if (!err || err != -ENOIOCTLCMD) + break; + } err = sock_do_ioctl(net, sock, cmd, arg); break; } @@ -1548,10 +1659,18 @@ int __sys_socket(int family, int type, i BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_OOB & SOCK_TYPE_MASK); flags = type & ~SOCK_TYPE_MASK; - if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_OOB)) return -EINVAL; + /* + * Not every protocol family supports out-of-band operations, + * however PF_OOB certainly does: force SOCK_OOB in, so that + * sock_oob_attach() runs for this socket. 
+ */ + if (IS_ENABLED(CONFIG_NET_OOB) && family == AF_OOB) + flags |= SOCK_OOB; type &= SOCK_TYPE_MASK; if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) @@ -1561,7 +1680,7 @@ int __sys_socket(int family, int type, i if (retval < 0) return retval; - return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); + return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK | O_OOB)); } SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) @@ -1692,6 +1811,9 @@ int __sys_bind(int fd, struct sockaddr _ err = security_socket_bind(sock, (struct sockaddr *)&address, addrlen); + if (sock_oob_capable(sock) && !err) + err = sock_oob_bind(sock, (struct sockaddr *) + &address, addrlen); if (!err) err = sock->ops->bind(sock, (struct sockaddr *) diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/scripts/mkcompile_h linux-dovetail-v5.15.y-dovetail/scripts/mkcompile_h --- linux-5.15.26/scripts/mkcompile_h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/scripts/mkcompile_h 2022-03-10 09:47:50.000000000 +0100 @@ -6,8 +6,9 @@ ARCH=$2 SMP=$3 PREEMPT=$4 PREEMPT_RT=$5 -CC_VERSION="$6" -LD=$7 +IRQPIPE=$6 +CC_VERSION="$7" +LD=$8 # Do not expand names set -f @@ -43,6 +44,7 @@ CONFIG_FLAGS="" if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi if [ -n "$PREEMPT_RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_RT"; fi +if [ -n "$IRQPIPE" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS IRQPIPE"; fi # Truncate to maximum length UTS_LEN=64 diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/security/selinux/hooks.c linux-dovetail-v5.15.y-dovetail/security/selinux/hooks.c --- linux-5.15.26/security/selinux/hooks.c 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/security/selinux/hooks.c 2022-03-10 09:47:50.000000000 +0100 @@ -1322,7 +1322,9 @@ static inline u16 socket_type_to_securit return SECCLASS_XDP_SOCKET; case PF_MCTP: return SECCLASS_MCTP_SOCKET; -#if PF_MAX > 46 + case PF_OOB: + return SECCLASS_OOB_SOCKET; +#if PF_MAX > 47 #error New address family defined, please update this function. #endif } diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/security/selinux/include/classmap.h linux-dovetail-v5.15.y-dovetail/security/selinux/include/classmap.h --- linux-5.15.26/security/selinux/include/classmap.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/security/selinux/include/classmap.h 2022-03-10 09:47:50.000000000 +0100 @@ -248,6 +248,8 @@ struct security_class_mapping secclass_m { COMMON_SOCK_PERMS, NULL } }, { "mctp_socket", { COMMON_SOCK_PERMS, NULL } }, + { "oob_socket", + { COMMON_SOCK_PERMS, NULL } }, { "perf_event", { "open", "cpu", "kernel", "tracepoint", "read", "write", NULL } }, { "lockdown", @@ -257,6 +259,6 @@ struct security_class_mapping secclass_m { NULL } }; -#if PF_MAX > 46 +#if PF_MAX > 47 #error New address family defined, please update secclass_map. 
#endif diff -uprN -X linux-5.15.26/Documentation/dontdiff linux-5.15.26/tools/perf/trace/beauty/include/linux/socket.h linux-dovetail-v5.15.y-dovetail/tools/perf/trace/beauty/include/linux/socket.h --- linux-5.15.26/tools/perf/trace/beauty/include/linux/socket.h 2022-03-02 11:48:10.000000000 +0100 +++ linux-dovetail-v5.15.y-dovetail/tools/perf/trace/beauty/include/linux/socket.h 2022-03-10 09:47:50.000000000 +0100 @@ -226,8 +226,9 @@ struct ucred { #define AF_MCTP 45 /* Management component * transport protocol */ +#define AF_OOB 46 /* Out-of-band domain sockets */ -#define AF_MAX 46 /* For now.. */ +#define AF_MAX 47 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC
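Illustration, not part of the patch above: net/socket.c now provides __weak, no-op defaults for the out-of-band socket hooks (sock_oob_attach(), sock_oob_detach(), sock_oob_bind(), the oob file operations), so the file keeps building and behaving as before when no out-of-band networking core is present. A companion core enables the fast path by supplying strong definitions, and sock_oob_capable() then keys off sk->oob_data to decide whether a given socket takes the oob branches. The sketch below only shows the shape of such an override under stated assumptions: struct my_oob_state is hypothetical, oob_data is assumed to be a void * cookie in struct sock, and the strong definitions have to be built into the kernel image, since __weak references are resolved at link time rather than at module load.

/*
 * Hypothetical strong overrides of the __weak hooks added to
 * net/socket.c above. Sketch only; built-in code, not a module.
 */
#include <linux/slab.h>
#include <linux/net.h>
#include <net/sock.h>

struct my_oob_state {
	int placeholder;	/* per-socket out-of-band context */
};

int sock_oob_attach(struct socket *sock)
{
	struct my_oob_state *s;

	s = kzalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	/*
	 * Setting sk->oob_data makes sock_oob_capable() true, which is
	 * what gates sock_oob_bind() on bind(2), the in-band ioctl
	 * redirection, and sock_oob_detach() on the final release in
	 * the net/socket.c hunks above.
	 */
	sock->sk->oob_data = s;

	return 0;
}

void sock_oob_detach(struct socket *sock)
{
	struct my_oob_state *s = sock->sk->oob_data;

	sock->sk->oob_data = NULL;
	kfree(s);
}

On the queueing side, the "oob" qdisc registered above would presumably be attached to an out-of-band capable device with the usual tc syntax (for instance "tc qdisc add dev <dev> root oob"); packets then first try netif_xmit_oob() while the device runs in oob diversion mode, and when that fails, skb->oob traffic falls back to the strictly prioritized pfifo band while plain in-band traffic goes through pfifo_fast.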