diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b1f3df39ed406..bf7cc612852c4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -41,6 +41,7 @@ config ARM
 	select ARCH_SUPPORTS_CFI_CLANG
 	select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
 	select ARCH_SUPPORTS_PER_VMA_LOCK
+	select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_USE_MEMTEST
@@ -80,7 +81,7 @@ config ARM
 	select HAS_IOPORT
 	select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
 	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
-	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
 	select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL
 	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
 	select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
@@ -105,7 +106,7 @@ config ARM
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
 	select HAVE_EXIT_THREAD
-	select HAVE_GUP_FAST if ARM_LPAE
+	select HAVE_GUP_FAST if ARM_LPAE && !(PREEMPT_RT && HIGHPTE)
 	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
@@ -128,6 +129,7 @@ config ARM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
 	select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 46169fe42c612..3fdc8771dbe73 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -474,6 +474,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
 	if (addr < TASK_SIZE)
 		return do_page_fault(addr, fsr, regs);
 
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
 	if (user_mode(regs))
 		goto bad_area;
 
@@ -544,6 +547,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
 static int
 do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
 	do_bad_area(addr, fsr, regs);
 	return 0;
 }
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 93402a1d9c9fc..72ee7d148989d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -170,6 +170,7 @@ config PPC
 	select ARCH_STACKWALK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx
+	select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF if PPC64
 	select ARCH_USE_MEMTEST
@@ -273,6 +274,7 @@ config PPC
 	select HAVE_PERF_EVENTS_NMI if PPC64
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
 	select HAVE_PREEMPT_DYNAMIC_KEY
 	select HAVE_RETHOOK if KPROBES
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
index 283c346478565..4727f40052ddd 100644
--- a/arch/powerpc/include/asm/stackprotector.h
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -19,8 +19,13 @@
  */
 static __always_inline void boot_init_stack_canary(void)
 {
-	unsigned long canary = get_random_canary();
+	unsigned long canary;
 
+#ifndef CONFIG_PREEMPT_RT
+	canary = get_random_canary();
+#else
+	canary = ((unsigned long)&canary) & CANARY_MASK;
+#endif
 	current->stack_canary = canary;
 #ifdef CONFIG_PPC64
 	get_paca()->canary = canary;
 #endif
diff --git
a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 2f2702c867f76..8e5a7bab208ab 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -234,6 +234,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && PPC_E500 + depends on !PREEMPT_RT select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index fa3c2fff082a8..ea4f42cfe825e 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -2,6 +2,7 @@ config PPC_PSERIES depends on PPC64 && PPC_BOOK3S bool "IBM pSeries & new (POWER5-based) iSeries" + select GENERIC_ALLOCATOR select HAVE_PCSPKR_PLATFORM select MPIC select OF_DYNAMIC diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index eec333dd2e598..0b952ad638f9c 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -245,7 +246,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, return ret; } -static DEFINE_PER_CPU(__be64 *, tce_page); +struct tce_page { + __be64 * page; + local_lock_t lock; +}; +static DEFINE_PER_CPU(struct tce_page, tce_page) = { + .lock = INIT_LOCAL_LOCK(lock), +}; static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, @@ -268,9 +275,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, direction, attrs); } - local_irq_save(flags); /* to protect tcep and the page behind it */ + /* to protect tcep and the page behind it */ + local_lock_irqsave(&tce_page.lock, flags); - tcep = __this_cpu_read(tce_page); + tcep = __this_cpu_read(tce_page.page); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -279,12 +287,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcep = (__be64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { - local_irq_restore(flags); + local_unlock_irqrestore(&tce_page.lock, flags); return tce_build_pSeriesLP(tbl->it_index, tcenum, tceshift, npages, uaddr, direction, attrs); } - __this_cpu_write(tce_page, tcep); + __this_cpu_write(tce_page.page, tcep); } rpn = __pa(uaddr) >> tceshift; @@ -314,7 +322,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcenum += limit; } while (npages > 0 && !rc); - local_irq_restore(flags); + local_unlock_irqrestore(&tce_page.lock, flags); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; @@ -498,16 +506,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, DMA_BIDIRECTIONAL, 0); } - local_irq_disable(); /* to protect tcep and the page behind it */ - tcep = __this_cpu_read(tce_page); + /* to protect tcep and the page behind it */ + local_lock_irq(&tce_page.lock); + tcep = __this_cpu_read(tce_page.page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); if (!tcep) { - local_irq_enable(); + local_unlock_irq(&tce_page.lock); return -ENOMEM; } - __this_cpu_write(tce_page, tcep); + __this_cpu_write(tce_page.page, tcep); } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; @@ -550,7 +559,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, /* error cleanup: caller will clear whole range */ - local_irq_enable(); + local_unlock_irq(&tce_page.lock); 
return rc; } diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 5e939004b6463..40a9234e6e5dc 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -3,7 +3,6 @@ config DRM_I915 tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" depends on DRM depends on X86 && PCI - depends on !PREEMPT_RT select INTEL_GTT if X86 select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index a187db6df2d36..dbdc4d9b2a33c 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -562,7 +562,8 @@ void intel_pipe_update_start(struct intel_atomic_state *state, */ intel_psr_wait_for_idle_locked(new_crtc_state); - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); crtc->debug.min_vbl = evade.min; crtc->debug.max_vbl = evade.max; @@ -580,7 +581,8 @@ void intel_pipe_update_start(struct intel_atomic_state *state, return; irq_disable: - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) @@ -726,7 +728,8 @@ void intel_pipe_update_end(struct intel_atomic_state *state, if (!state->base.legacy_cursor_update) intel_vrr_send_push(NULL, new_crtc_state); - local_irq_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); if (intel_vgpu_active(dev_priv)) goto out; diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 198e69efe9ac1..2d86b2c3f863b 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -929,13 +929,15 @@ intel_legacy_cursor_update(struct drm_plane *_plane, */ intel_psr_wait_for_idle_locked(crtc_state); - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); intel_vblank_evade(&evade); drm_crtc_vblank_put(&crtc->base); } else { - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); } if (new_plane_state->uapi.visible) { @@ -945,7 +947,8 @@ intel_legacy_cursor_update(struct drm_plane *_plane, intel_plane_disable_arm(NULL, plane, crtc_state); } - local_irq_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); intel_psr_unlock(crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_trace.h b/drivers/gpu/drm/i915/display/intel_display_trace.h index 27ebc32cb61a5..a519d94700c36 100644 --- a/drivers/gpu/drm/i915/display/intel_display_trace.h +++ b/drivers/gpu/drm/i915/display/intel_display_trace.h @@ -13,6 +13,10 @@ #if !defined(__INTEL_DISPLAY_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) #define __INTEL_DISPLAY_TRACE_H__ +#if defined(CONFIG_PREEMPT_RT) && !defined(NOTRACE) +#define NOTRACE +#endif + #include #include #include diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 70ba7aa26bf49..d0197fd96d6cf 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -312,6 +312,20 @@ static void intel_vblank_section_exit(struct intel_display *display) struct drm_i915_private *i915 = to_i915(display->drm); spin_unlock(&i915->uncore.lock); } + +static void intel_vblank_section_enter_irqf(struct intel_display *display, unsigned long *flags) + __acquires(i915->uncore.lock) +{ + struct drm_i915_private *i915 = to_i915(display->drm); + spin_lock_irqsave(&i915->uncore.lock, 
*flags); +} + +static void intel_vblank_section_exit_irqf(struct intel_display *display, unsigned long flags) + __releases(i915->uncore.lock) +{ + struct drm_i915_private *i915 = to_i915(display->drm); + spin_unlock_irqrestore(&i915->uncore.lock, flags); +} #else static void intel_vblank_section_enter(struct intel_display *display) { @@ -320,6 +334,17 @@ static void intel_vblank_section_enter(struct intel_display *display) static void intel_vblank_section_exit(struct intel_display *display) { } + +static void intel_vblank_section_enter_irqf(struct intel_display *display, unsigned long *flags) +{ + *flags = 0; +} + +static void intel_vblank_section_exit_irqf(struct intel_display *display, unsigned long flags) +{ + if (flags) + return; +} #endif static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, @@ -356,10 +381,10 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, * timing critical raw register reads, potentially with * preemption disabled, so the following code must not block. */ - local_irq_save(irqflags); - intel_vblank_section_enter(display); + intel_vblank_section_enter_irqf(display, &irqflags); - /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); /* Get optional system timestamp before query. */ if (stime) @@ -423,10 +448,10 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, if (etime) *etime = ktime_get(); - /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); - intel_vblank_section_exit(display); - local_irq_restore(irqflags); + intel_vblank_section_exit_irqf(display, irqflags); /* * While in vblank, position will be negative @@ -464,13 +489,11 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) unsigned long irqflags; int position; - local_irq_save(irqflags); - intel_vblank_section_enter(display); + intel_vblank_section_enter_irqf(display, &irqflags); position = __intel_get_crtc_scanline(crtc); - intel_vblank_section_exit(display); - local_irq_restore(irqflags); + intel_vblank_section_exit_irqf(display, irqflags); return position; } @@ -730,11 +753,13 @@ int intel_vblank_evade(struct intel_vblank_evade_ctx *evade) break; } - local_irq_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); timeout = schedule_timeout(timeout); - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); } finish_wait(wq, &wait); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index b721bbd233567..f5a6143ea8a24 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1609,7 +1609,7 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) static unsigned long stop_timeout(const struct intel_engine_cs *engine) { - if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */ + if (in_atomic() || irqs_disabled() || rcu_preempt_depth()) /* inside atomic preempt-reset? */ return 0; /* diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 03baa7fa0a27c..799e943d9facb 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -1294,7 +1294,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. 
*/ - spin_lock(&sched_engine->lock); + spin_lock_irq(&sched_engine->lock); /* * If the queue is higher priority than the last @@ -1394,7 +1394,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - spin_unlock(&sched_engine->lock); + spin_unlock_irq(&sched_engine->lock); return; } } @@ -1420,7 +1420,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.sched_engine->lock); - spin_unlock(&engine->sched_engine->lock); + spin_unlock_irq(&engine->sched_engine->lock); return; /* leave this for another sibling */ } @@ -1582,7 +1582,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ sched_engine->queue_priority_hint = queue_prio(sched_engine); i915_sched_engine_reset_on_empty(sched_engine); - spin_unlock(&sched_engine->lock); + spin_unlock_irq(&sched_engine->lock); /* * We can skip poking the HW if we ended up with exactly the same set @@ -1608,13 +1608,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } } -static void execlists_dequeue_irq(struct intel_engine_cs *engine) -{ - local_irq_disable(); /* Suspend interrupts across request submission */ - execlists_dequeue(engine); - local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ -} - static void clear_ports(struct i915_request **ports, int count) { memset_p((void **)ports, NULL, count); @@ -2469,7 +2462,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) } if (!engine->execlists.pending[0]) { - execlists_dequeue_irq(engine); + execlists_dequeue(engine); start_timeslice(engine); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 053780f562c1a..b25fa8f4dc4bd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -362,7 +362,7 @@ static inline int intel_guc_send_busy_loop(struct intel_guc *guc, { int err; unsigned int sleep_period_ms = 1; - bool not_atomic = !in_atomic() && !irqs_disabled(); + bool not_atomic = !in_atomic() && !irqs_disabled() && !rcu_preempt_depth(); /* * FIXME: Have caller pass in if we are in an atomic context to avoid diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b9a2b2194c8ff..e24798e4b44ea 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -608,7 +608,6 @@ bool __i915_request_submit(struct i915_request *request) RQ_TRACE(request, "\n"); - GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->sched_engine->lock); /* @@ -717,7 +716,6 @@ void __i915_request_unsubmit(struct i915_request *request) */ RQ_TRACE(request, "\n"); - GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->sched_engine->lock); /* diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 7ed41ce9b7085..6b87ef6005c69 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -6,6 +6,10 @@ #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _I915_TRACE_H_ +#if defined(CONFIG_PREEMPT_RT) && !defined(NOTRACE) +#define NOTRACE +#endif + #include #include #include diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index f7fb40cfdb705..9cb40c2c4b124 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -267,8 +267,13 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int 
to_wait_ms) (Wmax)) #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) -/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) +/* + * If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. + * On PREEMPT_RT the context isn't becoming atomic because it is used in an + * interrupt handler or because a spinlock_t is acquired. This leads to + * warnings which don't occur otherwise and therefore the check is disabled. + */ +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) #else # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) diff --git a/drivers/gpu/drm/i915/intel_uncore_trace.h b/drivers/gpu/drm/i915/intel_uncore_trace.h index f13ff71edf2db..3c67e267fb602 100644 --- a/drivers/gpu/drm/i915/intel_uncore_trace.h +++ b/drivers/gpu/drm/i915/intel_uncore_trace.h @@ -7,6 +7,10 @@ #if !defined(__INTEL_UNCORE_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) #define __INTEL_UNCORE_TRACE_H__ +#if defined(CONFIG_PREEMPT_RT) && !defined(NOTRACE) +#define NOTRACE +#endif + #include "i915_reg_defs.h" #include diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index feb920c5b2e80..0468ea74793cf 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -391,12 +391,34 @@ void __init serial8250_register_ports(struct uart_driver *drv, struct device *de #ifdef CONFIG_SERIAL_8250_CONSOLE -static void univ8250_console_write(struct console *co, const char *s, - unsigned int count) +static void univ8250_console_write_atomic(struct console *co, + struct nbcon_write_context *wctxt) { struct uart_8250_port *up = &serial8250_ports[co->index]; - serial8250_console_write(up, s, count); + serial8250_console_write(up, wctxt, true); +} + +static void univ8250_console_write_thread(struct console *co, + struct nbcon_write_context *wctxt) +{ + struct uart_8250_port *up = &serial8250_ports[co->index]; + + serial8250_console_write(up, wctxt, false); +} + +static void univ8250_console_device_lock(struct console *co, unsigned long *flags) +{ + struct uart_port *up = &serial8250_ports[co->index].port; + + __uart_port_lock_irqsave(up, flags); +} + +static void univ8250_console_device_unlock(struct console *co, unsigned long flags) +{ + struct uart_port *up = &serial8250_ports[co->index].port; + + __uart_port_unlock_irqrestore(up, flags); } static int univ8250_console_setup(struct console *co, char *options) @@ -497,12 +519,15 @@ static int univ8250_console_match(struct console *co, char *name, int idx, static struct console univ8250_console = { .name = "ttyS", - .write = univ8250_console_write, + .write_atomic = univ8250_console_write_atomic, + .write_thread = univ8250_console_write_thread, + .device_lock = univ8250_console_device_lock, + .device_unlock = univ8250_console_device_unlock, .device = uart_console_device, .setup = univ8250_console_setup, .exit = univ8250_console_exit, .match = univ8250_console_match, - .flags = CON_PRINTBUFFER | CON_ANYTIME, + .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_NBCON, .index = -1, .data = &serial8250_reg, }; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2da9db960d09f..ab5fe3e78eb9a 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -705,7 +705,12 @@ static void 
serial8250_clear_interrupts(struct uart_port *port) serial_port_in(port, UART_MSR); } -static void serial8250_clear_IER(struct uart_8250_port *up) +/* + * Only to be used directly by the callback helper serial8250_console_write(), + * which may not require the port lock. Use serial8250_clear_IER() instead for + * all other cases. + */ +static void __serial8250_clear_IER(struct uart_8250_port *up) { if (up->capabilities & UART_CAP_UUE) serial_out(up, UART_IER, UART_IER_UUE); @@ -713,6 +718,14 @@ static void serial8250_clear_IER(struct uart_8250_port *up) serial_out(up, UART_IER, 0); } +static inline void serial8250_clear_IER(struct uart_8250_port *up) +{ + /* Port locked to synchronize UART_IER access against the console */ + lockdep_assert_held_once(&up->port.lock); + + __serial8250_clear_IER(up); +} + /* * This is a quickie test to see how big the FIFO is. * It doesn't work at all the time, more's the pity. @@ -1290,9 +1303,6 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p, bool toggle_ier) { unsigned char mcr = serial8250_in_MCR(p); - /* Port locked to synchronize UART_IER access against the console. */ - lockdep_assert_held_once(&p->port.lock); - if (p->port.rs485.flags & SER_RS485_RTS_AFTER_SEND) mcr |= UART_MCR_RTS; else @@ -1308,6 +1318,16 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p, bool toggle_ier) serial8250_clear_and_reinit_fifos(p); if (toggle_ier) { + /* + * Port locked to synchronize UART_IER access against + * the console. The lockdep_assert must be restricted + * to this condition because only here is it + * guaranteed that the port lock is held. The other + * hardware access in this function is synchronized + * by console ownership. + */ + lockdep_assert_held_once(&p->port.lock); + p->ier |= UART_IER_RLSI | UART_IER_RDI; serial_port_out(&p->port, UART_IER, p->ier); } @@ -3243,7 +3263,11 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults); static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) { + struct uart_8250_port *up = up_to_u8250p(port); + serial_port_out(port, UART_TX, ch); + + up->console_line_ended = (ch == '\n'); } static void serial8250_console_wait_putchar(struct uart_port *port, unsigned char ch) @@ -3280,11 +3304,22 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } -static void fifo_wait_for_lsr(struct uart_8250_port *up, unsigned int count) +static void fifo_wait_for_lsr(struct uart_8250_port *up, + struct nbcon_write_context *wctxt, + unsigned int count) { unsigned int i; for (i = 0; i < count; i++) { + /* + * Pass the ownership as quickly as possible to a higher + * priority context. Otherwise, its attempt to take over + * the ownership might timeout. The new owner will wait + * for UART_LSR_THRE before reusing the fifo. + */ + if (!nbcon_can_proceed(wctxt)) + return; + if (wait_for_lsr(up, UART_LSR_THRE)) return; } @@ -3297,20 +3332,29 @@ static void fifo_wait_for_lsr(struct uart_8250_port *up, unsigned int count) * to get empty. 
*/ static void serial8250_console_fifo_write(struct uart_8250_port *up, - const char *s, unsigned int count) + struct nbcon_write_context *wctxt) { - const char *end = s + count; unsigned int fifosize = up->tx_loadsz; struct uart_port *port = &up->port; + const char *s = wctxt->outbuf; + const char *end = s + wctxt->len; unsigned int tx_count = 0; bool cr_sent = false; unsigned int i; while (s != end) { /* Allow timeout for each byte of a possibly full FIFO */ - fifo_wait_for_lsr(up, fifosize); + fifo_wait_for_lsr(up, wctxt, fifosize); + /* + * Fill the FIFO. If a handover or takeover occurs, writing + * must be aborted since wctxt->outbuf and wctxt->len are no + * longer valid. + */ for (i = 0; i < fifosize && s != end; ++i) { + if (!nbcon_enter_unsafe(wctxt)) + return; + if (*s == '\n' && !cr_sent) { serial8250_console_putchar(port, '\r'); cr_sent = true; @@ -3318,6 +3362,8 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up, serial8250_console_putchar(port, *s++); cr_sent = false; } + + nbcon_exit_unsafe(wctxt); } tx_count = i; } @@ -3326,39 +3372,57 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up, * Allow timeout for each byte written since the caller will only wait * for UART_LSR_BOTH_EMPTY using the timeout of a single character */ - fifo_wait_for_lsr(up, tx_count); + fifo_wait_for_lsr(up, wctxt, tx_count); +} + +static void serial8250_console_byte_write(struct uart_8250_port *up, + struct nbcon_write_context *wctxt) +{ + struct uart_port *port = &up->port; + const char *s = wctxt->outbuf; + const char *end = s + wctxt->len; + + /* + * Write out the message. If a handover or takeover occurs, writing + * must be aborted since wctxt->outbuf and wctxt->len are no longer + * valid. + */ + while (s != end) { + if (!nbcon_enter_unsafe(wctxt)) + return; + + uart_console_write(port, s++, 1, serial8250_console_wait_putchar); + + nbcon_exit_unsafe(wctxt); + } } /* - * Print a string to the serial port trying not to disturb - * any possible real use of the port... + * Print a string to the serial port trying not to disturb + * any possible real use of the port... * - * The console_lock must be held when we get here. - * - * Doing runtime PM is really a bad idea for the kernel console. - * Thus, we assume the function is called when device is powered up. + * Doing runtime PM is really a bad idea for the kernel console. + * Thus, assume it is called when device is powered up. */ -void serial8250_console_write(struct uart_8250_port *up, const char *s, - unsigned int count) +void serial8250_console_write(struct uart_8250_port *up, + struct nbcon_write_context *wctxt, + bool is_atomic) { struct uart_8250_em485 *em485 = up->em485; struct uart_port *port = &up->port; - unsigned long flags; - unsigned int ier, use_fifo; - int locked = 1; + unsigned int ier; + bool use_fifo; - touch_nmi_watchdog(); - - if (oops_in_progress) - locked = uart_port_trylock_irqsave(port, &flags); - else - uart_port_lock_irqsave(port, &flags); + if (!nbcon_enter_unsafe(wctxt)) + return; /* - * First save the IER then disable the interrupts + * First, save the IER, then disable the interrupts. The special + * variant to clear the IER is used because console printing may + * occur without holding the port lock. 
*/ ier = serial_port_in(port, UART_IER); - serial8250_clear_IER(up); + __serial8250_clear_IER(up); /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { @@ -3372,6 +3436,18 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, mdelay(port->rs485.delay_rts_before_send); } + /* If ownership was lost, no writing is allowed */ + if (!nbcon_can_proceed(wctxt)) + goto skip_write; + + /* + * If console printer did not fully output the previous line, it must + * have been handed or taken over. Insert a newline in order to + * maintain clean output. + */ + if (!up->console_line_ended) + uart_console_write(port, "\n", 1, serial8250_console_wait_putchar); + use_fifo = (up->capabilities & UART_CAP_FIFO) && /* * BCM283x requires to check the fifo @@ -3392,10 +3468,23 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, */ !(up->port.flags & UPF_CONS_FLOW); + nbcon_exit_unsafe(wctxt); + if (likely(use_fifo)) - serial8250_console_fifo_write(up, s, count); + serial8250_console_fifo_write(up, wctxt); else - uart_console_write(port, s, count, serial8250_console_wait_putchar); + serial8250_console_byte_write(up, wctxt); +skip_write: + /* + * If ownership was lost, this context must reacquire ownership and + * re-enter the unsafe section in order to perform final actions + * (such as re-enabling interrupts). + */ + if (!nbcon_can_proceed(wctxt)) { + do { + nbcon_reacquire_nobuf(wctxt); + } while (!nbcon_enter_unsafe(wctxt)); + } /* * Finally, wait for transmitter to become empty @@ -3418,11 +3507,18 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, * call it if we have saved something in the saved flags * while processing with interrupts off. */ - if (up->msr_saved_flags) - serial8250_modem_status(up); + if (up->msr_saved_flags) { + /* + * For atomic, it must be deferred to irq_work because this + * may be a context that does not permit waking up tasks. + */ + if (is_atomic) + irq_work_queue(&up->modem_status_work); + else + serial8250_modem_status(up); + } - if (locked) - uart_port_unlock_irqrestore(port, flags); + nbcon_exit_unsafe(wctxt); } static unsigned int probe_baud(struct uart_port *port) @@ -3440,8 +3536,24 @@ static unsigned int probe_baud(struct uart_port *port) return (port->uartclk / 16) / quot; } +/* + * irq_work handler to perform modem control. Only triggered via + * ->write_atomic() callback because it may be in a scheduler or + * NMI context, unable to wake tasks. 
+ */
+static void modem_status_handler(struct irq_work *iwp)
+{
+	struct uart_8250_port *up = container_of(iwp, struct uart_8250_port, modem_status_work);
+	struct uart_port *port = &up->port;
+
+	uart_port_lock(port);
+	serial8250_modem_status(up);
+	uart_port_unlock(port);
+}
+
 int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
 {
+	struct uart_8250_port *up = up_to_u8250p(port);
 	int baud = 9600;
 	int bits = 8;
 	int parity = 'n';
@@ -3451,6 +3563,9 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
 	if (!port->iobase && !port->membase)
 		return -ENODEV;
 
+	up->console_line_ended = true;
+	init_irq_work(&up->modem_status_work, modem_status_handler);
+
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
 	else if (probe)
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 01efdce0fda07..c10b31a79fe61 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -150,8 +150,17 @@ struct uart_8250_port {
 #define LSR_SAVE_FLAGS UART_LSR_BRK_ERROR_BITS
 	u16			lsr_saved_flags;
 	u16			lsr_save_mask;
+
+	/*
+	 * Track when a console line has been fully written to the
+	 * hardware, i.e. true when the most recent byte written to
+	 * UART_TX by the console was '\n'.
+	 */
+	bool			console_line_ended;
+
 #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
 	unsigned char		msr_saved_flags;
+	struct irq_work		modem_status_work;
 
 	struct uart_8250_dma	*dma;
 	const struct uart_8250_ops *ops;
@@ -202,8 +211,8 @@ void serial8250_tx_chars(struct uart_8250_port *up);
 unsigned int serial8250_modem_status(struct uart_8250_port *up);
 void serial8250_init_port(struct uart_8250_port *up);
 void serial8250_set_defaults(struct uart_8250_port *up);
-void serial8250_console_write(struct uart_8250_port *up, const char *s,
-			      unsigned int count);
+void serial8250_console_write(struct uart_8250_port *up,
+			      struct nbcon_write_context *wctxt, bool is_atomic);
 int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
 int serial8250_console_exit(struct uart_port *port);
 
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 54ea59ff8fbeb..da326800c1c9b 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -103,6 +103,19 @@ config PREEMPT_RT
 	  Select this if you are building a kernel for systems which
 	  require real-time guarantees.
 
+config PREEMPT_RT_NEEDS_BH_LOCK
+	bool "Enforce softirq synchronisation on PREEMPT_RT"
+	depends on PREEMPT_RT
+	help
+	  Enforce synchronisation across the softirq context. On PREEMPT_RT
+	  the softirq is preemptible. This enforces the same per-CPU BKL
+	  semantics that non-PREEMPT_RT builds have. This should not be
+	  needed because per-CPU locks were added to avoid the per-CPU BKL.
+
+	  This switch provides the old behaviour for testing reasons. Select
+	  this if you suspect an error with preemptible softirq and want to
+	  test the old synchronised behaviour.
+ config PREEMPT_COUNT bool diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index eefb67d9883c2..20ac8a5ac27d7 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -188,6 +188,15 @@ KERNEL_ATTR_RO(crash_elfcorehdr_size); #endif /* CONFIG_VMCORE_INFO */ +#if defined(CONFIG_PREEMPT_RT) +static ssize_t realtime_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 1); +} +KERNEL_ATTR_RO(realtime); +#endif + /* whether file capabilities are enabled */ static ssize_t fscaps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -271,6 +280,9 @@ static struct attribute * kernel_attrs[] = { #ifndef CONFIG_TINY_RCU &rcu_expedited_attr.attr, &rcu_normal_attr.attr, +#endif +#ifdef CONFIG_PREEMPT_RT + &realtime_attr.attr, #endif NULL }; diff --git a/kernel/softirq.c b/kernel/softirq.c index 513b1945987cc..77198911b8dd4 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -165,7 +165,11 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) /* First entry of a task into a BH disabled section? */ if (!current->softirq_disable_cnt) { if (preemptible()) { - local_lock(&softirq_ctrl.lock); + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) + local_lock(&softirq_ctrl.lock); + else + migrate_disable(); + /* Required to meet the RCU bottomhalf requirements. */ rcu_read_lock(); } else { @@ -177,17 +181,34 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) * Track the per CPU softirq disabled state. On RT this is per CPU * state to allow preemption of bottom half disabled sections. */ - newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt); - /* - * Reflect the result in the task state to prevent recursion on the - * local lock and to make softirq_count() & al work. - */ - current->softirq_disable_cnt = newcnt; + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) { + newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt); + /* + * Reflect the result in the task state to prevent recursion on the + * local lock and to make softirq_count() & al work. 
+ */ + current->softirq_disable_cnt = newcnt; - if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { - raw_local_irq_save(flags); - lockdep_softirqs_off(ip); - raw_local_irq_restore(flags); + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { + raw_local_irq_save(flags); + lockdep_softirqs_off(ip); + raw_local_irq_restore(flags); + } + } else { + bool sirq_dis = false; + + if (!current->softirq_disable_cnt) + sirq_dis = true; + + this_cpu_add(softirq_ctrl.cnt, cnt); + current->softirq_disable_cnt += cnt; + WARN_ON_ONCE(current->softirq_disable_cnt < 0); + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_dis) { + raw_local_irq_save(flags); + lockdep_softirqs_off(ip); + raw_local_irq_restore(flags); + } } } EXPORT_SYMBOL(__local_bh_disable_ip); @@ -195,23 +216,42 @@ EXPORT_SYMBOL(__local_bh_disable_ip); static void __local_bh_enable(unsigned int cnt, bool unlock) { unsigned long flags; + bool sirq_en = false; int newcnt; - DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != - this_cpu_read(softirq_ctrl.cnt)); + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) { + DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != + this_cpu_read(softirq_ctrl.cnt)); + if (softirq_count() == cnt) + sirq_en = true; + } else { + if (current->softirq_disable_cnt == cnt) + sirq_en = true; + } - if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) { + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_en) { raw_local_irq_save(flags); lockdep_softirqs_on(_RET_IP_); raw_local_irq_restore(flags); } - newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt); - current->softirq_disable_cnt = newcnt; + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) { + newcnt = this_cpu_sub_return(softirq_ctrl.cnt, cnt); + current->softirq_disable_cnt = newcnt; - if (!newcnt && unlock) { - rcu_read_unlock(); - local_unlock(&softirq_ctrl.lock); + if (!newcnt && unlock) { + rcu_read_unlock(); + local_unlock(&softirq_ctrl.lock); + } + } else { + current->softirq_disable_cnt -= cnt; + this_cpu_sub(softirq_ctrl.cnt, cnt); + if (unlock && !current->softirq_disable_cnt) { + migrate_enable(); + rcu_read_unlock(); + } else { + WARN_ON_ONCE(current->softirq_disable_cnt < 0); + } } } @@ -228,7 +268,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) lock_map_release(&bh_lock_map); local_irq_save(flags); - curcnt = __this_cpu_read(softirq_ctrl.cnt); + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) + curcnt = this_cpu_read(softirq_ctrl.cnt); + else + curcnt = current->softirq_disable_cnt; /* * If this is not reenabling soft interrupts, no point in trying to @@ -805,6 +848,58 @@ static bool tasklet_clear_sched(struct tasklet_struct *t) return false; } +#ifdef CONFIG_PREEMPT_RT +struct tasklet_sync_callback { + spinlock_t cb_lock; + atomic_t cb_waiters; +}; + +static DEFINE_PER_CPU(struct tasklet_sync_callback, tasklet_sync_callback) = { + .cb_lock = __SPIN_LOCK_UNLOCKED(tasklet_sync_callback.cb_lock), + .cb_waiters = ATOMIC_INIT(0), +}; + +static void tasklet_lock_callback(void) +{ + spin_lock(this_cpu_ptr(&tasklet_sync_callback.cb_lock)); +} + +static void tasklet_unlock_callback(void) +{ + spin_unlock(this_cpu_ptr(&tasklet_sync_callback.cb_lock)); +} + +static void tasklet_callback_cancel_wait_running(void) +{ + struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback); + + atomic_inc(&sync_cb->cb_waiters); + spin_lock(&sync_cb->cb_lock); + atomic_dec(&sync_cb->cb_waiters); + spin_unlock(&sync_cb->cb_lock); +} + +static void tasklet_callback_sync_wait_running(void) +{ + struct 
tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback); + + if (atomic_read(&sync_cb->cb_waiters)) { + spin_unlock(&sync_cb->cb_lock); + spin_lock(&sync_cb->cb_lock); + } +} + +#else /* !CONFIG_PREEMPT_RT: */ + +static void tasklet_lock_callback(void) { } +static void tasklet_unlock_callback(void) { } +static void tasklet_callback_sync_wait_running(void) { } + +#ifdef CONFIG_SMP +static void tasklet_callback_cancel_wait_running(void) { } +#endif +#endif /* !CONFIG_PREEMPT_RT */ + static void tasklet_action_common(struct tasklet_head *tl_head, unsigned int softirq_nr) { @@ -816,6 +911,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head, tl_head->tail = &tl_head->head; local_irq_enable(); + tasklet_lock_callback(); while (list) { struct tasklet_struct *t = list; @@ -835,6 +931,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head, } } tasklet_unlock(t); + tasklet_callback_sync_wait_running(); continue; } tasklet_unlock(t); @@ -847,6 +944,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head, __raise_softirq_irqoff(softirq_nr); local_irq_enable(); } + tasklet_unlock_callback(); } static __latent_entropy void tasklet_action(void) @@ -897,12 +995,9 @@ void tasklet_unlock_spin_wait(struct tasklet_struct *t) /* * Prevent a live lock when current preempted soft * interrupt processing or prevents ksoftirqd from - * running. If the tasklet runs on a different CPU - * then this has no effect other than doing the BH - * disable/enable dance for nothing. + * running. */ - local_bh_disable(); - local_bh_enable(); + tasklet_callback_cancel_wait_running(); } else { cpu_relax(); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c6b79b3675c31..d6c94ee8edfc5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -222,7 +222,9 @@ struct worker_pool { struct workqueue_attrs *attrs; /* I: worker attributes */ struct hlist_node hash_node; /* PL: unbound_pool_hash node */ int refcnt; /* PL: refcnt for unbound pools */ - +#ifdef CONFIG_PREEMPT_RT + spinlock_t cb_lock; /* BH worker cancel lock */ +#endif /* * Destruction of pool is RCU protected to allow dereferences * from get_work_pool(). 
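The tasklet scheme above and the cb_lock just added to struct worker_pool (its lock/unlock/cancel-wait helpers follow below) share one idea: the callback batch runs with a lock held, so a canceling context waits for a running callback simply by acquiring and releasing that lock. A minimal sketch of the idea, with invented names and a single global lock where the patch uses per-CPU and per-pool locks:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_cb_lock);	/* held across callback invocation */

static void example_run_callbacks(void (*cb)(void))
{
	spin_lock(&example_cb_lock);
	cb();				/* the callback runs with the lock held */
	spin_unlock(&example_cb_lock);
}

static void example_cancel_wait_running(void)
{
	/*
	 * Acquire/release blocks until a currently running callback batch
	 * has finished. On PREEMPT_RT spinlock_t is a sleeping lock with
	 * priority inheritance, which is what makes this usable instead of
	 * the old local_bh_disable()/local_bh_enable() flush dance.
	 */
	spin_lock(&example_cb_lock);
	spin_unlock(&example_cb_lock);
}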
@@ -3078,6 +3080,31 @@ __acquires(&pool->lock) goto restart; } +#ifdef CONFIG_PREEMPT_RT +static void worker_lock_callback(struct worker_pool *pool) +{ + spin_lock(&pool->cb_lock); +} + +static void worker_unlock_callback(struct worker_pool *pool) +{ + spin_unlock(&pool->cb_lock); +} + +static void workqueue_callback_cancel_wait_running(struct worker_pool *pool) +{ + spin_lock(&pool->cb_lock); + spin_unlock(&pool->cb_lock); +} + +#else + +static void worker_lock_callback(struct worker_pool *pool) { } +static void worker_unlock_callback(struct worker_pool *pool) { } +static void workqueue_callback_cancel_wait_running(struct worker_pool *pool) { } + +#endif + /** * manage_workers - manage worker pool * @worker: self @@ -3557,6 +3584,7 @@ static void bh_worker(struct worker *worker) int nr_restarts = BH_WORKER_RESTARTS; unsigned long end = jiffies + BH_WORKER_JIFFIES; + worker_lock_callback(pool); raw_spin_lock_irq(&pool->lock); worker_leave_idle(worker); @@ -3585,6 +3613,7 @@ static void bh_worker(struct worker *worker) worker_enter_idle(worker); kick_pool(pool); raw_spin_unlock_irq(&pool->lock); + worker_unlock_callback(pool); } /* @@ -4222,17 +4251,17 @@ static bool __flush_work(struct work_struct *work, bool from_cancel) (data & WORK_OFFQ_BH)) { /* * On RT, prevent a live lock when %current preempted - * soft interrupt processing or prevents ksoftirqd from - * running by keeping flipping BH. If the BH work item - * runs on a different CPU then this has no effect other - * than doing the BH disable/enable dance for nothing. - * This is copied from - * kernel/softirq.c::tasklet_unlock_spin_wait(). + * soft interrupt processing by blocking on lock which + * is owned by the thread invoking the callback. */ while (!try_wait_for_completion(&barr.done)) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) { - local_bh_disable(); - local_bh_enable(); + struct worker_pool *pool; + + guard(rcu)(); + pool = get_work_pool(work); + if (pool) + workqueue_callback_cancel_wait_running(pool); } else { cpu_relax(); } @@ -4782,6 +4811,9 @@ static int init_worker_pool(struct worker_pool *pool) ida_init(&pool->worker_ida); INIT_HLIST_NODE(&pool->hash_node); pool->refcnt = 1; +#ifdef CONFIG_PREEMPT_RT + spin_lock_init(&pool->cb_lock); +#endif /* shouldn't fail above this point */ pool->attrs = alloc_workqueue_attrs(); diff --git a/localversion-rt b/localversion-rt new file mode 100644 index 0000000000000..0efe7ba1930e1 --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ +-rt5 diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index ff8e5b64bf6b7..b43911562f4d1 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -8,11 +8,13 @@ struct gro_cell { struct sk_buff_head napi_skbs; struct napi_struct napi; + local_lock_t bh_lock; }; int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct net_device *dev = skb->dev; + bool have_bh_lock = false; struct gro_cell *cell; int res; @@ -25,6 +27,8 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) goto unlock; } + local_lock_nested_bh(&gcells->cells->bh_lock); + have_bh_lock = true; cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(net_hotdata.max_backlog)) { @@ -39,6 +43,9 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); + if (have_bh_lock) + local_unlock_nested_bh(&gcells->cells->bh_lock); + res = NET_RX_SUCCESS; unlock: @@ -54,6 +61,7 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) struct 
sk_buff *skb; int work_done = 0; + __local_lock_nested_bh(&cell->bh_lock); while (work_done < budget) { skb = __skb_dequeue(&cell->napi_skbs); if (!skb) @@ -64,6 +72,7 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) if (work_done < budget) napi_complete_done(napi, work_done); + __local_unlock_nested_bh(&cell->bh_lock); return work_done; } @@ -79,6 +88,7 @@ int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); + local_lock_init(&cell->bh_lock); set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 50fd6809380fa..3a04665adf992 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -60,7 +60,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("List and change connection tracking table"); struct ctnetlink_list_dump_ctx { - struct nf_conn *last; + unsigned long last_id; unsigned int cpu; bool done; }; @@ -1733,16 +1733,6 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb, return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); } -static int ctnetlink_done_list(struct netlink_callback *cb) -{ - struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; - - if (ctx->last) - nf_ct_put(ctx->last); - - return 0; -} - #ifdef CONFIG_NF_CONNTRACK_EVENTS static int ctnetlink_dump_one_entry(struct sk_buff *skb, struct netlink_callback *cb, @@ -1757,11 +1747,11 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb, if (l3proto && nf_ct_l3num(ct) != l3proto) return 0; - if (ctx->last) { - if (ct != ctx->last) + if (ctx->last_id) { + if (ctnetlink_get_id(ct) != ctx->last_id) return 0; - ctx->last = NULL; + ctx->last_id = 0; } /* We can't dump extension info for the unconfirmed @@ -1775,12 +1765,8 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, dying, 0); - if (res < 0) { - if (!refcount_inc_not_zero(&ct->ct_general.use)) - return 0; - - ctx->last = ct; - } + if (res < 0) + ctx->last_id = ctnetlink_get_id(ct); return res; } @@ -1796,10 +1782,10 @@ static int ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) { struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; - struct nf_conn *last = ctx->last; #ifdef CONFIG_NF_CONNTRACK_EVENTS const struct net *net = sock_net(skb->sk); struct nf_conntrack_net_ecache *ecache_net; + unsigned long last_id = ctx->last_id; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; #endif @@ -1807,7 +1793,7 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) if (ctx->done) return 0; - ctx->last = NULL; + ctx->last_id = 0; #ifdef CONFIG_NF_CONNTRACK_EVENTS ecache_net = nf_conn_pernet_ecache(net); @@ -1818,24 +1804,21 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) int res; ct = nf_ct_tuplehash_to_ctrack(h); - if (last && last != ct) + if (last_id && last_id != ctnetlink_get_id(ct)) continue; res = ctnetlink_dump_one_entry(skb, cb, ct, true); if (res < 0) { spin_unlock_bh(&ecache_net->dying_lock); - nf_ct_put(last); return skb->len; } - nf_ct_put(last); - last = NULL; + last_id = 0; } spin_unlock_bh(&ecache_net->dying_lock); #endif ctx->done = true; - nf_ct_put(last); return skb->len; } @@ -1847,7 +1830,6 @@ static int ctnetlink_get_ct_dying(struct sk_buff *skb, if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = 
ctnetlink_dump_dying, - .done = ctnetlink_done_list, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } @@ -1862,7 +1844,6 @@ static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb, if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_dump_unconfirmed, - .done = ctnetlink_done_list, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 793790d79d138..337daa777c353 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -397,7 +397,7 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, } /** - * pipapo_get() - Get matching element reference given key data + * pipapo_get_slow() - Get matching element reference given key data * @m: storage containing the set elements * @data: Key data to be matched against existing elements * @genmask: If set, check that element is active in given genmask @@ -414,12 +414,12 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, * * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. */ -static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, - const u8 *data, u8 genmask, - u64 tstamp) +static struct nft_pipapo_elem *pipapo_get_slow(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp) { + unsigned long *res_map, *fill_map, *map; struct nft_pipapo_scratch *scratch; - unsigned long *res_map, *fill_map; const struct nft_pipapo_field *f; bool map_index; int i; @@ -429,11 +429,13 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, scratch = *raw_cpu_ptr(m->scratch); if (unlikely(!scratch)) goto out; + __local_lock_nested_bh(&scratch->bh_lock); map_index = scratch->map_index; - res_map = scratch->map + (map_index ? m->bsize_max : 0); - fill_map = scratch->map + (map_index ? 0 : m->bsize_max); + map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]); + res_map = map + (map_index ? m->bsize_max : 0); + fill_map = map + (map_index ? 0 : m->bsize_max); pipapo_resmap_init(m, res_map); @@ -464,6 +466,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, last); if (b < 0) { scratch->map_index = map_index; + __local_unlock_nested_bh(&scratch->bh_lock); local_bh_enable(); return NULL; @@ -483,6 +486,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, * *next* bitmap (not initial) for the next packet. */ scratch->map_index = map_index; + __local_unlock_nested_bh(&scratch->bh_lock); local_bh_enable(); return e; } @@ -497,11 +501,47 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, data += NFT_PIPAPO_GROUPS_PADDING(f); } + __local_unlock_nested_bh(&scratch->bh_lock); out: local_bh_enable(); return NULL; } +/** + * pipapo_get() - Get matching element reference given key data + * @m: Storage containing the set elements + * @data: Key data to be matched against existing elements + * @genmask: If set, check that element is active in given genmask + * @tstamp: Timestamp to check for expired elements + * + * This is a dispatcher function, either calling out the generic C + * implementation or, if available, the AVX2 one. + * This helper is only called from the control plane, with either RCU + * read lock or transaction mutex held. + * + * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. 
+ */ +static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp) +{ + struct nft_pipapo_elem *e; + + local_bh_disable(); + +#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) + if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX) && + irq_fpu_usable()) { + e = pipapo_get_avx2(m, data, genmask, tstamp); + local_bh_enable(); + return e; + } +#endif + e = pipapo_get_slow(m, data, genmask, tstamp); + local_bh_enable(); + return e; +} + /** * nft_pipapo_lookup() - Dataplane fronted for main lookup function * @net: Network namespace @@ -510,8 +550,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, * * This function is called from the data path. It will search for * an element matching the given key in the current active copy. - * Unlike other set types, this uses NFT_GENMASK_ANY instead of - * nft_genmask_cur(). + * Unlike other set types, this uses 0 instead of nft_genmask_cur(). * * This is because new (future) elements are not reachable from * priv->match, they get added to priv->clone instead. @@ -521,8 +560,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, * inconsistent state: matching old entries get skipped but thew * newly matching entries are unreachable. * - * GENMASK will still find the 'now old' entries which ensures consistent - * priv->match view. + * GENMASK_ANY doesn't work for the same reason: old-gen entries get + * skipped, new-gen entries are only reachable from priv->clone. * * nft_pipapo_commit swaps ->clone and ->match shortly after the * genbit flip. As ->clone doesn't contain the old entries in the first @@ -539,7 +578,7 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const struct nft_pipapo_elem *e; m = rcu_dereference(priv->match); - e = pipapo_get(m, (const u8 *)key, NFT_GENMASK_ANY, get_jiffies_64()); + e = pipapo_get_slow(m, (const u8 *)key, 0, get_jiffies_64()); return e ? &e->ext : NULL; } @@ -1152,22 +1191,17 @@ static void pipapo_map(struct nft_pipapo_match *m, } /** - * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address + * pipapo_free_scratch() - Free per-CPU map at original address * @m: Matching data * @cpu: CPU number */ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu) { struct nft_pipapo_scratch *s; - void *mem; s = *per_cpu_ptr(m->scratch, cpu); - if (!s) - return; - mem = s; - mem -= s->align_off; - kvfree(mem); + kvfree(s); } /** @@ -1184,11 +1218,8 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, for_each_possible_cpu(i) { struct nft_pipapo_scratch *scratch; -#ifdef NFT_PIPAPO_ALIGN - void *scratch_aligned; - u32 align_off; -#endif - scratch = kvzalloc_node(struct_size(scratch, map, bsize_max * 2) + + + scratch = kvzalloc_node(struct_size(scratch, __map, bsize_max * 2) + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT, cpu_to_node(i)); if (!scratch) { @@ -1203,23 +1234,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, } pipapo_free_scratch(clone, i); - -#ifdef NFT_PIPAPO_ALIGN - /* Align &scratch->map (not the struct itself): the extra - * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() - * above guarantee we can waste up to those bytes in order - * to align the map field regardless of its offset within - * the struct. 
- */ - BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM); - - scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); - scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); - align_off = scratch_aligned - (void *)scratch; - - scratch = scratch_aligned; - scratch->align_off = align_off; -#endif + local_lock_init(&scratch->bh_lock); *per_cpu_ptr(clone->scratch, i) = scratch; } diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 4a2ff85ce1c43..eaab422aa56ab 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -124,14 +124,14 @@ struct nft_pipapo_field { /** * struct nft_pipapo_scratch - percpu data used for lookup and matching + * @bh_lock: PREEMPT_RT local spinlock * @map_index: Current working bitmap index, toggled between field matches - * @align_off: Offset to get the originally allocated address - * @map: store partial matching results during lookup + * @__map: store partial matching results during lookup */ struct nft_pipapo_scratch { + local_lock_t bh_lock; u8 map_index; - u32 align_off; - unsigned long map[]; + unsigned long __map[]; }; /** diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index c0884fa68c798..9235efb10dd52 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1133,75 +1133,59 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns } /** - * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation - * @net: Network namespace - * @set: nftables API set representation - * @key: nftables API element representation containing key data + * pipapo_get_avx2() - Lookup function for AVX2 implementation + * @m: Storage containing the set elements + * @data: Key data to be matched against existing elements + * @genmask: If set, check that element is active in given genmask + * @tstamp: Timestamp to check for expired elements * * For more details, see DOC: Theory of Operation in nft_set_pipapo.c. * * This implementation exploits the repetitive characteristic of the algorithm * to provide a fast, vectorised version using the AVX2 SIMD instruction set. * - * Return: true on match, false otherwise. + * The caller must check that the FPU is usable. + * This function must be called with BH disabled. + * + * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. */ -const struct nft_set_ext * -nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, - const u32 *key) +struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp) { - struct nft_pipapo *priv = nft_set_priv(set); - const struct nft_set_ext *ext = NULL; struct nft_pipapo_scratch *scratch; - const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; - const u8 *rp = (const u8 *)key; - unsigned long *res, *fill; + unsigned long *res, *fill, *map; bool map_index; int i; - local_bh_disable(); + scratch = *raw_cpu_ptr(m->scratch); + if (unlikely(!scratch)) + return NULL; - if (unlikely(!irq_fpu_usable())) { - ext = nft_pipapo_lookup(net, set, key); + __local_lock_nested_bh(&scratch->bh_lock); + map_index = scratch->map_index; + map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]); + res = map + (map_index ? m->bsize_max : 0); + fill = map + (map_index ? 
0 : m->bsize_max); - local_bh_enable(); - return ext; - } + pipapo_resmap_init_avx2(m, res); - m = rcu_dereference(priv->match); - - /* This also protects access to all data related to scratch maps. - * - * Note that we don't need a valid MXCSR state for any of the + /* Note that we don't need a valid MXCSR state for any of the * operations we use here, so pass 0 as mask and spare a LDMXCSR * instruction. */ kernel_fpu_begin_mask(0); - scratch = *raw_cpu_ptr(m->scratch); - if (unlikely(!scratch)) { - kernel_fpu_end(); - local_bh_enable(); - return NULL; - } - - map_index = scratch->map_index; - - res = scratch->map + (map_index ? m->bsize_max : 0); - fill = scratch->map + (map_index ? 0 : m->bsize_max); - - pipapo_resmap_init_avx2(m, res); - nft_pipapo_avx2_prepare(); -next_match: nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1, first = !i; int ret = 0; #define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \ (ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \ - ret, rp, \ + ret, data, \ first, last)) if (likely(f->bb == 8)) { @@ -1217,7 +1201,7 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16); } else { ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, - ret, rp, + ret, data, first, last); } } else { @@ -1233,7 +1217,7 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32); } else { ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, - ret, rp, + ret, data, first, last); } } @@ -1241,28 +1225,78 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, #undef NFT_SET_PIPAPO_AVX2_LOOKUP - if (ret < 0) - goto out; - - if (last) { - const struct nft_set_ext *e = &f->mt[ret].e->ext; - - if (unlikely(nft_set_elem_expired(e))) - goto next_match; - - ext = e; - goto out; +next_match: + if (ret < 0) { + scratch->map_index = map_index; + kernel_fpu_end(); + __local_unlock_nested_bh(&scratch->bh_lock); + return NULL; } + if (last) { + struct nft_pipapo_elem *e; + + e = f->mt[ret].e; + if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) || + !nft_set_elem_active(&e->ext, genmask))) { + ret = pipapo_refill(res, f->bsize, f->rules, + fill, f->mt, last); + goto next_match; + } + + scratch->map_index = map_index; + kernel_fpu_end(); + __local_unlock_nested_bh(&scratch->bh_lock); + return e; + } + + map_index = !map_index; swap(res, fill); - rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + data += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } -out: - if (i % 2) - scratch->map_index = !map_index; kernel_fpu_end(); + __local_unlock_nested_bh(&scratch->bh_lock); + return NULL; +} + +/** + * nft_pipapo_avx2_lookup() - Dataplane frontend for AVX2 implementation + * @net: Network namespace + * @set: nftables API set representation + * @key: nftables API element representation containing key data + * + * This function is called from the data path. It will search for + * an element matching the given key in the current active copy using + * the AVX2 routines if the FPU is usable or fall back to the generic + * implementation of the algorithm otherwise. + * + * Return: nftables API extension pointer or NULL if no match. 
+ */ +const struct nft_set_ext * +nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) +{ + struct nft_pipapo *priv = nft_set_priv(set); + const struct nft_pipapo_match *m; + const u8 *rp = (const u8 *)key; + const struct nft_pipapo_elem *e; + + local_bh_disable(); + + if (unlikely(!irq_fpu_usable())) { + const struct nft_set_ext *ext; + + ext = nft_pipapo_lookup(net, set, key); + + local_bh_enable(); + return ext; + } + + m = rcu_dereference(priv->match); + + e = pipapo_get_avx2(m, rp, 0, get_jiffies_64()); local_bh_enable(); - return ext; + return e ? &e->ext : NULL; } diff --git a/net/netfilter/nft_set_pipapo_avx2.h b/net/netfilter/nft_set_pipapo_avx2.h index dbb6aaca8a7ad..c2999b63da3f3 100644 --- a/net/netfilter/nft_set_pipapo_avx2.h +++ b/net/netfilter/nft_set_pipapo_avx2.h @@ -5,8 +5,12 @@ #include #define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE) +struct nft_pipapo_match; bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est); +struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp); #endif /* defined(CONFIG_X86_64) && !defined(CONFIG_UML) */ #endif /* _NFT_SET_PIPAPO_AVX2_H */
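A minimal sketch (not part of the patch, names invented) of the local_lock_t pattern that the pseries tce_page conversion above uses in place of local_irq_save() for per-CPU data:

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct pcpu_buf {
	void *page;
	local_lock_t lock;
};

static DEFINE_PER_CPU(struct pcpu_buf, pcpu_buf) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void pcpu_buf_set(void *p)
{
	unsigned long flags;

	/*
	 * On !PREEMPT_RT this disables interrupts, exactly like the
	 * local_irq_save() it replaces. On PREEMPT_RT it takes a per-CPU
	 * sleeping lock and only disables migration, so the section stays
	 * preemptible while the per-CPU data remains serialised.
	 */
	local_lock_irqsave(&pcpu_buf.lock, flags);
	__this_cpu_write(pcpu_buf.page, p);
	local_unlock_irqrestore(&pcpu_buf.lock, flags);
}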
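The gro_cell and nft_pipapo_scratch changes guard per-CPU scratch state that used to rely on softirq execution being non-preemptible. A sketch of the nested-BH local lock pattern, assuming statically allocated per-CPU data (the patch uses local_lock_init() for its dynamically allocated copies); names are invented:

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct pcpu_scratch {
	local_lock_t bh_lock;
	unsigned long hits;
};

static DEFINE_PER_CPU(struct pcpu_scratch, pcpu_scratch) = {
	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};

/* Caller runs in softirq context or under local_bh_disable(). */
static void pcpu_scratch_touch(void)
{
	/*
	 * On !PREEMPT_RT this is only a lockdep annotation, since
	 * BH-disabled sections on one CPU cannot interleave. On
	 * PREEMPT_RT, where softirq handling is preemptible, it
	 * serialises the BH contexts sharing this CPU's scratch data.
	 */
	local_lock_nested_bh(&pcpu_scratch.bh_lock);
	__this_cpu_inc(pcpu_scratch.hits);
	local_unlock_nested_bh(&pcpu_scratch.bh_lock);
}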
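Finally, the modem_status_work added to the 8250 console defers wake-capable work out of the atomic write path. A sketch of that irq_work pattern, with hypothetical names:

#include <linux/irq_work.h>

/*
 * Runs shortly after being queued, from hard-IRQ context or, on
 * PREEMPT_RT, from the per-CPU irq_work thread; unlike the atomic
 * caller it may take the port lock and wake tasks.
 */
static void example_deferred_work(struct irq_work *iwp)
{
	/* ... take the port lock and handle the modem status here ... */
}

static DEFINE_IRQ_WORK(example_work, example_deferred_work);

static void example_atomic_console_path(void)
{
	/*
	 * An atomic (possibly NMI) console write must not wake tasks,
	 * so only queue the work here; it runs later from the handler
	 * above.
	 */
	irq_work_queue(&example_work);
}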