Index: linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst
===================================================================
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:67 @ Brief summary of control files.
 				     threads
  cgroup.procs			     show list of processes
  cgroup.event_control		     an interface for event_fd()
+				     This knob is not available on CONFIG_PREEMPT_RT systems.
  memory.usage_in_bytes		     show current usage for memory
 				     (See 5.5 for details)
  memory.memsw.usage_in_bytes	     show current usage for memory+Swap
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:79 @ Brief summary of control files.
  memory.max_usage_in_bytes	     show max memory usage recorded
  memory.memsw.max_usage_in_bytes     show max memory+Swap usage recorded
  memory.soft_limit_in_bytes	     set/show soft limit of memory usage
+				     This knob is not available on CONFIG_PREEMPT_RT systems.
  memory.stat			     show various statistics
  memory.use_hierarchy		     set/show hierarchical account enabled
                                      This knob is deprecated and shouldn't be
Index: linux-5.15/Documentation/dev-tools/kcov.rst
===================================================================
--- linux-5.15.orig/Documentation/dev-tools/kcov.rst
+++ linux-5.15/Documentation/dev-tools/kcov.rst
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:53 @ program using kcov:
     #include <sys/mman.h>
     #include <unistd.h>
     #include <fcntl.h>
+    #include <linux/types.h>
 
     #define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
     #define KCOV_ENABLE			_IO('c', 100)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:181 @ Comparison operands collection is simila
 	/* Read number of comparisons collected. */
 	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
 	for (i = 0; i < n; i++) {
+		uint64_t ip;
+
 		type = cover[i * KCOV_WORDS_PER_CMP + 1];
 		/* arg1 and arg2 - operands of the comparison. */
 		arg1 = cover[i * KCOV_WORDS_PER_CMP + 2];
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:257 @ selectively from different subsystems.
 
 .. code-block:: c
 
+    /* Same includes and defines as above. */
+
     struct kcov_remote_arg {
 	__u32		trace_mode;
 	__u32		area_size;
Index: linux-5.15/arch/alpha/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/alpha/include/asm/spinlock_types.h
+++ linux-5.15/arch/alpha/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5 @
 #ifndef _ALPHA_SPINLOCK_TYPES_H
 #define _ALPHA_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/arm/Kconfig
===================================================================
--- linux-5.15.orig/arch/arm/Kconfig
+++ linux-5.15/arch/arm/Kconfig
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:35 @ config ARM
 	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
+	select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_USE_MEMTEST
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:72 @ config ARM
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
 	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
-	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
 	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
 	select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:113 @ config ARM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_PREEMPT_LAZY
 	select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:129 @ config ARM
 	select OLD_SIGSUSPEND3
 	select PCI_SYSCALL if PCI
 	select PERF_USE_VMALLOC
+	select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION
 	select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
Index: linux-5.15/arch/arm/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/arm/include/asm/spinlock_types.h
+++ linux-5.15/arch/arm/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5 @
 #ifndef __ASM_SPINLOCK_TYPES_H
 #define __ASM_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/arm/include/asm/thread_info.h
===================================================================
--- linux-5.15.orig/arch/arm/include/asm/thread_info.h
+++ linux-5.15/arch/arm/include/asm/thread_info.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:55 @ struct cpu_context_save {
 struct thread_info {
 	unsigned long		flags;		/* low level flags */
 	int			preempt_count;	/* 0 => preemptable, <0 => bug */
+	int			preempt_lazy_count; /* 0 => preemptable, <0 => bug */
 	struct task_struct	*task;		/* main task structure */
 	__u32			cpu;		/* cpu */
 	__u32			cpu_domain;	/* cpu domain */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:134 @ extern int vfp_restore_user_hwstate(stru
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
 #define TIF_UPROBE		3	/* breakpointed or singlestepping */
 #define TIF_NOTIFY_SIGNAL	4	/* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY	9
 
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:154 @ extern int vfp_restore_user_hwstate(stru
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
 #define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
 
 /* Checks for any syscall work in entry-common.S */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:164 @ extern int vfp_restore_user_hwstate(stru
 /*
  * Change these and you break ASM code in entry-common.S
  */
-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+				 _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
 				 _TIF_NOTIFY_SIGNAL)
 
Index: linux-5.15/arch/arm/kernel/asm-offsets.c
===================================================================
--- linux-5.15.orig/arch/arm/kernel/asm-offsets.c
+++ linux-5.15/arch/arm/kernel/asm-offsets.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:46 @ int main(void)
   BLANK();
   DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags));
   DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
+  DEFINE(TI_PREEMPT_LAZY,	offsetof(struct thread_info, preempt_lazy_count));
   DEFINE(TI_TASK,		offsetof(struct thread_info, task));
   DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
   DEFINE(TI_CPU_DOMAIN,		offsetof(struct thread_info, cpu_domain));
Index: linux-5.15/arch/arm/kernel/entry-armv.S
===================================================================
--- linux-5.15.orig/arch/arm/kernel/entry-armv.S
+++ linux-5.15/arch/arm/kernel/entry-armv.S
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:209 @ __irq_svc:
 
 #ifdef CONFIG_PREEMPTION
 	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
-	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
 	teq	r8, #0				@ if preempt count != 0
+	bne	1f				@ return from exeption
+	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
+	tst	r0, #_TIF_NEED_RESCHED		@ if NEED_RESCHED is set
+	blne	svc_preempt			@ preempt!
+
+	ldr	r8, [tsk, #TI_PREEMPT_LAZY]	@ get preempt lazy count
+	teq	r8, #0				@ if preempt lazy count != 0
 	movne	r0, #0				@ force flags to 0
-	tst	r0, #_TIF_NEED_RESCHED
+	tst	r0, #_TIF_NEED_RESCHED_LAZY
 	blne	svc_preempt
+1:
 #endif
 
 	svc_exit r5, irq = 1			@ return from exception
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:235 @ svc_preempt:
 1:	bl	preempt_schedule_irq		@ irq en/disable is done inside
 	ldr	r0, [tsk, #TI_FLAGS]		@ get new tasks TI_FLAGS
 	tst	r0, #_TIF_NEED_RESCHED
+	bne	1b
+	tst	r0, #_TIF_NEED_RESCHED_LAZY
 	reteq	r8				@ go again
-	b	1b
+	ldr	r0, [tsk, #TI_PREEMPT_LAZY]	@ get preempt lazy count
+	teq	r0, #0				@ if preempt lazy count != 0
+	beq	1b
+	ret	r8				@ go again
+
 #endif
 
 __und_fault:
Index: linux-5.15/arch/arm/kernel/signal.c
===================================================================
--- linux-5.15.orig/arch/arm/kernel/signal.c
+++ linux-5.15/arch/arm/kernel/signal.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:610 @ do_work_pending(struct pt_regs *regs, un
 	 */
 	trace_hardirqs_off();
 	do {
-		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+		if (likely(thread_flags & (_TIF_NEED_RESCHED |
+					   _TIF_NEED_RESCHED_LAZY))) {
 			schedule();
 		} else {
 			if (unlikely(!user_mode(regs)))
Index: linux-5.15/arch/arm/mm/fault.c
===================================================================
--- linux-5.15.orig/arch/arm/mm/fault.c
+++ linux-5.15/arch/arm/mm/fault.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:403 @ do_translation_fault(unsigned long addr,
 	if (addr < TASK_SIZE)
 		return do_page_fault(addr, fsr, regs);
 
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
 	if (user_mode(regs))
 		goto bad_area;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:476 @ do_translation_fault(unsigned long addr,
 static int
 do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
 	do_bad_area(addr, fsr, regs);
 	return 0;
 }
Index: linux-5.15/arch/arm64/Kconfig
===================================================================
--- linux-5.15.orig/arch/arm64/Kconfig
+++ linux-5.15/arch/arm64/Kconfig
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:91 @ config ARM64
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
 	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:195 @ config ARM64
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_PREEMPT_LAZY
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select MMU_GATHER_RCU_TABLE_FREE
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:217 @ config ARM64
 	select PCI_DOMAINS_GENERIC if PCI
 	select PCI_ECAM if (ACPI && PCI)
 	select PCI_SYSCALL if PCI
+	select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
 	select POWER_RESET
 	select POWER_SUPPLY
 	select SPARSE_IRQ
Index: linux-5.15/arch/arm64/include/asm/pgtable.h
===================================================================
--- linux-5.15.orig/arch/arm64/include/asm/pgtable.h
+++ linux-5.15/arch/arm64/include/asm/pgtable.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1004 @ static inline void update_mmu_cache(stru
  */
 static inline bool arch_faults_on_old_pte(void)
 {
-	WARN_ON(preemptible());
+	WARN_ON(is_migratable());
 
 	return !cpu_has_hw_af();
 }
Index: linux-5.15/arch/arm64/include/asm/preempt.h
===================================================================
--- linux-5.15.orig/arch/arm64/include/asm/preempt.h
+++ linux-5.15/arch/arm64/include/asm/preempt.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:73 @ static inline bool __preempt_count_dec_a
 	 * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
 	 * pair.
 	 */
-	return !pc || !READ_ONCE(ti->preempt_count);
+	if (!pc || !READ_ONCE(ti->preempt_count))
+		return true;
+#ifdef CONFIG_PREEMPT_LAZY
+	if ((pc & ~PREEMPT_NEED_RESCHED))
+		return false;
+	if (current_thread_info()->preempt_lazy_count)
+		return false;
+	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+	return false;
+#endif
 }
 
 static inline bool should_resched(int preempt_offset)
 {
+#ifdef CONFIG_PREEMPT_LAZY
+	u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+	if (pc == preempt_offset)
+		return true;
+
+	if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset)
+		return false;
+
+	if (current_thread_info()->preempt_lazy_count)
+		return false;
+	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
 	u64 pc = READ_ONCE(current_thread_info()->preempt_count);
 	return pc == preempt_offset;
+#endif
 }
 
 #ifdef CONFIG_PREEMPTION
Index: linux-5.15/arch/arm64/include/asm/signal.h
===================================================================
--- linux-5.15.orig/arch/arm64/include/asm/signal.h
+++ linux-5.15/arch/arm64/include/asm/signal.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:25 @ static inline void __user *arch_untagged
 }
 #define arch_untagged_si_addr arch_untagged_si_addr
 
+#if defined(CONFIG_PREEMPT_RT)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
 #endif
Index: linux-5.15/arch/arm64/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/arm64/include/asm/spinlock_types.h
+++ linux-5.15/arch/arm64/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:8 @
 #ifndef __ASM_SPINLOCK_TYPES_H
 #define __ASM_SPINLOCK_TYPES_H
 
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/arm64/include/asm/thread_info.h
===================================================================
--- linux-5.15.orig/arch/arm64/include/asm/thread_info.h
+++ linux-5.15/arch/arm64/include/asm/thread_info.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:29 @ struct thread_info {
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
 	u64			ttbr0;		/* saved TTBR0_EL1 */
 #endif
+	int			preempt_lazy_count;	/* 0 => preemptable, <0 => bug */
 	union {
 		u64		preempt_count;	/* 0 => preemptible, <0 => bug */
 		struct {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:71 @ int arch_dup_task_struct(struct task_str
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
 #define TIF_MTE_ASYNC_FAULT	5	/* MTE Asynchronous Tag Check Fault */
 #define TIF_NOTIFY_SIGNAL	6	/* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY	7
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
 #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:102 @ int arch_dup_task_struct(struct task_str
 #define _TIF_SVE		(1 << TIF_SVE)
 #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
 
-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+				 _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
 				 _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
 				 _TIF_NOTIFY_SIGNAL)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:114 @ int arch_dup_task_struct(struct task_str
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
 				 _TIF_SYSCALL_EMU)
 
+#define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+
 #ifdef CONFIG_SHADOW_CALL_STACK
 #define INIT_SCS							\
 	.scs_base	= init_shadow_call_stack,			\
Index: linux-5.15/arch/arm64/kernel/asm-offsets.c
===================================================================
--- linux-5.15.orig/arch/arm64/kernel/asm-offsets.c
+++ linux-5.15/arch/arm64/kernel/asm-offsets.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:34 @ int main(void)
   BLANK();
   DEFINE(TSK_TI_FLAGS,		offsetof(struct task_struct, thread_info.flags));
   DEFINE(TSK_TI_PREEMPT,	offsetof(struct task_struct, thread_info.preempt_count));
+  DEFINE(TSK_TI_PREEMPT_LAZY,	offsetof(struct task_struct, thread_info.preempt_lazy_count));
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
   DEFINE(TSK_TI_TTBR0,		offsetof(struct task_struct, thread_info.ttbr0));
 #endif
Index: linux-5.15/arch/arm64/kernel/fpsimd.c
===================================================================
--- linux-5.15.orig/arch/arm64/kernel/fpsimd.c
+++ linux-5.15/arch/arm64/kernel/fpsimd.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:182 @ static void __get_cpu_fpsimd_context(voi
  *
  * The double-underscore version must only be called if you know the task
  * can't be preempted.
+ *
+ * On RT kernels local_bh_disable() is not sufficient because it only
+ * serializes soft interrupt related sections via a local lock, but stays
+ * preemptible. Disabling preemption is the right choice here as bottom
+ * half processing is always in thread context on RT kernels so it
+ * implicitly prevents bottom half processing as well.
  */
 static void get_cpu_fpsimd_context(void)
 {
-	local_bh_disable();
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_bh_disable();
+	else
+		preempt_disable();
 	__get_cpu_fpsimd_context();
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:215 @ static void __put_cpu_fpsimd_context(voi
 static void put_cpu_fpsimd_context(void)
 {
 	__put_cpu_fpsimd_context();
-	local_bh_enable();
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_bh_enable();
+	else
+		preempt_enable();
 }
 
 static bool have_cpu_fpsimd_context(void)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1048 @ void fpsimd_thread_switch(struct task_st
 void fpsimd_flush_thread(void)
 {
 	int vl, supported_vl;
+	void *sve_state = NULL;
 
 	if (!system_supports_fpsimd())
 		return;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1061 @ void fpsimd_flush_thread(void)
 
 	if (system_supports_sve()) {
 		clear_thread_flag(TIF_SVE);
-		sve_free(current);
+
+		/* Defer kfree() while in atomic context */
+		sve_state = current->thread.sve_state;
+		current->thread.sve_state = NULL;
 
 		/*
 		 * Reset the task vector length as required.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1098 @ void fpsimd_flush_thread(void)
 	}
 
 	put_cpu_fpsimd_context();
+	kfree(sve_state);
 }
 
 /*
Index: linux-5.15/arch/arm64/kernel/signal.c
===================================================================
--- linux-5.15.orig/arch/arm64/kernel/signal.c
+++ linux-5.15/arch/arm64/kernel/signal.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:925 @ static void do_signal(struct pt_regs *re
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
 {
 	do {
-		if (thread_flags & _TIF_NEED_RESCHED) {
+		if (thread_flags & _TIF_NEED_RESCHED_MASK) {
 			/* Unmask Debug and SError for the next task */
 			local_daif_restore(DAIF_PROCCTX_NOIRQ);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:933 @ void do_notify_resume(struct pt_regs *re
 		} else {
 			local_daif_restore(DAIF_PROCCTX);
 
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+			if (unlikely(current->forced_info.si_signo)) {
+				struct task_struct *t = current;
+				force_sig_info(&t->forced_info);
+				t->forced_info.si_signo = 0;
+			}
+#endif
+
 			if (thread_flags & _TIF_UPROBE)
 				uprobe_notify_resume(regs);
 
Index: linux-5.15/arch/arm64/kvm/arm.c
===================================================================
--- linux-5.15.orig/arch/arm64/kvm/arm.c
+++ linux-5.15/arch/arm64/kvm/arm.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:831 @ int kvm_arch_vcpu_ioctl_run(struct kvm_v
 		 * involves poking the GIC, which must be done in a
 		 * non-preemptible context.
 		 */
-		preempt_disable();
+		migrate_disable();
 
 		kvm_pmu_flush_hwstate(vcpu);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:855 @ int kvm_arch_vcpu_ioctl_run(struct kvm_v
 				kvm_timer_sync_user(vcpu);
 			kvm_vgic_sync_hwstate(vcpu);
 			local_irq_enable();
-			preempt_enable();
+			migrate_enable();
 			continue;
 		}
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:924 @ int kvm_arch_vcpu_ioctl_run(struct kvm_v
 		/* Exit types that need handling before we can be preempted */
 		handle_exit_early(vcpu, ret);
 
-		preempt_enable();
+		migrate_enable();
 
 		/*
 		 * The ARMv8 architecture doesn't give the hypervisor
Index: linux-5.15/arch/csky/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/csky/include/asm/spinlock_types.h
+++ linux-5.15/arch/csky/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6 @
 #ifndef __ASM_CSKY_SPINLOCK_TYPES_H
 #define __ASM_CSKY_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/hexagon/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/hexagon/include/asm/spinlock_types.h
+++ linux-5.15/arch/hexagon/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:11 @
 #ifndef _ASM_SPINLOCK_TYPES_H
 #define _ASM_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/ia64/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/ia64/include/asm/spinlock_types.h
+++ linux-5.15/arch/ia64/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5 @
 #ifndef _ASM_IA64_SPINLOCK_TYPES_H
 #define _ASM_IA64_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/powerpc/Kconfig
===================================================================
--- linux-5.15.orig/arch/powerpc/Kconfig
+++ linux-5.15/arch/powerpc/Kconfig
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:154 @ config PPC
 	select ARCH_STACKWALK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC_BOOK3S || PPC_8xx || 40x
+	select ARCH_SUPPORTS_RT			if HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
 	select ARCH_USE_MEMTEST
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:222 @ config PPC
 	select HAVE_HW_BREAKPOINT		if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select HAVE_POSIX_CPU_TIMERS_TASK_WORK	if !KVM
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZMA			if DEFAULT_UIMAGE
 	select HAVE_KERNEL_LZO			if DEFAULT_UIMAGE
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:239 @ config PPC
 	select HAVE_PERF_EVENTS_NMI		if PPC64
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_PREEMPT_LAZY
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE
 	select HAVE_RSEQ
Index: linux-5.15/arch/powerpc/include/asm/simple_spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/powerpc/include/asm/simple_spinlock_types.h
+++ linux-5.15/arch/powerpc/include/asm/simple_spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5 @
 #ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
 #define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/powerpc/include/asm/smp.h
===================================================================
--- linux-5.15.orig/arch/powerpc/include/asm/smp.h
+++ linux-5.15/arch/powerpc/include/asm/smp.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:65 @ struct smp_ops_t {
 
 extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
 extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
+extern void smp_send_debugger_break_cpu(unsigned int cpu);
 extern void smp_send_debugger_break(void);
 extern void start_secondary_resume(void);
 extern void smp_generic_give_timebase(void);
Index: linux-5.15/arch/powerpc/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/powerpc/include/asm/spinlock_types.h
+++ linux-5.15/arch/powerpc/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5 @
 #ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
 #define _ASM_POWERPC_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/powerpc/include/asm/stackprotector.h
===================================================================
--- linux-5.15.orig/arch/powerpc/include/asm/stackprotector.h
+++ linux-5.15/arch/powerpc/include/asm/stackprotector.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:27 @ static __always_inline void boot_init_st
 	unsigned long canary;
 
 	/* Try to get a semi random initial value. */
+#ifdef CONFIG_PREEMPT_RT
+	canary = (unsigned long)&canary;
+#else
 	canary = get_random_canary();
+#endif
 	canary ^= mftb();
 	canary ^= LINUX_VERSION_CODE;
 	canary &= CANARY_MASK;
Index: linux-5.15/arch/powerpc/include/asm/thread_info.h
===================================================================
--- linux-5.15.orig/arch/powerpc/include/asm/thread_info.h
+++ linux-5.15/arch/powerpc/include/asm/thread_info.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:56 @
 struct thread_info {
 	int		preempt_count;		/* 0 => preemptable,
 						   <0 => BUG */
+	int             preempt_lazy_count;	/* 0 => preemptable,
+						   <0 => BUG */
 	unsigned long	local_flags;		/* private flags for thread */
 #ifdef CONFIG_LIVEPATCH
 	unsigned long *livepatch_sp;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:104 @ void arch_setup_new_exec(void);
 #define TIF_PATCH_PENDING	6	/* pending live patching update */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SINGLESTEP		8	/* singlestepping active */
+#define TIF_NEED_RESCHED_LAZY	9	/* lazy rescheduling necessary */
 #define TIF_SECCOMP		10	/* secure computing */
 #define TIF_RESTOREALL		11	/* Restore all regs (implies NOERROR) */
 #define TIF_NOERROR		12	/* Force successful syscall return */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:120 @ void arch_setup_new_exec(void);
 #define TIF_POLLING_NRFLAG	19	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_32BIT		20	/* 32 bit binary */
 
+
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:132 @ void arch_setup_new_exec(void);
 #define _TIF_PATCH_PENDING	(1<<TIF_PATCH_PENDING)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
+#define _TIF_NEED_RESCHED_LAZY	(1<<TIF_NEED_RESCHED_LAZY)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
 #define _TIF_RESTOREALL		(1<<TIF_RESTOREALL)
 #define _TIF_NOERROR		(1<<TIF_NOERROR)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:146 @ void arch_setup_new_exec(void);
 				 _TIF_SYSCALL_EMU)
 
 #define _TIF_USER_WORK_MASK	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+				 _TIF_NEED_RESCHED_LAZY | \
 				 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
 				 _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
 				 _TIF_NOTIFY_SIGNAL)
 #define _TIF_PERSYSCALL_MASK	(_TIF_RESTOREALL|_TIF_NOERROR)
+#define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
 
 /* Bits in local_flags */
 /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
Index: linux-5.15/arch/powerpc/kernel/interrupt.c
===================================================================
--- linux-5.15.orig/arch/powerpc/kernel/interrupt.c
+++ linux-5.15/arch/powerpc/kernel/interrupt.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:351 @ again:
 	ti_flags = READ_ONCE(current_thread_info()->flags);
 	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
 		local_irq_enable();
-		if (ti_flags & _TIF_NEED_RESCHED) {
+		if (ti_flags & _TIF_NEED_RESCHED_MASK) {
 			schedule();
 		} else {
 			/*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:557 @ notrace unsigned long interrupt_exit_ker
 		/* Returning to a kernel context with local irqs enabled. */
 		WARN_ON_ONCE(!(regs->msr & MSR_EE));
 again:
-		if (IS_ENABLED(CONFIG_PREEMPT)) {
+		if (IS_ENABLED(CONFIG_PREEMPTION)) {
 			/* Return to preemptible kernel context */
 			if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
 				if (preempt_count() == 0)
 					preempt_schedule_irq();
+			} else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) {
+				if ((preempt_count() == 0) &&
+				    (current_thread_info()->preempt_lazy_count == 0))
+					preempt_schedule_irq();
 			}
 		}
 
Index: linux-5.15/arch/powerpc/kernel/irq.c
===================================================================
--- linux-5.15.orig/arch/powerpc/kernel/irq.c
+++ linux-5.15/arch/powerpc/kernel/irq.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:693 @ static inline void check_stack_overflow(
 	}
 }
 
+#ifndef CONFIG_PREEMPT_RT
 static __always_inline void call_do_softirq(const void *sp)
 {
 	/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:712 @ static __always_inline void call_do_soft
 		   "r11", "r12"
 	);
 }
+#endif
 
 static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
 {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:825 @ void *mcheckirq_ctx[NR_CPUS] __read_most
 void *softirq_ctx[NR_CPUS] __read_mostly;
 void *hardirq_ctx[NR_CPUS] __read_mostly;
 
+#ifndef CONFIG_PREEMPT_RT
 void do_softirq_own_stack(void)
 {
 	call_do_softirq(softirq_ctx[smp_processor_id()]);
 }
+#endif
 
 irq_hw_number_t virq_to_hw(unsigned int virq)
 {
Index: linux-5.15/arch/powerpc/kernel/kgdb.c
===================================================================
--- linux-5.15.orig/arch/powerpc/kernel/kgdb.c
+++ linux-5.15/arch/powerpc/kernel/kgdb.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:123 @ int kgdb_skipexception(int exception, st
 
 static int kgdb_debugger_ipi(struct pt_regs *regs)
 {
-	kgdb_nmicallback(raw_smp_processor_id(), regs);
+	int cpu = raw_smp_processor_id();
+
+	if (!kgdb_roundup_delay(cpu))
+		kgdb_nmicallback(cpu, regs);
 	return 0;
 }
 
 #ifdef CONFIG_SMP
+void kgdb_roundup_cpu(unsigned int cpu)
+{
+	smp_send_debugger_break_cpu(cpu);
+}
+
 void kgdb_roundup_cpus(void)
 {
 	smp_send_debugger_break();
Index: linux-5.15/arch/powerpc/kernel/smp.c
===================================================================
--- linux-5.15.orig/arch/powerpc/kernel/smp.c
+++ linux-5.15/arch/powerpc/kernel/smp.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:593 @ static void debugger_ipi_callback(struct
 	debugger_ipi(regs);
 }
 
+void smp_send_debugger_break_cpu(unsigned int cpu)
+{
+	smp_send_nmi_ipi(cpu, debugger_ipi_callback, 1000000);
+}
+
 void smp_send_debugger_break(void)
 {
 	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
Index: linux-5.15/arch/powerpc/kernel/traps.c
===================================================================
--- linux-5.15.orig/arch/powerpc/kernel/traps.c
+++ linux-5.15/arch/powerpc/kernel/traps.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:263 @ static char *get_mmu_str(void)
 
 static int __die(const char *str, struct pt_regs *regs, long err)
 {
+	const char *pr = "";
+
 	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
 
+	if (IS_ENABLED(CONFIG_PREEMPTION))
+		pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
+
 	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
 	       IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
 	       PAGE_SIZE / 1024, get_mmu_str(),
-	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+	       pr,
 	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
 	       IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
 	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
Index: linux-5.15/arch/powerpc/kvm/Kconfig
===================================================================
--- linux-5.15.orig/arch/powerpc/kvm/Kconfig
+++ linux-5.15/arch/powerpc/kvm/Kconfig
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:181 @ config KVM_E500MC
 config KVM_MPIC
 	bool "KVM in-kernel MPIC emulation"
 	depends on KVM && E500
+	depends on !PREEMPT_RT
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
Index: linux-5.15/arch/powerpc/platforms/pseries/iommu.c
===================================================================
--- linux-5.15.orig/arch/powerpc/platforms/pseries/iommu.c
+++ linux-5.15/arch/powerpc/platforms/pseries/iommu.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:27 @
 #include <linux/of.h>
 #include <linux/iommu.h>
 #include <linux/rculist.h>
+#include <linux/local_lock.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:199 @ static int tce_build_pSeriesLP(unsigned
 	return ret;
 }
 
-static DEFINE_PER_CPU(__be64 *, tce_page);
+struct tce_page {
+	__be64 * page;
+	local_lock_t lock;
+};
+static DEFINE_PER_CPU(struct tce_page, tce_page) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+};
 
 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				     long npages, unsigned long uaddr,
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:228 @ static int tce_buildmulti_pSeriesLP(stru
 		                           direction, attrs);
 	}
 
-	local_irq_save(flags);	/* to protect tcep and the page behind it */
+	/* to protect tcep and the page behind it */
+	local_lock_irqsave(&tce_page.lock, flags);
 
-	tcep = __this_cpu_read(tce_page);
+	tcep = __this_cpu_read(tce_page.page);
 
 	/* This is safe to do since interrupts are off when we're called
 	 * from iommu_alloc{,_sg}()
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:240 @ static int tce_buildmulti_pSeriesLP(stru
 		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
 		/* If allocation fails, fall back to the loop implementation */
 		if (!tcep) {
-			local_irq_restore(flags);
+			local_unlock_irqrestore(&tce_page.lock, flags);
 			return tce_build_pSeriesLP(tbl->it_index, tcenum,
 					tceshift,
 					npages, uaddr, direction, attrs);
 		}
-		__this_cpu_write(tce_page, tcep);
+		__this_cpu_write(tce_page.page, tcep);
 	}
 
 	rpn = __pa(uaddr) >> tceshift;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:275 @ static int tce_buildmulti_pSeriesLP(stru
 		tcenum += limit;
 	} while (npages > 0 && !rc);
 
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&tce_page.lock, flags);
 
 	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
 		ret = (int)rc;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:451 @ static int tce_setrange_multi_pSeriesLP(
 				DMA_BIDIRECTIONAL, 0);
 	}
 
-	local_irq_disable();	/* to protect tcep and the page behind it */
-	tcep = __this_cpu_read(tce_page);
+	/* to protect tcep and the page behind it */
+	local_lock_irq(&tce_page.lock);
+	tcep = __this_cpu_read(tce_page.page);
 
 	if (!tcep) {
 		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
 		if (!tcep) {
-			local_irq_enable();
+			local_unlock_irq(&tce_page.lock);
 			return -ENOMEM;
 		}
-		__this_cpu_write(tce_page, tcep);
+		__this_cpu_write(tce_page.page, tcep);
 	}
 
 	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:504 @ static int tce_setrange_multi_pSeriesLP(
 
 	/* error cleanup: caller will clear whole range */
 
-	local_irq_enable();
+	local_unlock_irq(&tce_page.lock);
 	return rc;
 }
 
Index: linux-5.15/arch/riscv/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/riscv/include/asm/spinlock_types.h
+++ linux-5.15/arch/riscv/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:9 @
 #ifndef _ASM_RISCV_SPINLOCK_TYPES_H
 #define _ASM_RISCV_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/s390/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/s390/include/asm/spinlock_types.h
+++ linux-5.15/arch/s390/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5 @
 #ifndef __ASM_SPINLOCK_TYPES_H
 #define __ASM_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/sh/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/sh/include/asm/spinlock_types.h
+++ linux-5.15/arch/sh/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5 @
 #ifndef __ASM_SH_SPINLOCK_TYPES_H
 #define __ASM_SH_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/arch/sh/kernel/irq.c
===================================================================
--- linux-5.15.orig/arch/sh/kernel/irq.c
+++ linux-5.15/arch/sh/kernel/irq.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:152 @ void irq_ctx_exit(int cpu)
 	hardirq_ctx[cpu] = NULL;
 }
 
+#ifndef CONFIG_PREEMPT_RT
 void do_softirq_own_stack(void)
 {
 	struct thread_info *curctx;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:180 @ void do_softirq_own_stack(void)
 		  "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
 	);
 }
+#endif
 #else
 static inline void handle_one_irq(unsigned int irq)
 {
Index: linux-5.15/arch/sparc/kernel/irq_64.c
===================================================================
--- linux-5.15.orig/arch/sparc/kernel/irq_64.c
+++ linux-5.15/arch/sparc/kernel/irq_64.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:858 @ void __irq_entry handler_irq(int pil, st
 	set_irq_regs(old_regs);
 }
 
+#ifndef CONFIG_PREEMPT_RT
 void do_softirq_own_stack(void)
 {
 	void *orig_sp, *sp = softirq_stack[smp_processor_id()];
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:873 @ void do_softirq_own_stack(void)
 	__asm__ __volatile__("mov %0, %%sp"
 			     : : "r" (orig_sp));
 }
+#endif
 
 #ifdef CONFIG_HOTPLUG_CPU
 void fixup_irqs(void)
Index: linux-5.15/arch/x86/Kconfig
===================================================================
--- linux-5.15.orig/arch/x86/Kconfig
+++ linux-5.15/arch/x86/Kconfig
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:110 @ config X86
 	select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP	if NR_CPUS <= 4096
 	select ARCH_SUPPORTS_LTO_CLANG
 	select ARCH_SUPPORTS_LTO_CLANG_THIN
+	select ARCH_SUPPORTS_RT
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_MEMTEST
 	select ARCH_USE_QUEUED_RWLOCKS
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:235 @ config X86
 	select HAVE_PCI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_PREEMPT_LAZY
 	select MMU_GATHER_RCU_TABLE_FREE		if PARAVIRT
 	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_REGS_AND_STACK_ACCESS_API
Index: linux-5.15/arch/x86/include/asm/irq_stack.h
===================================================================
--- linux-5.15.orig/arch/x86/include/asm/irq_stack.h
+++ linux-5.15/arch/x86/include/asm/irq_stack.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:205 @
 			      IRQ_CONSTRAINTS, regs, vector);		\
 }
 
+#ifndef CONFIG_PREEMPT_RT
 /*
  * Macro to invoke __do_softirq on the irq stack. This is only called from
  * task context when bottom halves are about to be reenabled and soft
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:219 @
 	__this_cpu_write(hardirq_stack_inuse, false);			\
 }
 
+#endif
+
 #else /* CONFIG_X86_64 */
 /* System vector handlers always run on the stack they interrupted. */
 #define run_sysvec_on_irqstack_cond(func, regs)				\
Index: linux-5.15/arch/x86/include/asm/preempt.h
===================================================================
--- linux-5.15.orig/arch/x86/include/asm/preempt.h
+++ linux-5.15/arch/x86/include/asm/preempt.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:93 @ static __always_inline void __preempt_co
  * a decrement which hits zero means we have no preempt_count and should
  * reschedule.
  */
-static __always_inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool ____preempt_count_dec_and_test(void)
 {
 	return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
 }
 
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+	if (____preempt_count_dec_and_test())
+		return true;
+#ifdef CONFIG_PREEMPT_LAZY
+	if (preempt_count())
+		return false;
+	if (current_thread_info()->preempt_lazy_count)
+		return false;
+	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+	return false;
+#endif
+}
+
 /*
  * Returns true when we need to resched and can (barring IRQ state).
  */
 static __always_inline bool should_resched(int preempt_offset)
 {
+#ifdef CONFIG_PREEMPT_LAZY
+	u32 tmp;
+	tmp = raw_cpu_read_4(__preempt_count);
+	if (tmp == preempt_offset)
+		return true;
+
+	/* preempt count == 0 ? */
+	tmp &= ~PREEMPT_NEED_RESCHED;
+	if (tmp != preempt_offset)
+		return false;
+	/* XXX PREEMPT_LOCK_OFFSET */
+	if (current_thread_info()->preempt_lazy_count)
+		return false;
+	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
 	return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
+#endif
 }
 
 #ifdef CONFIG_PREEMPTION
Index: linux-5.15/arch/x86/include/asm/signal.h
===================================================================
--- linux-5.15.orig/arch/x86/include/asm/signal.h
+++ linux-5.15/arch/x86/include/asm/signal.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:31 @ typedef struct {
 #define SA_IA32_ABI	0x02000000u
 #define SA_X32_ABI	0x01000000u
 
+/*
+ * Because some traps use the IST stack, we must keep preemption
+ * disabled while calling do_trap(), but do_trap() may call
+ * force_sig_info() which will grab the signal spin_locks for the
+ * task, which in PREEMPT_RT are mutexes.  By defining
+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
+ * trap.
+ */
+#if defined(CONFIG_PREEMPT_RT)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
 #ifndef CONFIG_COMPAT
 #define compat_sigset_t compat_sigset_t
 typedef sigset_t compat_sigset_t;
Index: linux-5.15/arch/x86/include/asm/stackprotector.h
===================================================================
--- linux-5.15.orig/arch/x86/include/asm/stackprotector.h
+++ linux-5.15/arch/x86/include/asm/stackprotector.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:53 @
  */
 static __always_inline void boot_init_stack_canary(void)
 {
-	u64 canary;
+	u64 canary = 0;
 	u64 tsc;
 
 #ifdef CONFIG_X86_64
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:64 @ static __always_inline void boot_init_st
 	 * of randomness. The TSC only matters for very early init,
 	 * there it already has some randomness on most systems. Later
 	 * on during the bootup the random pool has true entropy too.
+	 * For preempt-rt we need to weaken the randomness a bit, as
+	 * we can't call into the random generator from atomic context
+	 * due to locking constraints. We just leave canary
+	 * uninitialized and use the TSC based randomness on top of it.
 	 */
+#ifndef CONFIG_PREEMPT_RT
 	get_random_bytes(&canary, sizeof(canary));
+#endif
 	tsc = rdtsc();
 	canary += tsc + (tsc << 32UL);
 	canary &= CANARY_MASK;
Index: linux-5.15/arch/x86/include/asm/thread_info.h
===================================================================
--- linux-5.15.orig/arch/x86/include/asm/thread_info.h
+++ linux-5.15/arch/x86/include/asm/thread_info.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:60 @ struct thread_info {
 	unsigned long		flags;		/* low level flags */
 	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
 	u32			status;		/* thread synchronous flags */
+	int			preempt_lazy_count;	/* 0 => lazy preemptable
+							   <0 => BUG */
 };
 
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.flags		= 0,			\
+	.preempt_lazy_count	= 0,		\
 }
 
 #else /* !__ASSEMBLY__ */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:96 @ struct thread_info {
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_NOTIFY_SIGNAL	17	/* signal notifications exist */
 #define TIF_SLD			18	/* Restore split lock detection on context switch */
+#define TIF_NEED_RESCHED_LAZY	19	/* lazy rescheduling necessary */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:121 @ struct thread_info {
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_SLD		(1 << TIF_SLD)
+#define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_SPEC_FORCE_UPDATE	(1 << TIF_SPEC_FORCE_UPDATE)
Index: linux-5.15/arch/x86/kernel/irq_32.c
===================================================================
--- linux-5.15.orig/arch/x86/kernel/irq_32.c
+++ linux-5.15/arch/x86/kernel/irq_32.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:135 @ int irq_init_percpu_irqstack(unsigned in
 	return 0;
 }
 
+#ifndef CONFIG_PREEMPT_RT
 void do_softirq_own_stack(void)
 {
 	struct irq_stack *irqstk;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:152 @ void do_softirq_own_stack(void)
 
 	call_on_stack(__do_softirq, isp);
 }
+#endif
 
 void __handle_irq(struct irq_desc *desc, struct pt_regs *regs)
 {
Index: linux-5.15/arch/x86/kernel/kgdb.c
===================================================================
--- linux-5.15.orig/arch/x86/kernel/kgdb.c
+++ linux-5.15/arch/x86/kernel/kgdb.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:505 @ static int kgdb_nmi_handler(unsigned int
 		if (atomic_read(&kgdb_active) != -1) {
 			/* KGDB CPU roundup */
 			cpu = raw_smp_processor_id();
-			kgdb_nmicallback(cpu, regs);
-			set_bit(cpu, was_in_debug_nmi);
-			touch_nmi_watchdog();
+
+			if (!kgdb_roundup_delay(cpu)) {
+				kgdb_nmicallback(cpu, regs);
+				set_bit(cpu, was_in_debug_nmi);
+				touch_nmi_watchdog();
+			}
 
 			return NMI_HANDLED;
 		}
Index: linux-5.15/arch/x86/kvm/x86.c
===================================================================
--- linux-5.15.orig/arch/x86/kvm/x86.c
+++ linux-5.15/arch/x86/kvm/x86.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:8572 @ int kvm_arch_init(void *opaque)
 		goto out;
 	}
 
+#ifdef CONFIG_PREEMPT_RT
+	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+		pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n");
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+#endif
+
 	r = -ENOMEM;
 
 	x86_emulator_cache = kvm_alloc_emulator_cache();
Index: linux-5.15/arch/xtensa/include/asm/spinlock_types.h
===================================================================
--- linux-5.15.orig/arch/xtensa/include/asm/spinlock_types.h
+++ linux-5.15/arch/xtensa/include/asm/spinlock_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5 @
 #ifndef __ASM_SPINLOCK_TYPES_H
 #define __ASM_SPINLOCK_TYPES_H
 
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/block/blk-mq.c
===================================================================
--- linux-5.15.orig/block/blk-mq.c
+++ linux-5.15/block/blk-mq.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1570 @ static void __blk_mq_delay_run_hw_queue(
 		return;
 
 	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
-		int cpu = get_cpu();
+		int cpu = get_cpu_light();
 		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
 			__blk_mq_run_hw_queue(hctx);
-			put_cpu();
+			put_cpu_light();
 			return;
 		}
 
-		put_cpu();
+		put_cpu_light();
 	}
 
 	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
Index: linux-5.15/crypto/testmgr.c
===================================================================
--- linux-5.15.orig/crypto/testmgr.c
+++ linux-5.15/crypto/testmgr.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1064 @ static void generate_random_testvec_conf
 
 static void crypto_disable_simd_for_test(void)
 {
-	preempt_disable();
+	migrate_disable();
 	__this_cpu_write(crypto_simd_disabled_for_test, true);
 }
 
 static void crypto_reenable_simd_for_test(void)
 {
 	__this_cpu_write(crypto_simd_disabled_for_test, false);
-	preempt_enable();
+	migrate_enable();
 }
 
 /*
Index: linux-5.15/drivers/block/zram/zram_drv.c
===================================================================
--- linux-5.15.orig/drivers/block/zram/zram_drv.c
+++ linux-5.15/drivers/block/zram/zram_drv.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:62 @ static void zram_free_page(struct zram *
 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
 				u32 index, int offset, struct bio *bio);
 
+#ifdef CONFIG_PREEMPT_RT
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
+{
+	size_t index;
+
+	for (index = 0; index < num_pages; index++)
+		spin_lock_init(&zram->table[index].lock);
+}
+
+static int zram_slot_trylock(struct zram *zram, u32 index)
+{
+	int ret;
+
+	ret = spin_trylock(&zram->table[index].lock);
+	if (ret)
+		__set_bit(ZRAM_LOCK, &zram->table[index].flags);
+	return ret;
+}
+
+static void zram_slot_lock(struct zram *zram, u32 index)
+{
+	spin_lock(&zram->table[index].lock);
+	__set_bit(ZRAM_LOCK, &zram->table[index].flags);
+}
+
+static void zram_slot_unlock(struct zram *zram, u32 index)
+{
+	__clear_bit(ZRAM_LOCK, &zram->table[index].flags);
+	spin_unlock(&zram->table[index].lock);
+}
+
+#else
+
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
 
 static int zram_slot_trylock(struct zram *zram, u32 index)
 {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:111 @ static void zram_slot_unlock(struct zram
 {
 	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
 }
+#endif
 
 static inline bool init_done(struct zram *zram)
 {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1207 @ static bool zram_meta_alloc(struct zram
 
 	if (!huge_class_size)
 		huge_class_size = zs_huge_class_size(zram->mem_pool);
+	zram_meta_init_table_locks(zram, num_pages);
 	return true;
 }
 
Index: linux-5.15/drivers/block/zram/zram_drv.h
===================================================================
--- linux-5.15.orig/drivers/block/zram/zram_drv.h
+++ linux-5.15/drivers/block/zram/zram_drv.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:66 @ struct zram_table_entry {
 		unsigned long element;
 	};
 	unsigned long flags;
+	spinlock_t lock;
 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
 	ktime_t ac_time;
 #endif
Index: linux-5.15/drivers/char/tpm/tpm_tis.c
===================================================================
--- linux-5.15.orig/drivers/char/tpm/tpm_tis.c
+++ linux-5.15/drivers/char/tpm/tpm_tis.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:53 @ static inline struct tpm_tis_tcg_phy *to
 	return container_of(data, struct tpm_tis_tcg_phy, priv);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * Flushes previous write operations to chip so that a subsequent
+ * ioread*()s won't stall a cpu.
+ */
+static inline void tpm_tis_flush(void __iomem *iobase)
+{
+	ioread8(iobase + TPM_ACCESS(0));
+}
+#else
+#define tpm_tis_flush(iobase) do { } while (0)
+#endif
+
+static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr)
+{
+	iowrite8(b, iobase + addr);
+	tpm_tis_flush(iobase);
+}
+
+static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
+{
+	iowrite32(b, iobase + addr);
+	tpm_tis_flush(iobase);
+}
+
 static int interrupts = -1;
 module_param(interrupts, int, 0444);
 MODULE_PARM_DESC(interrupts, "Enable interrupts");
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:198 @ static int tpm_tcg_write_bytes(struct tp
 	struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
 
 	while (len--)
-		iowrite8(*value++, phy->iobase + addr);
+		tpm_tis_iowrite8(*value++, phy->iobase, addr);
 
 	return 0;
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:225 @ static int tpm_tcg_write32(struct tpm_ti
 {
 	struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
 
-	iowrite32(value, phy->iobase + addr);
+	tpm_tis_iowrite32(value, phy->iobase, addr);
 
 	return 0;
 }
Index: linux-5.15/drivers/firmware/efi/efi.c
===================================================================
--- linux-5.15.orig/drivers/firmware/efi/efi.c
+++ linux-5.15/drivers/firmware/efi/efi.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:70 @ struct mm_struct efi_mm = {
 
 struct workqueue_struct *efi_rts_wq;
 
-static bool disable_runtime;
+static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT);
 static int __init setup_noefi(char *arg)
 {
 	disable_runtime = true;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:101 @ static int __init parse_efi_cmdline(char
 	if (parse_option_str(str, "noruntime"))
 		disable_runtime = true;
 
+	if (parse_option_str(str, "runtime"))
+		disable_runtime = false;
+
 	if (parse_option_str(str, "nosoftreserve"))
 		set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags);
 
Index: linux-5.15/drivers/gpu/drm/i915/display/intel_crtc.c
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/display/intel_crtc.c
+++ linux-5.15/drivers/gpu/drm/i915/display/intel_crtc.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:428 @ void intel_pipe_update_start(const struc
 	 */
 	intel_psr_wait_for_idle(new_crtc_state);
 
-	local_irq_disable();
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_irq_disable();
 
 	crtc->debug.min_vbl = min;
 	crtc->debug.max_vbl = max;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:454 @ void intel_pipe_update_start(const struc
 			break;
 		}
 
-		local_irq_enable();
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+			local_irq_enable();
 
 		timeout = schedule_timeout(timeout);
 
-		local_irq_disable();
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+			local_irq_disable();
 	}
 
 	finish_wait(wq, &wait);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:493 @ void intel_pipe_update_start(const struc
 	return;
 
 irq_disable:
-	local_irq_disable();
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_irq_disable();
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:573 @ void intel_pipe_update_end(struct intel_
 		new_crtc_state->uapi.event = NULL;
 	}
 
-	local_irq_enable();
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_irq_enable();
 
 	/* Send VRR Push to terminate Vblank */
 	intel_vrr_send_push(new_crtc_state);
Index: linux-5.15/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ linux-5.15/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:314 @ void __intel_breadcrumbs_park(struct int
 	/* Kick the work once more to drain the signalers, and disarm the irq */
 	irq_work_sync(&b->irq_work);
 	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
-		local_irq_disable();
-		signal_irq_work(&b->irq_work);
-		local_irq_enable();
+		irq_work_queue(&b->irq_work);
 		cond_resched();
+		irq_work_sync(&b->irq_work);
 	}
 }
 
Index: linux-5.15/drivers/gpu/drm/i915/gt/intel_context.h
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/gt/intel_context.h
+++ linux-5.15/drivers/gpu/drm/i915/gt/intel_context.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:166 @ static inline void intel_context_enter(s
 
 static inline void intel_context_mark_active(struct intel_context *ce)
 {
-	lockdep_assert_held(&ce->timeline->mutex);
+	lockdep_assert(lockdep_is_held(&ce->timeline->mutex) ||
+		       test_bit(CONTEXT_IS_PARKED, &ce->flags));
 	++ce->active_count;
 }
 
Index: linux-5.15/drivers/gpu/drm/i915/gt/intel_context_types.h
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ linux-5.15/drivers/gpu/drm/i915/gt/intel_context_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:115 @ struct intel_context {
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
 #define CONTEXT_NOPREEMPT		8
 #define CONTEXT_LRCA_DIRTY		9
+#define CONTEXT_IS_PARKED		10
 
 	struct {
 		u64 timeout_us;
Index: linux-5.15/drivers/gpu/drm/i915/gt/intel_engine_pm.c
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ linux-5.15/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:83 @ static int __engine_unpark(struct intel_
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_LOCKDEP)
-
-static unsigned long __timeline_mark_lock(struct intel_context *ce)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
-
-	return flags;
-}
-
-static void __timeline_mark_unlock(struct intel_context *ce,
-				   unsigned long flags)
-{
-	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
-	local_irq_restore(flags);
-}
-
-#else
-
-static unsigned long __timeline_mark_lock(struct intel_context *ce)
-{
-	return 0;
-}
-
-static void __timeline_mark_unlock(struct intel_context *ce,
-				   unsigned long flags)
-{
-}
-
-#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
-
 static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
 	struct i915_request *rq = to_request(fence);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:129 @ static bool switch_to_kernel_context(str
 {
 	struct intel_context *ce = engine->kernel_context;
 	struct i915_request *rq;
-	unsigned long flags;
 	bool result = true;
 
 	/* GPU is pointing to the void, as good as in the kernel context. */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:170 @ static bool switch_to_kernel_context(str
 	 * engine->wakeref.count, we may see the request completion and retire
 	 * it causing an underflow of the engine->wakeref.
 	 */
-	flags = __timeline_mark_lock(ce);
+	set_bit(CONTEXT_IS_PARKED, &ce->flags);
 	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
 
 	rq = __i915_request_create(ce, GFP_NOWAIT);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:202 @ static bool switch_to_kernel_context(str
 
 	result = false;
 out_unlock:
-	__timeline_mark_unlock(ce, flags);
+	clear_bit(CONTEXT_IS_PARKED, &ce->flags);
 	return result;
 }
 
Index: linux-5.15/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ linux-5.15/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1289 @ static void execlists_dequeue(struct int
 	 * and context switches) submission.
 	 */
 
-	spin_lock(&sched_engine->lock);
+	spin_lock_irq(&sched_engine->lock);
 
 	/*
 	 * If the queue is higher priority than the last
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1389 @ static void execlists_dequeue(struct int
 				 * Even if ELSP[1] is occupied and not worthy
 				 * of timeslices, our queue might be.
 				 */
-				spin_unlock(&sched_engine->lock);
+				spin_unlock_irq(&sched_engine->lock);
 				return;
 			}
 		}
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1415 @ static void execlists_dequeue(struct int
 
 		if (last && !can_merge_rq(last, rq)) {
 			spin_unlock(&ve->base.sched_engine->lock);
-			spin_unlock(&engine->sched_engine->lock);
+			spin_unlock_irq(&engine->sched_engine->lock);
 			return; /* leave this for another sibling */
 		}
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1577 @ done:
 	 */
 	sched_engine->queue_priority_hint = queue_prio(sched_engine);
 	i915_sched_engine_reset_on_empty(sched_engine);
-	spin_unlock(&sched_engine->lock);
+	spin_unlock_irq(&sched_engine->lock);
 
 	/*
 	 * We can skip poking the HW if we ended up with exactly the same set
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1603 @ done:
 	}
 }
 
-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
-{
-	local_irq_disable(); /* Suspend interrupts across request submission */
-	execlists_dequeue(engine);
-	local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
-}
-
 static void clear_ports(struct i915_request **ports, int count)
 {
 	memset_p((void **)ports, NULL, count);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2438 @ static void execlists_submission_tasklet
 	}
 
 	if (!engine->execlists.pending[0]) {
-		execlists_dequeue_irq(engine);
+		execlists_dequeue(engine);
 		start_timeslice(engine);
 	}
 
Index: linux-5.15/drivers/gpu/drm/i915/i915_irq.c
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/i915_irq.c
+++ linux-5.15/drivers/gpu/drm/i915/i915_irq.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:889 @ static bool i915_get_crtc_scanoutpos(str
 	 */
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
-	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
 
 	/* Get optional system timestamp before query. */
 	if (stime)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:954 @ static bool i915_get_crtc_scanoutpos(str
 	if (etime)
 		*etime = ktime_get();
 
-	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 
Index: linux-5.15/drivers/gpu/drm/i915/i915_request.c
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/i915_request.c
+++ linux-5.15/drivers/gpu/drm/i915/i915_request.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:562 @ bool __i915_request_submit(struct i915_r
 
 	RQ_TRACE(request, "\n");
 
-	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->sched_engine->lock);
 
 	/*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:670 @ void __i915_request_unsubmit(struct i915
 	 */
 	RQ_TRACE(request, "\n");
 
-	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->sched_engine->lock);
 
 	/*
Index: linux-5.15/drivers/gpu/drm/i915/i915_request.h
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/i915_request.h
+++ linux-5.15/drivers/gpu/drm/i915/i915_request.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:612 @ i915_request_timeline(const struct i915_
 {
 	/* Valid only while the request is being constructed (or retired). */
 	return rcu_dereference_protected(rq->timeline,
-					 lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
+					 lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) ||
+					 test_bit(CONTEXT_IS_PARKED, &rq->context->flags));
 }
 
 static inline struct i915_gem_context *
Index: linux-5.15/drivers/gpu/drm/i915/i915_trace.h
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/i915_trace.h
+++ linux-5.15/drivers/gpu/drm/i915/i915_trace.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5 @
 #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
 #define _I915_TRACE_H_
 
+#ifdef CONFIG_PREEMPT_RT
+#define NOTRACE
+#endif
+
 #include <linux/stringify.h>
 #include <linux/types.h>
 #include <linux/tracepoint.h>
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:826 @ DEFINE_EVENT(i915_request, i915_request_
 	     TP_ARGS(rq)
 );
 
-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
+#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE)
 DEFINE_EVENT(i915_request, i915_request_guc_submit,
 	     TP_PROTO(struct i915_request *rq),
 	     TP_ARGS(rq)
Index: linux-5.15/drivers/gpu/drm/i915/i915_utils.h
===================================================================
--- linux-5.15.orig/drivers/gpu/drm/i915/i915_utils.h
+++ linux-5.15/drivers/gpu/drm/i915/i915_utils.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:346 @ wait_remaining_ms_from_jiffies(unsigned
 #define wait_for(COND, MS)		_wait_for((COND), (MS) * 1000, 10, 1000)
 
 /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
 # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
 #else
 # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
Index: linux-5.15/drivers/i2c/busses/i2c-cht-wc.c
===================================================================
--- linux-5.15.orig/drivers/i2c/busses/i2c-cht-wc.c
+++ linux-5.15/drivers/i2c/busses/i2c-cht-wc.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:102 @ static irqreturn_t cht_wc_i2c_adap_threa
 	 * interrupt handler as well, so running the client irq handler from
 	 * this thread will cause things to lock up.
 	 */
-	if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ) {
-		/*
-		 * generic_handle_irq expects local IRQs to be disabled
-		 * as normally it is called from interrupt context.
-		 */
-		local_irq_disable();
-		generic_handle_irq(adap->client_irq);
-		local_irq_enable();
-	}
+	if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ)
+		generic_handle_irq_safe(adap->client_irq);
 
 	return IRQ_HANDLED;
 }
Index: linux-5.15/drivers/i2c/i2c-core-base.c
===================================================================
--- linux-5.15.orig/drivers/i2c/i2c-core-base.c
+++ linux-5.15/drivers/i2c/i2c-core-base.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1425 @ int i2c_handle_smbus_host_notify(struct
 	if (irq <= 0)
 		return -ENXIO;
 
-	generic_handle_irq(irq);
+	generic_handle_irq_safe(irq);
 
 	return 0;
 }
Index: linux-5.15/drivers/leds/trigger/Kconfig
===================================================================
--- linux-5.15.orig/drivers/leds/trigger/Kconfig
+++ linux-5.15/drivers/leds/trigger/Kconfig
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:67 @ config LEDS_TRIGGER_BACKLIGHT
 
 config LEDS_TRIGGER_CPU
 	bool "LED CPU Trigger"
+	depends on !PREEMPT_RT
 	help
 	  This allows LEDs to be controlled by active CPUs. This shows
 	  the active CPUs across an array of LEDs so you can see which
Index: linux-5.15/drivers/md/raid5.c
===================================================================
--- linux-5.15.orig/drivers/md/raid5.c
+++ linux-5.15/drivers/md/raid5.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2221 @ static void raid_run_ops(struct stripe_h
 	struct raid5_percpu *percpu;
 	unsigned long cpu;
 
-	cpu = get_cpu();
+	cpu = get_cpu_light();
 	percpu = per_cpu_ptr(conf->percpu, cpu);
+	spin_lock(&percpu->lock);
 	if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
 		ops_run_biofill(sh);
 		overlap_clear++;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2282 @ static void raid_run_ops(struct stripe_h
 			if (test_and_clear_bit(R5_Overlap, &dev->flags))
 				wake_up(&sh->raid_conf->wait_for_overlap);
 		}
-	put_cpu();
+	spin_unlock(&percpu->lock);
+	put_cpu_light();
 }
 
 static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:7115 @ static int raid456_cpu_up_prepare(unsign
 			__func__, cpu);
 		return -ENOMEM;
 	}
+	spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
 	return 0;
 }
 
Index: linux-5.15/drivers/md/raid5.h
===================================================================
--- linux-5.15.orig/drivers/md/raid5.h
+++ linux-5.15/drivers/md/raid5.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:638 @ struct r5conf {
 	int			recovery_disabled;
 	/* per cpu variables */
 	struct raid5_percpu {
+		spinlock_t	lock;		/* Protection for -RT */
 		struct page	*spare_page; /* Used when checking P/Q in raid6 */
 		void		*scribble;  /* space for constructing buffer
 					     * lists and performing address
Index: linux-5.15/drivers/mfd/ezx-pcap.c
===================================================================
--- linux-5.15.orig/drivers/mfd/ezx-pcap.c
+++ linux-5.15/drivers/mfd/ezx-pcap.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:196 @ static void pcap_isr_work(struct work_st
 		ezx_pcap_write(pcap, PCAP_REG_MSR, isr | msr);
 		ezx_pcap_write(pcap, PCAP_REG_ISR, isr);
 
-		local_irq_disable();
 		service = isr & ~msr;
 		for (irq = pcap->irq_base; service; service >>= 1, irq++) {
 			if (service & 1)
-				generic_handle_irq(irq);
+				generic_handle_irq_safe(irq);
 		}
-		local_irq_enable();
 		ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
 	} while (gpio_get_value(pdata->gpio));
 }
Index: linux-5.15/drivers/misc/hi6421v600-irq.c
===================================================================
--- linux-5.15.orig/drivers/misc/hi6421v600-irq.c
+++ linux-5.15/drivers/misc/hi6421v600-irq.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:121 @ static irqreturn_t hi6421v600_irq_handle
 			 * If both powerkey down and up IRQs are received,
 			 * handle them at the right order
 			 */
-			generic_handle_irq(priv->irqs[POWERKEY_DOWN]);
-			generic_handle_irq(priv->irqs[POWERKEY_UP]);
+			generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]);
+			generic_handle_irq_safe(priv->irqs[POWERKEY_UP]);
 			pending &= ~HISI_IRQ_POWERKEY_UP_DOWN;
 		}
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:130 @ static irqreturn_t hi6421v600_irq_handle
 			continue;
 
 		for_each_set_bit(offset, &pending, BITS_PER_BYTE) {
-			generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]);
+			generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]);
 		}
 	}
 
Index: linux-5.15/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
===================================================================
--- linux-5.15.orig/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
+++ linux-5.15/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:461 @ nfp_abm_qdisc_graft(struct nfp_abm_link
 static void
 nfp_abm_stats_calculate(struct nfp_alink_stats *new,
 			struct nfp_alink_stats *old,
-			struct gnet_stats_basic_packed *bstats,
+			struct gnet_stats_basic_sync *bstats,
 			struct gnet_stats_queue *qstats)
 {
 	_bstats_update(bstats, new->tx_bytes - old->tx_bytes,
Index: linux-5.15/drivers/net/usb/lan78xx.c
===================================================================
--- linux-5.15.orig/drivers/net/usb/lan78xx.c
+++ linux-5.15/drivers/net/usb/lan78xx.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1370 @ static void lan78xx_status(struct lan78x
 		netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
 		lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
 
-		if (dev->domain_data.phyirq > 0) {
-			local_irq_disable();
-			generic_handle_irq(dev->domain_data.phyirq);
-			local_irq_enable();
-		}
+		if (dev->domain_data.phyirq > 0)
+			generic_handle_irq_safe(dev->domain_data.phyirq);
 	} else {
 		netdev_warn(dev->net,
 			    "unexpected interrupt: 0x%08x\n", intdata);
Index: linux-5.15/drivers/scsi/fcoe/fcoe.c
===================================================================
--- linux-5.15.orig/drivers/scsi/fcoe/fcoe.c
+++ linux-5.15/drivers/scsi/fcoe/fcoe.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1453 @ err2:
 static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
 {
 	struct fcoe_percpu_s *fps;
-	int rc;
+	int rc, cpu = get_cpu_light();
 
-	fps = &get_cpu_var(fcoe_percpu);
+	fps = &per_cpu(fcoe_percpu, cpu);
 	rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
-	put_cpu_var(fcoe_percpu);
+	put_cpu_light();
 
 	return rc;
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1642 @ static inline int fcoe_filter_frames(str
 		return 0;
 	}
 
-	stats = per_cpu_ptr(lport->stats, get_cpu());
+	stats = per_cpu_ptr(lport->stats, get_cpu_light());
 	stats->InvalidCRCCount++;
 	if (stats->InvalidCRCCount < 5)
 		printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
-	put_cpu();
+	put_cpu_light();
 	return -EINVAL;
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1687 @ static void fcoe_recv_frame(struct sk_bu
 	 */
 	hp = (struct fcoe_hdr *) skb_network_header(skb);
 
-	stats = per_cpu_ptr(lport->stats, get_cpu());
+	stats = per_cpu_ptr(lport->stats, get_cpu_light());
 	if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
 		if (stats->ErrorFrames < 5)
 			printk(KERN_WARNING "fcoe: FCoE version "
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1719 @ static void fcoe_recv_frame(struct sk_bu
 		goto drop;
 
 	if (!fcoe_filter_frames(lport, fp)) {
-		put_cpu();
+		put_cpu_light();
 		fc_exch_recv(lport, fp);
 		return;
 	}
 drop:
 	stats->ErrorFrames++;
-	put_cpu();
+	put_cpu_light();
 	kfree_skb(skb);
 }
 
Index: linux-5.15/drivers/scsi/fcoe/fcoe_ctlr.c
===================================================================
--- linux-5.15.orig/drivers/scsi/fcoe/fcoe_ctlr.c
+++ linux-5.15/drivers/scsi/fcoe/fcoe_ctlr.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:831 @ static unsigned long fcoe_ctlr_age_fcfs(
 
 	INIT_LIST_HEAD(&del_list);
 
-	stats = per_cpu_ptr(fip->lp->stats, get_cpu());
+	stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
 
 	list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
 		deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:867 @ static unsigned long fcoe_ctlr_age_fcfs(
 				sel_time = fcf->time;
 		}
 	}
-	put_cpu();
+	put_cpu_light();
 
 	list_for_each_entry_safe(fcf, next, &del_list, list) {
 		/* Removes fcf from current list */
Index: linux-5.15/drivers/scsi/libfc/fc_exch.c
===================================================================
--- linux-5.15.orig/drivers/scsi/libfc/fc_exch.c
+++ linux-5.15/drivers/scsi/libfc/fc_exch.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:828 @ static struct fc_exch *fc_exch_em_alloc(
 	}
 	memset(ep, 0, sizeof(*ep));
 
-	cpu = get_cpu();
+	cpu = get_cpu_light();
 	pool = per_cpu_ptr(mp->pool, cpu);
 	spin_lock_bh(&pool->lock);
-	put_cpu();
+	put_cpu_light();
 
 	/* peek cache of free slot */
 	if (pool->left != FC_XID_UNKNOWN) {
Index: linux-5.15/drivers/staging/greybus/gpio.c
===================================================================
--- linux-5.15.orig/drivers/staging/greybus/gpio.c
+++ linux-5.15/drivers/staging/greybus/gpio.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:394 @ static int gb_gpio_request_handler(struc
 		return -EINVAL;
 	}
 
-	local_irq_disable();
-	ret = generic_handle_irq(irq);
-	local_irq_enable();
-
+	ret = generic_handle_irq_safe(irq);
 	if (ret)
 		dev_err(dev, "failed to invoke irq handler\n");
 
Index: linux-5.15/drivers/tty/serial/8250/8250.h
===================================================================
--- linux-5.15.orig/drivers/tty/serial/8250/8250.h
+++ linux-5.15/drivers/tty/serial/8250/8250.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:159 @ static inline void serial_dl_write(struc
 	up->dl_write(up, value);
 }
 
+static inline void serial8250_set_IER(struct uart_8250_port *up,
+				      unsigned char ier)
+{
+	struct uart_port *port = &up->port;
+	unsigned long flags;
+	bool is_console;
+
+	is_console = uart_console(port);
+
+	if (is_console)
+		console_atomic_lock(flags);
+
+	serial_out(up, UART_IER, ier);
+
+	if (is_console)
+		console_atomic_unlock(flags);
+}
+
+static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up)
+{
+	struct uart_port *port = &up->port;
+	unsigned int clearval = 0;
+	unsigned long flags;
+	unsigned int prior;
+	bool is_console;
+
+	is_console = uart_console(port);
+
+	if (up->capabilities & UART_CAP_UUE)
+		clearval = UART_IER_UUE;
+
+	if (is_console)
+		console_atomic_lock(flags);
+
+	prior = serial_port_in(port, UART_IER);
+	serial_port_out(port, UART_IER, clearval);
+
+	if (is_console)
+		console_atomic_unlock(flags);
+
+	return prior;
+}
+
 static inline bool serial8250_set_THRI(struct uart_8250_port *up)
 {
 	if (up->ier & UART_IER_THRI)
 		return false;
 	up->ier |= UART_IER_THRI;
-	serial_out(up, UART_IER, up->ier);
+	serial8250_set_IER(up, up->ier);
 	return true;
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:216 @ static inline bool serial8250_clear_THRI
 	if (!(up->ier & UART_IER_THRI))
 		return false;
 	up->ier &= ~UART_IER_THRI;
-	serial_out(up, UART_IER, up->ier);
+	serial8250_set_IER(up, up->ier);
 	return true;
 }
 
Index: linux-5.15/drivers/tty/serial/8250/8250_core.c
===================================================================
--- linux-5.15.orig/drivers/tty/serial/8250/8250_core.c
+++ linux-5.15/drivers/tty/serial/8250/8250_core.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:268 @ static void serial8250_backup_timeout(st
 	 * Must disable interrupts or else we risk racing with the interrupt
 	 * based handler.
 	 */
-	if (up->port.irq) {
-		ier = serial_in(up, UART_IER);
-		serial_out(up, UART_IER, 0);
-	}
+	if (up->port.irq)
+		ier = serial8250_clear_IER(up);
 
 	iir = serial_in(up, UART_IIR);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:292 @ static void serial8250_backup_timeout(st
 		serial8250_tx_chars(up);
 
 	if (up->port.irq)
-		serial_out(up, UART_IER, ier);
+		serial8250_set_IER(up, ier);
 
 	spin_unlock_irqrestore(&up->port.lock, flags);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:579 @ serial8250_register_ports(struct uart_dr
 
 #ifdef CONFIG_SERIAL_8250_CONSOLE
 
+static void univ8250_console_write_atomic(struct console *co, const char *s,
+					  unsigned int count)
+{
+	struct uart_8250_port *up = &serial8250_ports[co->index];
+
+	serial8250_console_write_atomic(up, s, count);
+}
+
 static void univ8250_console_write(struct console *co, const char *s,
 				   unsigned int count)
 {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:680 @ static int univ8250_console_match(struct
 
 static struct console univ8250_console = {
 	.name		= "ttyS",
+	.write_atomic	= univ8250_console_write_atomic,
 	.write		= univ8250_console_write,
 	.device		= uart_console_device,
 	.setup		= univ8250_console_setup,
Index: linux-5.15/drivers/tty/serial/8250/8250_fsl.c
===================================================================
--- linux-5.15.orig/drivers/tty/serial/8250/8250_fsl.c
+++ linux-5.15/drivers/tty/serial/8250/8250_fsl.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:63 @ int fsl8250_handle_irq(struct uart_port
 
 	/* Stop processing interrupts on input overrun */
 	if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) {
+		unsigned long flags;
 		unsigned long delay;
+		bool is_console;
 
+		is_console = uart_console(port);
+
+		if (is_console)
+			console_atomic_lock(flags);
 		up->ier = port->serial_in(port, UART_IER);
+		if (is_console)
+			console_atomic_unlock(flags);
+
 		if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
 			port->ops->stop_rx(port);
 		} else {
Index: linux-5.15/drivers/tty/serial/8250/8250_ingenic.c
===================================================================
--- linux-5.15.orig/drivers/tty/serial/8250/8250_ingenic.c
+++ linux-5.15/drivers/tty/serial/8250/8250_ingenic.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:149 @ OF_EARLYCON_DECLARE(x1000_uart, "ingenic
 
 static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
 {
+	unsigned long flags;
+	bool is_console;
 	int ier;
 
 	switch (offset) {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:172 @ static void ingenic_uart_serial_out(stru
 		 * If we have enabled modem status IRQs we should enable
 		 * modem mode.
 		 */
+		is_console = uart_console(p);
+		if (is_console)
+			console_atomic_lock(flags);
 		ier = p->serial_in(p, UART_IER);
+		if (is_console)
+			console_atomic_unlock(flags);
 
 		if (ier & UART_IER_MSI)
 			value |= UART_MCR_MDCE | UART_MCR_FCM;
Index: linux-5.15/drivers/tty/serial/8250/8250_mtk.c
===================================================================
--- linux-5.15.orig/drivers/tty/serial/8250/8250_mtk.c
+++ linux-5.15/drivers/tty/serial/8250/8250_mtk.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:225 @ static void mtk8250_shutdown(struct uart
 
 static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask)
 {
-	serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask));
+	struct uart_port *port = &up->port;
+	unsigned long flags;
+	unsigned int ier;
+	bool is_console;
+
+	is_console = uart_console(port);
+
+	if (is_console)
+		console_atomic_lock(flags);
+
+	ier = serial_in(up, UART_IER);
+	serial_out(up, UART_IER, ier & (~mask));
+
+	if (is_console)
+		console_atomic_unlock(flags);
 }
 
 static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask)
 {
-	serial_out(up, UART_IER, serial_in(up, UART_IER) | mask);
+	struct uart_port *port = &up->port;
+	unsigned long flags;
+	unsigned int ier;
+
+	if (uart_console(port))
+		console_atomic_lock(flags);
+
+	ier = serial_in(up, UART_IER);
+	serial_out(up, UART_IER, ier | mask);
+
+	if (uart_console(port))
+		console_atomic_unlock(flags);
 }
 
 static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
Index: linux-5.15/drivers/tty/serial/8250/8250_port.c
===================================================================
--- linux-5.15.orig/drivers/tty/serial/8250/8250_port.c
+++ linux-5.15/drivers/tty/serial/8250/8250_port.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:754 @ static void serial8250_set_sleep(struct
 			serial_out(p, UART_EFR, UART_EFR_ECB);
 			serial_out(p, UART_LCR, 0);
 		}
-		serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
+		serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0);
 		if (p->capabilities & UART_CAP_EFR) {
 			serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
 			serial_out(p, UART_EFR, efr);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1429 @ static void serial8250_stop_rx(struct ua
 
 	up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
 	up->port.read_status_mask &= ~UART_LSR_DR;
-	serial_port_out(port, UART_IER, up->ier);
+	serial8250_set_IER(up, up->ier);
 
 	serial8250_rpm_put(up);
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1459 @ void serial8250_em485_stop_tx(struct uar
 		serial8250_clear_and_reinit_fifos(p);
 
 		p->ier |= UART_IER_RLSI | UART_IER_RDI;
-		serial_port_out(&p->port, UART_IER, p->ier);
+		serial8250_set_IER(p, p->ier);
 	}
 }
 EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1695 @ static void serial8250_disable_ms(struct
 	mctrl_gpio_disable_ms(up->gpios);
 
 	up->ier &= ~UART_IER_MSI;
-	serial_port_out(port, UART_IER, up->ier);
+	serial8250_set_IER(up, up->ier);
 }
 
 static void serial8250_enable_ms(struct uart_port *port)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1711 @ static void serial8250_enable_ms(struct
 	up->ier |= UART_IER_MSI;
 
 	serial8250_rpm_get(up);
-	serial_port_out(port, UART_IER, up->ier);
+	serial8250_set_IER(up, up->ier);
 	serial8250_rpm_put(up);
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2136 @ static void serial8250_put_poll_char(str
 	struct uart_8250_port *up = up_to_u8250p(port);
 
 	serial8250_rpm_get(up);
-	/*
-	 *	First save the IER then disable the interrupts
-	 */
-	ier = serial_port_in(port, UART_IER);
-	if (up->capabilities & UART_CAP_UUE)
-		serial_port_out(port, UART_IER, UART_IER_UUE);
-	else
-		serial_port_out(port, UART_IER, 0);
+	ier = serial8250_clear_IER(up);
 
 	wait_for_xmitr(up, BOTH_EMPTY);
 	/*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2149 @ static void serial8250_put_poll_char(str
 	 *	and restore the IER
 	 */
 	wait_for_xmitr(up, BOTH_EMPTY);
-	serial_port_out(port, UART_IER, ier);
+	serial8250_set_IER(up, ier);
 	serial8250_rpm_put(up);
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2454 @ void serial8250_do_shutdown(struct uart_
 	 */
 	spin_lock_irqsave(&port->lock, flags);
 	up->ier = 0;
-	serial_port_out(port, UART_IER, 0);
+	serial8250_set_IER(up, 0);
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	synchronize_irq(port->irq);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2836 @ serial8250_do_set_termios(struct uart_po
 	if (up->capabilities & UART_CAP_RTOIE)
 		up->ier |= UART_IER_RTOIE;
 
-	serial_port_out(port, UART_IER, up->ier);
+	serial8250_set_IER(up, up->ier);
 
 	if (up->capabilities & UART_CAP_EFR) {
 		unsigned char efr = 0;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3301 @ EXPORT_SYMBOL_GPL(serial8250_set_default
 
 #ifdef CONFIG_SERIAL_8250_CONSOLE
 
-static void serial8250_console_putchar(struct uart_port *port, int ch)
+static void serial8250_console_putchar_locked(struct uart_port *port, int ch)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3309 @ static void serial8250_console_putchar(s
 	serial_port_out(port, UART_TX, ch);
 }
 
+static void serial8250_console_putchar(struct uart_port *port, int ch)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+	unsigned long flags;
+
+	wait_for_xmitr(up, UART_LSR_THRE);
+
+	console_atomic_lock(flags);
+	serial8250_console_putchar_locked(port, ch);
+	console_atomic_unlock(flags);
+}
+
 /*
  *	Restore serial console when h/w power-off detected
  */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3347 @ static void serial8250_console_restore(s
 	serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
 }
 
+void serial8250_console_write_atomic(struct uart_8250_port *up,
+				     const char *s, unsigned int count)
+{
+	struct uart_port *port = &up->port;
+	unsigned long flags;
+	unsigned int ier;
+
+	console_atomic_lock(flags);
+
+	touch_nmi_watchdog();
+
+	ier = serial8250_clear_IER(up);
+
+	if (atomic_fetch_inc(&up->console_printing)) {
+		uart_console_write(port, "\n", 1,
+				   serial8250_console_putchar_locked);
+	}
+	uart_console_write(port, s, count, serial8250_console_putchar_locked);
+	atomic_dec(&up->console_printing);
+
+	wait_for_xmitr(up, BOTH_EMPTY);
+	serial8250_set_IER(up, ier);
+
+	console_atomic_unlock(flags);
+}
+
 /*
  *	Print a string to the serial port trying not to disturb
  *	any possible real use of the port...
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3389 @ void serial8250_console_write(struct uar
 	struct uart_port *port = &up->port;
 	unsigned long flags;
 	unsigned int ier;
-	int locked = 1;
 
 	touch_nmi_watchdog();
 
-	if (oops_in_progress)
-		locked = spin_trylock_irqsave(&port->lock, flags);
-	else
-		spin_lock_irqsave(&port->lock, flags);
-
-	/*
-	 *	First save the IER then disable the interrupts
-	 */
-	ier = serial_port_in(port, UART_IER);
+	spin_lock_irqsave(&port->lock, flags);
 
-	if (up->capabilities & UART_CAP_UUE)
-		serial_port_out(port, UART_IER, UART_IER_UUE);
-	else
-		serial_port_out(port, UART_IER, 0);
+	ier = serial8250_clear_IER(up);
 
 	/* check scratch reg to see if port powered off during system sleep */
 	if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3408 @ void serial8250_console_write(struct uar
 		mdelay(port->rs485.delay_rts_before_send);
 	}
 
+	atomic_inc(&up->console_printing);
 	uart_console_write(port, s, count, serial8250_console_putchar);
+	atomic_dec(&up->console_printing);
 
 	/*
 	 *	Finally, wait for transmitter to become empty
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3423 @ void serial8250_console_write(struct uar
 		if (em485->tx_stopped)
 			up->rs485_stop_tx(up);
 	}
-
-	serial_port_out(port, UART_IER, ier);
+	serial8250_set_IER(up, ier);
 
 	/*
 	 *	The receive handling will happen properly because the
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3435 @ void serial8250_console_write(struct uar
 	if (up->msr_saved_flags)
 		serial8250_modem_status(up);
 
-	if (locked)
-		spin_unlock_irqrestore(&port->lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 }
 
 static unsigned int probe_baud(struct uart_port *port)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3455 @ static unsigned int probe_baud(struct ua
 
 int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
 {
+	struct uart_8250_port *up = up_to_u8250p(port);
 	int baud = 9600;
 	int bits = 8;
 	int parity = 'n';
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3465 @ int serial8250_console_setup(struct uart
 	if (!port->iobase && !port->membase)
 		return -ENODEV;
 
+	atomic_set(&up->console_printing, 0);
+
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
 	else if (probe)
Index: linux-5.15/drivers/tty/serial/amba-pl011.c
===================================================================
--- linux-5.15.orig/drivers/tty/serial/amba-pl011.c
+++ linux-5.15/drivers/tty/serial/amba-pl011.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2343 @ pl011_console_write(struct console *co,
 {
 	struct uart_amba_port *uap = amba_ports[co->index];
 	unsigned int old_cr = 0, new_cr;
-	unsigned long flags;
+	unsigned long flags = 0;
 	int locked = 1;
 
 	clk_enable(uap->clk);
 
-	local_irq_save(flags);
+	/*
+	 * local_irq_save(flags);
+	 *
+	 * This local_irq_save() is nonsense. If we come in via sysrq
+	 * handling then interrupts are already disabled. Aside of
+	 * that the port.sysrq check is racy on SMP regardless.
+	*/
 	if (uap->port.sysrq)
 		locked = 0;
 	else if (oops_in_progress)
-		locked = spin_trylock(&uap->port.lock);
+		locked = spin_trylock_irqsave(&uap->port.lock, flags);
 	else
-		spin_lock(&uap->port.lock);
+		spin_lock_irqsave(&uap->port.lock, flags);
 
 	/*
 	 *	First save the CR then disable the interrupts
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2386 @ pl011_console_write(struct console *co,
 		pl011_write(old_cr, uap, REG_CR);
 
 	if (locked)
-		spin_unlock(&uap->port.lock);
-	local_irq_restore(flags);
+		spin_unlock_irqrestore(&uap->port.lock, flags);
 
 	clk_disable(uap->clk);
 }
Index: linux-5.15/drivers/tty/serial/omap-serial.c
===================================================================
--- linux-5.15.orig/drivers/tty/serial/omap-serial.c
+++ linux-5.15/drivers/tty/serial/omap-serial.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1258 @ serial_omap_console_write(struct console
 	unsigned int ier;
 	int locked = 1;
 
-	local_irq_save(flags);
-	if (up->port.sysrq)
-		locked = 0;
-	else if (oops_in_progress)
-		locked = spin_trylock(&up->port.lock);
+	if (up->port.sysrq || oops_in_progress)
+		locked = spin_trylock_irqsave(&up->port.lock, flags);
 	else
-		spin_lock(&up->port.lock);
+		spin_lock_irqsave(&up->port.lock, flags);
 
 	/*
 	 * First save the IER then disable the interrupts
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1288 @ serial_omap_console_write(struct console
 		check_modem_status(up);
 
 	if (locked)
-		spin_unlock(&up->port.lock);
-	local_irq_restore(flags);
+		spin_unlock_irqrestore(&up->port.lock, flags);
 }
 
 static int __init
Index: linux-5.15/drivers/virt/acrn/irqfd.c
===================================================================
--- linux-5.15.orig/drivers/virt/acrn/irqfd.c
+++ linux-5.15/drivers/virt/acrn/irqfd.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:20 @
 #include "acrn_drv.h"
 
 static LIST_HEAD(acrn_irqfd_clients);
-static DEFINE_MUTEX(acrn_irqfds_mutex);
 
 /**
  * struct hsm_irqfd - Properties of HSM irqfd
Index: linux-5.15/fs/afs/dir_silly.c
===================================================================
--- linux-5.15.orig/fs/afs/dir_silly.c
+++ linux-5.15/fs/afs/dir_silly.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:242 @ int afs_silly_iput(struct dentry *dentry
 	struct dentry *alias;
 	int ret;
 
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
 
 	_enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
 
Index: linux-5.15/fs/cifs/readdir.c
===================================================================
--- linux-5.15.orig/fs/cifs/readdir.c
+++ linux-5.15/fs/cifs/readdir.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:72 @ cifs_prime_dcache(struct dentry *parent,
 	struct inode *inode;
 	struct super_block *sb = parent->d_sb;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
 
 	cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
 
Index: linux-5.15/fs/dcache.c
===================================================================
--- linux-5.15.orig/fs/dcache.c
+++ linux-5.15/fs/dcache.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2541 @ EXPORT_SYMBOL(d_rehash);
 
 static inline unsigned start_dir_add(struct inode *dir)
 {
-
+	/*
+	 * The caller has a spinlock_t (dentry::d_lock) acquired which disables
+	 * preemption on !PREEMPT_RT. On PREEMPT_RT the lock does not disable
+	 * preemption and it has be done explicitly.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
 	for (;;) {
 		unsigned n = dir->i_dir_seq;
 		if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2559 @ static inline unsigned start_dir_add(str
 static inline void end_dir_add(struct inode *dir, unsigned n)
 {
 	smp_store_release(&dir->i_dir_seq, n + 2);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 
 static void d_wait_lookup(struct dentry *dentry)
 {
-	if (d_in_lookup(dentry)) {
-		DECLARE_WAITQUEUE(wait, current);
-		add_wait_queue(dentry->d_wait, &wait);
-		do {
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			spin_unlock(&dentry->d_lock);
-			schedule();
-			spin_lock(&dentry->d_lock);
-		} while (d_in_lookup(dentry));
-	}
+	struct swait_queue __wait;
+
+	if (!d_in_lookup(dentry))
+		return;
+
+	INIT_LIST_HEAD(&__wait.task_list);
+	do {
+		prepare_to_swait_exclusive(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE);
+		spin_unlock(&dentry->d_lock);
+		schedule();
+		spin_lock(&dentry->d_lock);
+	} while (d_in_lookup(dentry));
+	finish_swait(dentry->d_wait, &__wait);
 }
 
 struct dentry *d_alloc_parallel(struct dentry *parent,
 				const struct qstr *name,
-				wait_queue_head_t *wq)
+				struct swait_queue_head *wq)
 {
 	unsigned int hash = name->hash;
 	struct hlist_bl_head *b = in_lookup_hash(parent, hash);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2697 @ void __d_lookup_done(struct dentry *dent
 	hlist_bl_lock(b);
 	dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
 	__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
-	wake_up_all(dentry->d_wait);
+	swake_up_all(dentry->d_wait);
 	dentry->d_wait = NULL;
 	hlist_bl_unlock(b);
 	INIT_HLIST_NODE(&dentry->d_u.d_alias);
Index: linux-5.15/fs/fscache/internal.h
===================================================================
--- linux-5.15.orig/fs/fscache/internal.h
+++ linux-5.15/fs/fscache/internal.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:84 @ extern unsigned fscache_debug;
 extern struct kobject *fscache_root;
 extern struct workqueue_struct *fscache_object_wq;
 extern struct workqueue_struct *fscache_op_wq;
-DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
 
 extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);
 
Index: linux-5.15/fs/fscache/main.c
===================================================================
--- linux-5.15.orig/fs/fscache/main.c
+++ linux-5.15/fs/fscache/main.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:44 @ struct kobject *fscache_root;
 struct workqueue_struct *fscache_object_wq;
 struct workqueue_struct *fscache_op_wq;
 
-DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
-
 /* these values serve as lower bounds, will be adjusted in fscache_init() */
 static unsigned fscache_object_max_active = 4;
 static unsigned fscache_op_max_active = 2;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:139 @ unsigned int fscache_hash(unsigned int s
 static int __init fscache_init(void)
 {
 	unsigned int nr_cpus = num_possible_cpus();
-	unsigned int cpu;
 	int ret;
 
 	fscache_object_max_active =
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:161 @ static int __init fscache_init(void)
 	if (!fscache_op_wq)
 		goto error_op_wq;
 
-	for_each_possible_cpu(cpu)
-		init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu));
-
 	ret = fscache_proc_init();
 	if (ret < 0)
 		goto error_proc;
Index: linux-5.15/fs/fscache/object.c
===================================================================
--- linux-5.15.orig/fs/fscache/object.c
+++ linux-5.15/fs/fscache/object.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:801 @ void fscache_object_destroy(struct fscac
 }
 EXPORT_SYMBOL(fscache_object_destroy);
 
+static DECLARE_WAIT_QUEUE_HEAD(fscache_object_cong_wait);
+
 /*
  * enqueue an object for metadata-type processing
  */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:811 @ void fscache_enqueue_object(struct fscac
 	_enter("{OBJ%x}", object->debug_id);
 
 	if (fscache_get_object(object, fscache_obj_get_queue) >= 0) {
-		wait_queue_head_t *cong_wq =
-			&get_cpu_var(fscache_object_cong_wait);
 
 		if (queue_work(fscache_object_wq, &object->work)) {
 			if (fscache_object_congested())
-				wake_up(cong_wq);
+				wake_up(&fscache_object_cong_wait);
 		} else
 			fscache_put_object(object, fscache_obj_put_queue);
-
-		put_cpu_var(fscache_object_cong_wait);
 	}
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:834 @ void fscache_enqueue_object(struct fscac
  */
 bool fscache_object_sleep_till_congested(signed long *timeoutp)
 {
-	wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait);
 	DEFINE_WAIT(wait);
 
 	if (fscache_object_congested())
 		return true;
 
-	add_wait_queue_exclusive(cong_wq, &wait);
+	add_wait_queue_exclusive(&fscache_object_cong_wait, &wait);
 	if (!fscache_object_congested())
 		*timeoutp = schedule_timeout(*timeoutp);
-	finish_wait(cong_wq, &wait);
+	finish_wait(&fscache_object_cong_wait, &wait);
 
 	return fscache_object_congested();
 }
Index: linux-5.15/fs/fuse/readdir.c
===================================================================
--- linux-5.15.orig/fs/fuse/readdir.c
+++ linux-5.15/fs/fuse/readdir.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:163 @ static int fuse_direntplus_link(struct f
 	struct inode *dir = d_inode(parent);
 	struct fuse_conn *fc;
 	struct inode *inode;
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
 
 	if (!o->nodeid) {
 		/*
Index: linux-5.15/fs/namei.c
===================================================================
--- linux-5.15.orig/fs/namei.c
+++ linux-5.15/fs/namei.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1638 @ static struct dentry *__lookup_slow(cons
 {
 	struct dentry *dentry, *old;
 	struct inode *inode = dir->d_inode;
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
 
 	/* Don't go there if it's already dead */
 	if (unlikely(IS_DEADDIR(inode)))
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3249 @ static struct dentry *lookup_open(struct
 	struct dentry *dentry;
 	int error, create_error = 0;
 	umode_t mode = op->mode;
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
 
 	if (unlikely(IS_DEADDIR(dir_inode)))
 		return ERR_PTR(-ENOENT);
Index: linux-5.15/fs/namespace.c
===================================================================
--- linux-5.15.orig/fs/namespace.c
+++ linux-5.15/fs/namespace.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:347 @ int __mnt_want_write(struct vfsmount *m)
 	 * incremented count after it has set MNT_WRITE_HOLD.
 	 */
 	smp_mb();
-	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
-		cpu_relax();
+	might_lock(&mount_lock.lock);
+	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+			cpu_relax();
+		} else {
+			/*
+			 * This prevents priority inversion, if the task
+			 * setting MNT_WRITE_HOLD got preempted on a remote
+			 * CPU, and it prevents life lock if the task setting
+			 * MNT_WRITE_HOLD has a lower priority and is bound to
+			 * the same CPU as the task that is spinning here.
+			 */
+			preempt_enable();
+			lock_mount_hash();
+			unlock_mount_hash();
+			preempt_disable();
+		}
+	}
 	/*
 	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
 	 * be set to match its requirements. So we must not load that until
Index: linux-5.15/fs/nfs/dir.c
===================================================================
--- linux-5.15.orig/fs/nfs/dir.c
+++ linux-5.15/fs/nfs/dir.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:640 @ void nfs_prime_dcache(struct dentry *par
 		unsigned long dir_verifier)
 {
 	struct qstr filename = QSTR_INIT(entry->name, entry->len);
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
 	struct dentry *dentry;
 	struct dentry *alias;
 	struct inode *inode;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1876 @ int nfs_atomic_open(struct inode *dir, s
 		    struct file *file, unsigned open_flags,
 		    umode_t mode)
 {
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
 	struct nfs_open_context *ctx;
 	struct dentry *res;
 	struct iattr attr = { .ia_valid = ATTR_OPEN };
Index: linux-5.15/fs/nfs/unlink.c
===================================================================
--- linux-5.15.orig/fs/nfs/unlink.c
+++ linux-5.15/fs/nfs/unlink.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:16 @
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
 #include <linux/sched.h>
-#include <linux/wait.h>
+#include <linux/swait.h>
 #include <linux/namei.h>
 #include <linux/fsnotify.h>
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:187 @ nfs_async_unlink(struct dentry *dentry,
 
 	data->cred = get_current_cred();
 	data->res.dir_attr = &data->dir_attr;
-	init_waitqueue_head(&data->wq);
+	init_swait_queue_head(&data->wq);
 
 	status = -EBUSY;
 	spin_lock(&dentry->d_lock);
Index: linux-5.15/fs/proc/base.c
===================================================================
--- linux-5.15.orig/fs/proc/base.c
+++ linux-5.15/fs/proc/base.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:99 @
 #include <linux/posix-timers.h>
 #include <linux/time_namespace.h>
 #include <linux/resctrl.h>
+#include <linux/swait.h>
 #include <linux/cn_proc.h>
 #include <trace/events/oom.h>
 #include "internal.h"
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2075 @ bool proc_fill_cache(struct file *file,
 
 	child = d_hash_and_lookup(dir, &qname);
 	if (!child) {
-		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+		DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
 		child = d_alloc_parallel(dir, &qname, &wq);
 		if (IS_ERR(child))
 			goto end_instantiate;
Index: linux-5.15/fs/proc/proc_sysctl.c
===================================================================
--- linux-5.15.orig/fs/proc/proc_sysctl.c
+++ linux-5.15/fs/proc/proc_sysctl.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:682 @ static bool proc_sys_fill_cache(struct f
 
 	child = d_lookup(dir, &qname);
 	if (!child) {
-		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+		DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
 		child = d_alloc_parallel(dir, &qname, &wq);
 		if (IS_ERR(child))
 			return false;
Index: linux-5.15/include/asm-generic/softirq_stack.h
===================================================================
--- linux-5.15.orig/include/asm-generic/softirq_stack.h
+++ linux-5.15/include/asm-generic/softirq_stack.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5 @
 #ifndef __ASM_GENERIC_SOFTIRQ_STACK_H
 #define __ASM_GENERIC_SOFTIRQ_STACK_H
 
-#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK
+#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT)
 void do_softirq_own_stack(void);
 #else
 static inline void do_softirq_own_stack(void)
Index: linux-5.15/include/linux/console.h
===================================================================
--- linux-5.15.orig/include/linux/console.h
+++ linux-5.15/include/linux/console.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:19 @
 
 #include <linux/atomic.h>
 #include <linux/types.h>
+#include <linux/printk.h>
+#include <linux/seqlock.h>
+
+struct latched_seq {
+	seqcount_latch_t	latch;
+	u64			val[2];
+};
 
 struct vc_data;
 struct console_font_op;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:146 @ static inline int con_debug_leave(void)
 #define CON_ANYTIME	(16) /* Safe to call when cpu is offline */
 #define CON_BRL		(32) /* Used for a braille device */
 #define CON_EXTENDED	(64) /* Use the extended output format a la /dev/kmsg */
+#define CON_HANDOVER	(128) /* Device was previously a boot console. */
 
 struct console {
 	char	name[16];
 	void	(*write)(struct console *, const char *, unsigned);
+	void	(*write_atomic)(struct console *co, const char *s, unsigned int count);
 	int	(*read)(struct console *, char *, unsigned);
 	struct tty_driver *(*device)(struct console *, int *);
 	void	(*unblank)(void);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:161 @ struct console {
 	short	flags;
 	short	index;
 	int	cflag;
+#ifdef CONFIG_PRINTK
+	char	sync_buf[CONSOLE_LOG_MAX];
+	struct latched_seq printk_seq;
+	struct latched_seq printk_sync_seq;
+#ifdef CONFIG_HAVE_NMI
+	struct latched_seq printk_sync_nmi_seq;
+#endif
+#endif /* CONFIG_PRINTK */
+
+	struct task_struct *thread;
 	uint	ispeed;
 	uint	ospeed;
 	void	*data;
Index: linux-5.15/include/linux/dcache.h
===================================================================
--- linux-5.15.orig/include/linux/dcache.h
+++ linux-5.15/include/linux/dcache.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:111 @ struct dentry {
 
 	union {
 		struct list_head d_lru;		/* LRU list */
-		wait_queue_head_t *d_wait;	/* in-lookup ones only */
+		struct swait_queue_head *d_wait;	/* in-lookup ones only */
 	};
 	struct list_head d_child;	/* child of parent list */
 	struct list_head d_subdirs;	/* our children */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:243 @ extern void d_set_d_op(struct dentry *de
 extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
 extern struct dentry * d_alloc_anon(struct super_block *);
 extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
-					wait_queue_head_t *);
+					struct swait_queue_head *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
 extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
Index: linux-5.15/include/linux/entry-common.h
===================================================================
--- linux-5.15.orig/include/linux/entry-common.h
+++ linux-5.15/include/linux/entry-common.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:60 @
 # define ARCH_EXIT_TO_USER_MODE_WORK		(0)
 #endif
 
+#ifdef CONFIG_PREEMPT_LAZY
+# define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+#else
+# define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED)
+#endif
+
 #define EXIT_TO_USER_MODE_WORK						\
 	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
-	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
+	 _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
 	 ARCH_EXIT_TO_USER_MODE_WORK)
 
 /**
Index: linux-5.15/include/linux/irq_work.h
===================================================================
--- linux-5.15.orig/include/linux/irq_work.h
+++ linux-5.15/include/linux/irq_work.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6 @
 #define _LINUX_IRQ_WORK_H
 
 #include <linux/smp_types.h>
+#include <linux/rcuwait.h>
 
 /*
  * An entry can be in one of four states:
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:20 @
 struct irq_work {
 	struct __call_single_node node;
 	void (*func)(struct irq_work *);
+	struct rcuwait irqwait;
 };
 
 #define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){	\
 	.node = { .u_flags = (_flags), },			\
 	.func = (_func),					\
+	.irqwait = __RCUWAIT_INITIALIZER(irqwait),		\
 }
 
 #define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:52 @ static inline bool irq_work_is_busy(stru
 	return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY;
 }
 
+static inline bool irq_work_is_hard(struct irq_work *work)
+{
+	return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ;
+}
+
 bool irq_work_queue(struct irq_work *work);
 bool irq_work_queue_on(struct irq_work *work, int cpu);
 
Index: linux-5.15/include/linux/irqdesc.h
===================================================================
--- linux-5.15.orig/include/linux/irqdesc.h
+++ linux-5.15/include/linux/irqdesc.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:163 @ static inline void generic_handle_irq_de
 
 int handle_irq_desc(struct irq_desc *desc);
 int generic_handle_irq(unsigned int irq);
+int generic_handle_irq_safe(unsigned int irq);
 
 #ifdef CONFIG_IRQ_DOMAIN
 /*
Index: linux-5.15/include/linux/irqflags.h
===================================================================
--- linux-5.15.orig/include/linux/irqflags.h
+++ linux-5.15/include/linux/irqflags.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:74 @ do {						\
 do {						\
 	__this_cpu_dec(hardirq_context);	\
 } while (0)
-# define lockdep_softirq_enter()		\
-do {						\
-	current->softirq_context++;		\
-} while (0)
-# define lockdep_softirq_exit()			\
-do {						\
-	current->softirq_context--;		\
-} while (0)
 
 # define lockdep_hrtimer_enter(__hrtimer)		\
 ({							\
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:135 @ do {						\
 # define lockdep_irq_work_exit(__work)		do { } while (0)
 #endif
 
+#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT)
+# define lockdep_softirq_enter()		\
+do {						\
+	current->softirq_context++;		\
+} while (0)
+# define lockdep_softirq_exit()			\
+do {						\
+	current->softirq_context--;		\
+} while (0)
+
+#else
+# define lockdep_softirq_enter()		do { } while (0)
+# define lockdep_softirq_exit()			do { } while (0)
+#endif
+
 #if defined(CONFIG_IRQSOFF_TRACER) || \
 	defined(CONFIG_PREEMPT_TRACER)
  extern void stop_critical_timings(void);
Index: linux-5.15/include/linux/kernel.h
===================================================================
--- linux-5.15.orig/include/linux/kernel.h
+++ linux-5.15/include/linux/kernel.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:114 @ static __always_inline void might_resche
 #endif /* CONFIG_PREEMPT_* */
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-extern void ___might_sleep(const char *file, int line, int preempt_offset);
-extern void __might_sleep(const char *file, int line, int preempt_offset);
+extern void __might_resched(const char *file, int line, unsigned int offsets);
+extern void __might_sleep(const char *file, int line);
 extern void __cant_sleep(const char *file, int line, int preempt_offset);
 extern void __cant_migrate(const char *file, int line);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:132 @ extern void __cant_migrate(const char *f
  * supposed to.
  */
 # define might_sleep() \
-	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+	do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
 /**
  * cant_sleep - annotation for functions that cannot sleep
  *
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:171 @ extern void __cant_migrate(const char *f
  */
 # define non_block_end() WARN_ON(current->non_block_count-- == 0)
 #else
-  static inline void ___might_sleep(const char *file, int line,
-				   int preempt_offset) { }
-  static inline void __might_sleep(const char *file, int line,
-				   int preempt_offset) { }
+  static inline void __might_resched(const char *file, int line,
+				     unsigned int offsets) { }
+static inline void __might_sleep(const char *file, int line) { }
 # define might_sleep() do { might_resched(); } while (0)
 # define cant_sleep() do { } while (0)
 # define cant_migrate()		do { } while (0)
Index: linux-5.15/include/linux/kgdb.h
===================================================================
--- linux-5.15.orig/include/linux/kgdb.h
+++ linux-5.15/include/linux/kgdb.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:215 @ extern void kgdb_call_nmi_hook(void *ign
  */
 extern void kgdb_roundup_cpus(void);
 
+extern void kgdb_roundup_cpu(unsigned int cpu);
+
 /**
  *	kgdb_arch_set_pc - Generic call back to the program counter
  *	@regs: Current &struct pt_regs.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:370 @ extern void kgdb_free_init_mem(void);
 #define dbg_late_init()
 static inline void kgdb_panic(const char *msg) {}
 static inline void kgdb_free_init_mem(void) { }
+static inline void kgdb_roundup_cpu(unsigned int cpu) {}
 #endif /* ! CONFIG_KGDB */
 #endif /* _KGDB_H_ */
Index: linux-5.15/include/linux/mm_types.h
===================================================================
--- linux-5.15.orig/include/linux/mm_types.h
+++ linux-5.15/include/linux/mm_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:15 @
 #include <linux/completion.h>
 #include <linux/cpumask.h>
 #include <linux/uprobes.h>
+#include <linux/rcupdate.h>
 #include <linux/page-flags-layout.h>
 #include <linux/workqueue.h>
 #include <linux/seqlock.h>
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:578 @ struct mm_struct {
 		bool tlb_flush_batched;
 #endif
 		struct uprobes_state uprobes_state;
+#ifdef CONFIG_PREEMPT_RT
+		struct rcu_head delayed_drop;
+#endif
 #ifdef CONFIG_HUGETLB_PAGE
 		atomic_long_t hugetlb_usage;
 #endif
Index: linux-5.15/include/linux/netdevice.h
===================================================================
--- linux-5.15.orig/include/linux/netdevice.h
+++ linux-5.15/include/linux/netdevice.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1941 @ enum netdev_ml_priv_type {
  *	@sfp_bus:	attached &struct sfp_bus structure.
  *
  *	@qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
- *	@qdisc_running_key: lockdep class annotating Qdisc->running seqcount
  *
  *	@proto_down:	protocol port state information can be sent to the
  *			switch driver and used to set the phys state of the
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2274 @ struct net_device {
 	struct phy_device	*phydev;
 	struct sfp_bus		*sfp_bus;
 	struct lock_class_key	*qdisc_tx_busylock;
-	struct lock_class_key	*qdisc_running_key;
 	bool			proto_down;
 	unsigned		wol_enabled:1;
 	unsigned		threaded:1;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2384 @ static inline void netdev_for_each_tx_qu
 #define netdev_lockdep_set_classes(dev)				\
 {								\
 	static struct lock_class_key qdisc_tx_busylock_key;	\
-	static struct lock_class_key qdisc_running_key;		\
 	static struct lock_class_key qdisc_xmit_lock_key;	\
 	static struct lock_class_key dev_addr_list_lock_key;	\
 	unsigned int i;						\
 								\
 	(dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key;	\
-	(dev)->qdisc_running_key = &qdisc_running_key;		\
 	lockdep_set_class(&(dev)->addr_list_lock,		\
 			  &dev_addr_list_lock_key);		\
 	for (i = 0; i < (dev)->num_tx_queues; i++)		\
Index: linux-5.15/include/linux/nfs_xdr.h
===================================================================
--- linux-5.15.orig/include/linux/nfs_xdr.h
+++ linux-5.15/include/linux/nfs_xdr.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1695 @ struct nfs_unlinkdata {
 	struct nfs_removeargs args;
 	struct nfs_removeres res;
 	struct dentry *dentry;
-	wait_queue_head_t wq;
+	struct swait_queue_head wq;
 	const struct cred *cred;
 	struct nfs_fattr dir_attr;
 	long timeout;
Index: linux-5.15/include/linux/preempt.h
===================================================================
--- linux-5.15.orig/include/linux/preempt.h
+++ linux-5.15/include/linux/preempt.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:125 @
  * The preempt_count offset after spin_lock()
  */
 #if !defined(CONFIG_PREEMPT_RT)
-#define PREEMPT_LOCK_OFFSET	PREEMPT_DISABLE_OFFSET
+#define PREEMPT_LOCK_OFFSET		PREEMPT_DISABLE_OFFSET
 #else
-#define PREEMPT_LOCK_OFFSET	0
+/* Locks on RT do not disable preemption */
+#define PREEMPT_LOCK_OFFSET		0
 #endif
 
 /*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:178 @ extern void preempt_count_sub(int val);
 #define preempt_count_inc() preempt_count_add(1)
 #define preempt_count_dec() preempt_count_sub(1)
 
+#ifdef CONFIG_PREEMPT_LAZY
+#define add_preempt_lazy_count(val)	do { preempt_lazy_count() += (val); } while (0)
+#define sub_preempt_lazy_count(val)	do { preempt_lazy_count() -= (val); } while (0)
+#define inc_preempt_lazy_count()	add_preempt_lazy_count(1)
+#define dec_preempt_lazy_count()	sub_preempt_lazy_count(1)
+#define preempt_lazy_count()		(current_thread_info()->preempt_lazy_count)
+#else
+#define add_preempt_lazy_count(val)	do { } while (0)
+#define sub_preempt_lazy_count(val)	do { } while (0)
+#define inc_preempt_lazy_count()	do { } while (0)
+#define dec_preempt_lazy_count()	do { } while (0)
+#define preempt_lazy_count()		(0)
+#endif
+
 #ifdef CONFIG_PREEMPT_COUNT
 
 #define preempt_disable() \
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:200 @ do { \
 	barrier(); \
 } while (0)
 
+#define preempt_lazy_disable() \
+do { \
+	inc_preempt_lazy_count(); \
+	barrier(); \
+} while (0)
+
 #define sched_preempt_enable_no_resched() \
 do { \
 	barrier(); \
 	preempt_count_dec(); \
 } while (0)
 
-#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+#ifndef CONFIG_PREEMPT_RT
+# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+# define preempt_check_resched_rt() barrier();
+#else
+# define preempt_enable_no_resched() preempt_enable()
+# define preempt_check_resched_rt() preempt_check_resched()
+#endif
 
 #define preemptible()	(preempt_count() == 0 && !irqs_disabled())
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:243 @ do { \
 		__preempt_schedule(); \
 } while (0)
 
+/*
+ * open code preempt_check_resched() because it is not exported to modules and
+ * used by local_unlock() or bpf_enable_instrumentation().
+ */
+#define preempt_lazy_enable() \
+do { \
+	dec_preempt_lazy_count(); \
+	barrier(); \
+	if (should_resched(0)) \
+		__preempt_schedule(); \
+} while (0)
+
 #else /* !CONFIG_PREEMPTION */
 #define preempt_enable() \
 do { \
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:262 @ do { \
 	preempt_count_dec(); \
 } while (0)
 
+#define preempt_lazy_enable() \
+do { \
+	dec_preempt_lazy_count(); \
+	barrier(); \
+} while (0)
+
 #define preempt_enable_notrace() \
 do { \
 	barrier(); \
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:306 @ do { \
 #define preempt_disable_notrace()		barrier()
 #define preempt_enable_no_resched_notrace()	barrier()
 #define preempt_enable_notrace()		barrier()
+#define preempt_check_resched_rt()		barrier()
 #define preemptible()				0
 
+#define preempt_lazy_disable()			barrier()
+#define preempt_lazy_enable()			barrier()
+
 #endif /* CONFIG_PREEMPT_COUNT */
 
 #ifdef MODULE
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:330 @ do { \
 } while (0)
 #define preempt_fold_need_resched() \
 do { \
-	if (tif_need_resched()) \
+	if (tif_need_resched_now()) \
 		set_preempt_need_resched(); \
 } while (0)
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:446 @ extern void migrate_enable(void);
 
 #else
 
-static inline void migrate_disable(void) { }
-static inline void migrate_enable(void) { }
+static inline void migrate_disable(void)
+{
+	preempt_lazy_disable();
+}
+
+static inline void migrate_enable(void)
+{
+	preempt_lazy_enable();
+}
 
 #endif /* CONFIG_SMP */
 
Index: linux-5.15/include/linux/printk.h
===================================================================
--- linux-5.15.orig/include/linux/printk.h
+++ linux-5.15/include/linux/printk.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:50 @ static inline const char *printk_skip_he
 
 #define CONSOLE_EXT_LOG_MAX	8192
 
+/*
+ * The maximum size of a record formatted for console printing
+ * (i.e. with the prefix prepended to every line).
+ */
+#define CONSOLE_LOG_MAX		1024
+
 /* printk's without a loglevel use this.. */
 #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:164 @ int vprintk(const char *fmt, va_list arg
 asmlinkage __printf(1, 2) __cold
 int _printk(const char *fmt, ...);
 
+bool pr_flush(int timeout_ms, bool reset_on_progress);
+
 /*
  * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
  */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:235 @ static inline void printk_deferred_exit(
 {
 }
 
+static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
+{
+	return true;
+}
+
 static inline int printk_ratelimit(void)
 {
 	return 0;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:300 @ static inline void printk_trigger_flush(
 extern int __printk_cpu_trylock(void);
 extern void __printk_wait_on_cpu_lock(void);
 extern void __printk_cpu_unlock(void);
+extern bool kgdb_roundup_delay(unsigned int cpu);
+
+#else
+
+#define __printk_cpu_trylock()		1
+#define __printk_wait_on_cpu_lock()
+#define __printk_cpu_unlock()
+
+static inline bool kgdb_roundup_delay(unsigned int cpu)
+{
+	return false;
+}
+#endif /* CONFIG_SMP */
 
 /**
- * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
- *                             lock and disable interrupts.
+ * raw_printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
+ *                                 lock and disable interrupts.
  * @flags: Stack-allocated storage for saving local interrupt state,
- *         to be passed to printk_cpu_unlock_irqrestore().
+ *         to be passed to raw_printk_cpu_unlock_irqrestore().
  *
  * If the lock is owned by another CPU, spin until it becomes available.
  * Interrupts are restored while spinning.
  */
-#define printk_cpu_lock_irqsave(flags)		\
+#define raw_printk_cpu_lock_irqsave(flags)	\
 	for (;;) {				\
 		local_irq_save(flags);		\
 		if (__printk_cpu_trylock())	\
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:333 @ extern void __printk_cpu_unlock(void);
 	}
 
 /**
- * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning
- *                                  lock and restore interrupts.
- * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave().
+ * raw_printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant
+ *                                      spinning lock and restore interrupts.
+ * @flags: Caller's saved interrupt state from raw_printk_cpu_lock_irqsave().
  */
-#define printk_cpu_unlock_irqrestore(flags)	\
+#define raw_printk_cpu_unlock_irqrestore(flags)	\
 	do {					\
 		__printk_cpu_unlock();		\
 		local_irq_restore(flags);	\
-	} while (0)				\
+	} while (0)
 
-#else
-
-#define printk_cpu_lock_irqsave(flags) ((void)flags)
-#define printk_cpu_unlock_irqrestore(flags) ((void)flags)
+/*
+ * Used to synchronize atomic consoles.
+ *
+ * The same as raw_printk_cpu_lock_irqsave() except that hardware interrupts
+ * are _not_ restored while spinning.
+ */
+#define console_atomic_lock(flags)		\
+	do {					\
+		local_irq_save(flags);		\
+		while (!__printk_cpu_trylock())	\
+			cpu_relax();		\
+	} while (0)
 
-#endif /* CONFIG_SMP */
+#define console_atomic_unlock raw_printk_cpu_unlock_irqrestore
 
 extern int kptr_restrict;
 
Index: linux-5.15/include/linux/ratelimit_types.h
===================================================================
--- linux-5.15.orig/include/linux/ratelimit_types.h
+++ linux-5.15/include/linux/ratelimit_types.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:7 @
 
 #include <linux/bits.h>
 #include <linux/param.h>
-#include <linux/spinlock_types.h>
+#include <linux/spinlock_types_raw.h>
 
 #define DEFAULT_RATELIMIT_INTERVAL	(5 * HZ)
 #define DEFAULT_RATELIMIT_BURST		10
Index: linux-5.15/include/linux/rcupdate.h
===================================================================
--- linux-5.15.orig/include/linux/rcupdate.h
+++ linux-5.15/include/linux/rcupdate.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:97 @ void rcu_init_tasks_generic(void);
 static inline void rcu_init_tasks_generic(void) { }
 #endif
 
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TASKS_RCU_GENERIC)
+void rcu_tasks_initiate_self_tests(void);
+#else
+static inline void rcu_tasks_initiate_self_tests(void) {}
+#endif
+
+
 #ifdef CONFIG_RCU_STALL_COMMON
 void rcu_sysrq_start(void);
 void rcu_sysrq_end(void);
Index: linux-5.15/include/linux/rtmutex.h
===================================================================
--- linux-5.15.orig/include/linux/rtmutex.h
+++ linux-5.15/include/linux/rtmutex.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:102 @ extern void __rt_mutex_init(struct rt_mu
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass);
+extern void _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock);
 #define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0)
+#define rt_mutex_lock_nest_lock(lock, nest_lock)			\
+	do {								\
+		typecheck(struct lockdep_map *, &(nest_lock)->dep_map);	\
+		_rt_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map);	\
+	} while (0)
+
 #else
 extern void rt_mutex_lock(struct rt_mutex *lock);
 #define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock)
+#define rt_mutex_lock_nest_lock(lock, nest_lock) rt_mutex_lock(lock)
 #endif
 
 extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
+extern int rt_mutex_lock_killable(struct rt_mutex *lock);
 extern int rt_mutex_trylock(struct rt_mutex *lock);
 
 extern void rt_mutex_unlock(struct rt_mutex *lock);
Index: linux-5.15/include/linux/sched.h
===================================================================
--- linux-5.15.orig/include/linux/sched.h
+++ linux-5.15/include/linux/sched.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:121 @ struct task_group;
 
 #define task_is_running(task)		(READ_ONCE((task)->__state) == TASK_RUNNING)
 
-#define task_is_traced(task)		((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
-
 #define task_is_stopped(task)		((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
 
-#define task_is_stopped_or_traced(task)	((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
-
 /*
  * Special states are those that do not use the normal wait-loop pattern. See
  * the comment with set_special_state().
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1083 @ struct task_struct {
 	/* Restored if set_restore_sigmask() was used: */
 	sigset_t			saved_sigmask;
 	struct sigpending		pending;
+#ifdef CONFIG_PREEMPT_RT
+	/* TODO: move me into ->restart_block ? */
+	struct				kernel_siginfo forced_info;
+#endif
 	unsigned long			sas_ss_sp;
 	size_t				sas_ss_size;
 	unsigned int			sas_ss_flags;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1741 @ static __always_inline bool is_percpu_th
 #endif
 }
 
+/* Is the current task guaranteed to stay on its current CPU? */
+static inline bool is_migratable(void)
+{
+#ifdef CONFIG_SMP
+	return preemptible() && !current->migration_disabled;
+#else
+	return false;
+#endif
+}
+
 /* Per-process atomic flags. */
 #define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
 #define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2026 @ static inline int test_tsk_need_resched(
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
+#ifdef CONFIG_PREEMPT_LAZY
+static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
+}
+
+static inline int need_resched_lazy(void)
+{
+	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int need_resched_now(void)
+{
+	return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#else
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
+static inline int need_resched_lazy(void) { return 0; }
+
+static inline int need_resched_now(void)
+{
+	return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#endif
+
+#ifdef CONFIG_PREEMPT_RT
+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
+{
+	return p->saved_state == match_state;
+}
+
+static inline bool task_is_traced(struct task_struct *task)
+{
+	bool traced = false;
+
+	/* in case the task is sleeping on tasklist_lock */
+	raw_spin_lock_irq(&task->pi_lock);
+	if (READ_ONCE(task->__state) & __TASK_TRACED)
+		traced = true;
+	else if (task->saved_state & __TASK_TRACED)
+		traced = true;
+	raw_spin_unlock_irq(&task->pi_lock);
+	return traced;
+}
+
+static inline bool task_is_stopped_or_traced(struct task_struct *task)
+{
+	bool traced_stopped = false;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+	if (READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED))
+		traced_stopped = true;
+	else if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
+		traced_stopped = true;
+
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	return traced_stopped;
+}
+
+#else
+
+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
+{
+	return false;
+}
+
+static inline bool task_is_traced(struct task_struct *task)
+{
+	return READ_ONCE(task->__state) & __TASK_TRACED;
+}
+
+static inline bool task_is_stopped_or_traced(struct task_struct *task)
+{
+	return READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED);
+}
+#endif
+
+static inline bool task_match_state_or_saved(struct task_struct *p,
+					     long match_state)
+{
+	if (READ_ONCE(p->__state) == match_state)
+		return true;
+
+	return task_match_saved_state(p, match_state);
+}
+
+static inline bool task_match_state_lock(struct task_struct *p,
+					 long match_state)
+{
+	bool match;
+
+	raw_spin_lock_irq(&p->pi_lock);
+	match = task_match_state_or_saved(p, match_state);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	return match;
+}
+
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2172 @ static inline int _cond_resched(void) {
 #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
 
 #define cond_resched() ({			\
-	___might_sleep(__FILE__, __LINE__, 0);	\
+	__might_resched(__FILE__, __LINE__, 0);	\
 	_cond_resched();			\
 })
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2180 @ extern int __cond_resched_lock(spinlock_
 extern int __cond_resched_rwlock_read(rwlock_t *lock);
 extern int __cond_resched_rwlock_write(rwlock_t *lock);
 
-#define cond_resched_lock(lock) ({				\
-	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
-	__cond_resched_lock(lock);				\
+#define MIGHT_RESCHED_RCU_SHIFT		8
+#define MIGHT_RESCHED_PREEMPT_MASK	((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
+
+#ifndef CONFIG_PREEMPT_RT
+/*
+ * Non RT kernels have an elevated preempt count due to the held lock,
+ * but are not allowed to be inside a RCU read side critical section
+ */
+# define PREEMPT_LOCK_RESCHED_OFFSETS	PREEMPT_LOCK_OFFSET
+#else
+/*
+ * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
+ * cond_resched*lock() has to take that into account because it checks for
+ * preempt_count() and rcu_preempt_depth().
+ */
+# define PREEMPT_LOCK_RESCHED_OFFSETS	\
+	(PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
+#endif
+
+#define cond_resched_lock(lock) ({						\
+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
+	__cond_resched_lock(lock);						\
 })
 
-#define cond_resched_rwlock_read(lock) ({			\
-	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
-	__cond_resched_rwlock_read(lock);			\
+#define cond_resched_rwlock_read(lock) ({					\
+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
+	__cond_resched_rwlock_read(lock);					\
 })
 
-#define cond_resched_rwlock_write(lock) ({			\
-	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
-	__cond_resched_rwlock_write(lock);			\
+#define cond_resched_rwlock_write(lock) ({					\
+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
+	__cond_resched_rwlock_write(lock);					\
 })
 
 static inline void cond_resched_rcu(void)
Index: linux-5.15/include/linux/sched/mm.h
===================================================================
--- linux-5.15.orig/include/linux/sched/mm.h
+++ linux-5.15/include/linux/sched/mm.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:52 @ static inline void mmdrop(struct mm_stru
 		__mmdrop(mm);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+extern void __mmdrop_delayed(struct rcu_head *rhp);
+
+/*
+ * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
+ * kernels via RCU.
+ */
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+	/* Provides a full memory barrier. See mmdrop() */
+	if (atomic_dec_and_test(&mm->mm_count))
+		call_rcu(&mm->delayed_drop, __mmdrop_delayed);
+}
+#else
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+	mmdrop(mm);
+}
+#endif
+
 /**
  * mmget() - Pin the address space associated with a &struct mm_struct.
  * @mm: The address space to pin.
Index: linux-5.15/include/linux/serial_8250.h
===================================================================
--- linux-5.15.orig/include/linux/serial_8250.h
+++ linux-5.15/include/linux/serial_8250.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:10 @
 #ifndef _LINUX_SERIAL_8250_H
 #define _LINUX_SERIAL_8250_H
 
+#include <linux/atomic.h>
 #include <linux/serial_core.h>
 #include <linux/serial_reg.h>
 #include <linux/platform_device.h>
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:130 @ struct uart_8250_port {
 #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
 	unsigned char		msr_saved_flags;
 
+	atomic_t		console_printing;
+
 	struct uart_8250_dma	*dma;
 	const struct uart_8250_ops *ops;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:187 @ void serial8250_init_port(struct uart_82
 void serial8250_set_defaults(struct uart_8250_port *up);
 void serial8250_console_write(struct uart_8250_port *up, const char *s,
 			      unsigned int count);
+void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s,
+				     unsigned int count);
 int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
 int serial8250_console_exit(struct uart_port *port);
 
Index: linux-5.15/include/linux/skbuff.h
===================================================================
--- linux-5.15.orig/include/linux/skbuff.h
+++ linux-5.15/include/linux/skbuff.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:303 @ struct sk_buff_head {
 
 	__u32		qlen;
 	spinlock_t	lock;
+	raw_spinlock_t	raw_lock;
 };
 
 struct sk_buff;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1996 @ static inline void skb_queue_head_init(s
 	__skb_queue_head_init(list);
 }
 
+static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
+{
+	raw_spin_lock_init(&list->raw_lock);
+	__skb_queue_head_init(list);
+}
+
 static inline void skb_queue_head_init_class(struct sk_buff_head *list,
 		struct lock_class_key *class)
 {
Index: linux-5.15/include/linux/smp.h
===================================================================
--- linux-5.15.orig/include/linux/smp.h
+++ linux-5.15/include/linux/smp.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:271 @ static inline int get_boot_cpu_id(void)
 #define get_cpu()		({ preempt_disable(); __smp_processor_id(); })
 #define put_cpu()		preempt_enable()
 
+#define get_cpu_light()		({ migrate_disable(); __smp_processor_id(); })
+#define put_cpu_light()		migrate_enable()
+
 /*
  * Callback to arch code if there's nosmp or maxcpus=0 on the
  * boot command line:
Index: linux-5.15/include/linux/spinlock_types_up.h
===================================================================
--- linux-5.15.orig/include/linux/spinlock_types_up.h
+++ linux-5.15/include/linux/spinlock_types_up.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4 @
 #ifndef __LINUX_SPINLOCK_TYPES_UP_H
 #define __LINUX_SPINLOCK_TYPES_UP_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
Index: linux-5.15/include/linux/thread_info.h
===================================================================
--- linux-5.15.orig/include/linux/thread_info.h
+++ linux-5.15/include/linux/thread_info.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:166 @ static inline int test_ti_thread_flag(st
 	clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
 #endif /* !CONFIG_GENERIC_ENTRY */
 
-#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#ifdef CONFIG_PREEMPT_LAZY
+#define tif_need_resched()	(test_thread_flag(TIF_NEED_RESCHED) || \
+				 test_thread_flag(TIF_NEED_RESCHED_LAZY))
+#define tif_need_resched_now()	(test_thread_flag(TIF_NEED_RESCHED))
+#define tif_need_resched_lazy()	test_thread_flag(TIF_NEED_RESCHED_LAZY)
+
+#else
+#define tif_need_resched()	test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_now()	test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_lazy()	0
+#endif
 
 #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
 static inline int arch_within_stack_frames(const void * const stack,
Index: linux-5.15/include/linux/trace_events.h
===================================================================
--- linux-5.15.orig/include/linux/trace_events.h
+++ linux-5.15/include/linux/trace_events.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:72 @ struct trace_entry {
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
+	unsigned char		preempt_lazy_count;
 };
 
 #define TRACE_EVENT_TYPE_MAX						\
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:162 @ static inline void tracing_generic_entry
 						unsigned int trace_ctx)
 {
 	entry->preempt_count		= trace_ctx & 0xff;
+	entry->preempt_lazy_count	= (trace_ctx >> 16) & 0xff;
 	entry->pid			= current->pid;
 	entry->type			= type;
-	entry->flags =			trace_ctx >> 16;
+	entry->flags			= trace_ctx >> 24;
 }
 
 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:178 @ enum trace_flag_type {
 	TRACE_FLAG_SOFTIRQ		= 0x10,
 	TRACE_FLAG_PREEMPT_RESCHED	= 0x20,
 	TRACE_FLAG_NMI			= 0x40,
+	TRACE_FLAG_NEED_RESCHED_LAZY	= 0x80,
 };
 
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
Index: linux-5.15/include/linux/u64_stats_sync.h
===================================================================
--- linux-5.15.orig/include/linux/u64_stats_sync.h
+++ linux-5.15/include/linux/u64_stats_sync.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:69 @
 #include <linux/seqlock.h>
 
 struct u64_stats_sync {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG==32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	seqcount_t	seq;
 #endif
 };
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:86 @ static inline u64 u64_stats_read(const u
 	return local64_read(&p->v);
 }
 
+static inline void u64_stats_set(u64_stats_t *p, u64 val)
+{
+	local64_set(&p->v, val);
+}
+
 static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
 {
 	local64_add(val, &p->v);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:112 @ static inline u64 u64_stats_read(const u
 	return p->v;
 }
 
+static inline void u64_stats_set(u64_stats_t *p, u64 val)
+{
+	p->v = val;
+}
+
 static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
 {
 	p->v += val;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:128 @ static inline void u64_stats_inc(u64_sta
 }
 #endif
 
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 #define u64_stats_init(syncp)	seqcount_init(&(syncp)->seq)
 #else
 static inline void u64_stats_init(struct u64_stats_sync *syncp)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:138 @ static inline void u64_stats_init(struct
 
 static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
 	write_seqcount_begin(&syncp->seq);
 #endif
 }
 
 static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	write_seqcount_end(&syncp->seq);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 #endif
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:159 @ u64_stats_update_begin_irqsave(struct u6
 {
 	unsigned long flags = 0;
 
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-	local_irq_save(flags);
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+	else
+		local_irq_save(flags);
 	write_seqcount_begin(&syncp->seq);
 #endif
 	return flags;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:173 @ static inline void
 u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
 				unsigned long flags)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	write_seqcount_end(&syncp->seq);
-	local_irq_restore(flags);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
+	else
+		local_irq_restore(flags);
 #endif
 }
 
 static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	return read_seqcount_begin(&syncp->seq);
 #else
 	return 0;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:193 @ static inline unsigned int __u64_stats_f
 
 static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
 	preempt_disable();
 #endif
 	return __u64_stats_fetch_begin(syncp);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:202 @ static inline unsigned int u64_stats_fet
 static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
 					 unsigned int start)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	return read_seqcount_retry(&syncp->seq, start);
 #else
 	return false;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:212 @ static inline bool __u64_stats_fetch_ret
 static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
 					 unsigned int start)
 {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
 	preempt_enable();
 #endif
 	return __u64_stats_fetch_retry(syncp, start);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:226 @ static inline bool u64_stats_fetch_retry
  */
 static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
+	preempt_disable();
+#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
 	local_irq_disable();
 #endif
 	return __u64_stats_fetch_begin(syncp);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:237 @ static inline unsigned int u64_stats_fet
 static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
 					     unsigned int start)
 {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
+	preempt_enable();
+#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
 	local_irq_enable();
 #endif
 	return __u64_stats_fetch_retry(syncp, start);
Index: linux-5.15/include/net/act_api.h
===================================================================
--- linux-5.15.orig/include/net/act_api.h
+++ linux-5.15/include/net/act_api.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:33 @ struct tc_action {
 	atomic_t			tcfa_bindcnt;
 	int				tcfa_action;
 	struct tcf_t			tcfa_tm;
-	struct gnet_stats_basic_packed	tcfa_bstats;
-	struct gnet_stats_basic_packed	tcfa_bstats_hw;
+	struct gnet_stats_basic_sync	tcfa_bstats;
+	struct gnet_stats_basic_sync	tcfa_bstats_hw;
 	struct gnet_stats_queue		tcfa_qstats;
 	struct net_rate_estimator __rcu *tcfa_rate_est;
 	spinlock_t			tcfa_lock;
-	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
-	struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
+	struct gnet_stats_basic_sync __percpu *cpu_bstats;
+	struct gnet_stats_basic_sync __percpu *cpu_bstats_hw;
 	struct gnet_stats_queue __percpu *cpu_qstats;
 	struct tc_cookie	__rcu *act_cookie;
 	struct tcf_chain	__rcu *goto_chain;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:209 @ static inline void tcf_action_update_bst
 					    struct sk_buff *skb)
 {
 	if (likely(a->cpu_bstats)) {
-		bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+		bstats_update(this_cpu_ptr(a->cpu_bstats), skb);
 		return;
 	}
 	spin_lock(&a->tcfa_lock);
Index: linux-5.15/include/net/gen_stats.h
===================================================================
--- linux-5.15.orig/include/net/gen_stats.h
+++ linux-5.15/include/net/gen_stats.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:10 @
 #include <linux/rtnetlink.h>
 #include <linux/pkt_sched.h>
 
-/* Note: this used to be in include/uapi/linux/gen_stats.h */
-struct gnet_stats_basic_packed {
-	__u64	bytes;
-	__u64	packets;
-};
-
-struct gnet_stats_basic_cpu {
-	struct gnet_stats_basic_packed bstats;
+/* Throughput stats.
+ * Must be initialized beforehand with gnet_stats_basic_sync_init().
+ *
+ * If no reads can ever occur parallel to writes (e.g. stack-allocated
+ * bstats), then the internal stat values can be written to and read
+ * from directly. Otherwise, use _bstats_set/update() for writes and
+ * gnet_stats_add_basic() for reads.
+ */
+struct gnet_stats_basic_sync {
+	u64_stats_t bytes;
+	u64_stats_t packets;
 	struct u64_stats_sync syncp;
 } __aligned(2 * sizeof(u64));
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:40 @ struct gnet_dump {
 	struct tc_stats   tc_stats;
 };
 
+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b);
 int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
 			  struct gnet_dump *d, int padattr);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:49 @ int gnet_stats_start_copy_compat(struct
 				 spinlock_t *lock, struct gnet_dump *d,
 				 int padattr);
 
-int gnet_stats_copy_basic(const seqcount_t *running,
-			  struct gnet_dump *d,
-			  struct gnet_stats_basic_cpu __percpu *cpu,
-			  struct gnet_stats_basic_packed *b);
-void __gnet_stats_copy_basic(const seqcount_t *running,
-			     struct gnet_stats_basic_packed *bstats,
-			     struct gnet_stats_basic_cpu __percpu *cpu,
-			     struct gnet_stats_basic_packed *b);
-int gnet_stats_copy_basic_hw(const seqcount_t *running,
-			     struct gnet_dump *d,
-			     struct gnet_stats_basic_cpu __percpu *cpu,
-			     struct gnet_stats_basic_packed *b);
+int gnet_stats_copy_basic(struct gnet_dump *d,
+			  struct gnet_stats_basic_sync __percpu *cpu,
+			  struct gnet_stats_basic_sync *b, bool running);
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+			  struct gnet_stats_basic_sync __percpu *cpu,
+			  struct gnet_stats_basic_sync *b, bool running);
+int gnet_stats_copy_basic_hw(struct gnet_dump *d,
+			     struct gnet_stats_basic_sync __percpu *cpu,
+			     struct gnet_stats_basic_sync *b, bool running);
 int gnet_stats_copy_rate_est(struct gnet_dump *d,
 			     struct net_rate_estimator __rcu **ptr);
 int gnet_stats_copy_queue(struct gnet_dump *d,
 			  struct gnet_stats_queue __percpu *cpu_q,
 			  struct gnet_stats_queue *q, __u32 qlen);
-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
-			     const struct gnet_stats_queue __percpu *cpu_q,
-			     const struct gnet_stats_queue *q, __u32 qlen);
+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
+			  const struct gnet_stats_queue __percpu *cpu_q,
+			  const struct gnet_stats_queue *q);
 int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
 
 int gnet_stats_finish_copy(struct gnet_dump *d);
 
-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
-		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
+		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
 		      struct net_rate_estimator __rcu **rate_est,
 		      spinlock_t *lock,
-		      seqcount_t *running, struct nlattr *opt);
+		      bool running, struct nlattr *opt);
 void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
-			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
 			  struct net_rate_estimator __rcu **ptr,
 			  spinlock_t *lock,
-			  seqcount_t *running, struct nlattr *opt);
+			  bool running, struct nlattr *opt);
 bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
 bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
 			struct gnet_stats_rate_est64 *sample);
Index: linux-5.15/include/net/netfilter/xt_rateest.h
===================================================================
--- linux-5.15.orig/include/net/netfilter/xt_rateest.h
+++ linux-5.15/include/net/netfilter/xt_rateest.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:9 @
 
 struct xt_rateest {
 	/* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
-	struct gnet_stats_basic_packed	bstats;
+	struct gnet_stats_basic_sync	bstats;
 	spinlock_t			lock;
 
 
Index: linux-5.15/include/net/pkt_cls.h
===================================================================
--- linux-5.15.orig/include/net/pkt_cls.h
+++ linux-5.15/include/net/pkt_cls.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:768 @ struct tc_cookie {
 };
 
 struct tc_qopt_offload_stats {
-	struct gnet_stats_basic_packed *bstats;
+	struct gnet_stats_basic_sync *bstats;
 	struct gnet_stats_queue *qstats;
 };
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:888 @ struct tc_gred_qopt_offload_params {
 };
 
 struct tc_gred_qopt_offload_stats {
-	struct gnet_stats_basic_packed bstats[MAX_DPs];
+	struct gnet_stats_basic_sync bstats[MAX_DPs];
 	struct gnet_stats_queue qstats[MAX_DPs];
 	struct red_stats *xstats[MAX_DPs];
 };
Index: linux-5.15/include/net/sch_generic.h
===================================================================
--- linux-5.15.orig/include/net/sch_generic.h
+++ linux-5.15/include/net/sch_generic.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:43 @ enum qdisc_state_t {
 	__QDISC_STATE_DRAINING,
 };
 
+enum qdisc_state2_t {
+	/* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
+	 * Use qdisc_run_begin/end() or qdisc_is_running() instead.
+	 */
+	__QDISC_STATE2_RUNNING,
+};
+
 #define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
 #define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:107 @ struct Qdisc {
 	struct netdev_queue	*dev_queue;
 
 	struct net_rate_estimator __rcu *rate_est;
-	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+	struct gnet_stats_basic_sync __percpu *cpu_bstats;
 	struct gnet_stats_queue	__percpu *cpu_qstats;
 	int			pad;
 	refcount_t		refcnt;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:117 @ struct Qdisc {
 	 */
 	struct sk_buff_head	gso_skb ____cacheline_aligned_in_smp;
 	struct qdisc_skb_head	q;
-	struct gnet_stats_basic_packed bstats;
-	seqcount_t		running;
+	struct gnet_stats_basic_sync bstats;
 	struct gnet_stats_queue	qstats;
 	unsigned long		state;
+	unsigned long		state2; /* must be written under qdisc spinlock */
 	struct Qdisc            *next_sched;
 	struct sk_buff_head	skb_bad_txq;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:153 @ static inline struct Qdisc *qdisc_refcou
 	return NULL;
 }
 
+/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
+ * root_lock section, or provide their own memory barriers -- ordering
+ * against qdisc_run_begin/end() atomic bit operations.
+ */
 static inline bool qdisc_is_running(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK)
 		return spin_is_locked(&qdisc->seqlock);
-	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
+	return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
 }
 
 static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:181 @ static inline bool qdisc_is_empty(const
 	return !READ_ONCE(qdisc->q.qlen);
 }
 
+/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
+ * the qdisc root lock acquired.
+ */
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK) {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:203 @ static inline bool qdisc_run_begin(struc
 		 * when testing it in qdisc_run_end()
 		 */
 		return spin_trylock(&qdisc->seqlock);
-	} else if (qdisc_is_running(qdisc)) {
-		return false;
 	}
-	/* Variant of write_seqcount_begin() telling lockdep a trylock
-	 * was attempted.
-	 */
-	raw_write_seqcount_begin(&qdisc->running);
-	seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
-	return true;
+	return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
 }
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:222 @ static inline void qdisc_run_end(struct
 				      &qdisc->state)))
 			__netif_schedule(qdisc);
 	} else {
-		write_seqcount_end(&qdisc->running);
+		__clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
 	}
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:586 @ static inline spinlock_t *qdisc_root_sle
 	return qdisc_lock(root);
 }
 
-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
-{
-	struct Qdisc *root = qdisc_root_sleeping(qdisc);
-
-	ASSERT_RTNL();
-	return &root->running;
-}
-
 static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
 {
 	return qdisc->dev_queue->dev;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:835 @ static inline int qdisc_enqueue(struct s
 	return sch->enqueue(skb, sch, to_free);
 }
 
-static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
+static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
 				  __u64 bytes, __u32 packets)
 {
-	bstats->bytes += bytes;
-	bstats->packets += packets;
+	u64_stats_update_begin(&bstats->syncp);
+	u64_stats_add(&bstats->bytes, bytes);
+	u64_stats_add(&bstats->packets, packets);
+	u64_stats_update_end(&bstats->syncp);
 }
 
-static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
+static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
 				 const struct sk_buff *skb)
 {
 	_bstats_update(bstats,
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:852 @ static inline void bstats_update(struct
 		       skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
 }
 
-static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
-				      __u64 bytes, __u32 packets)
-{
-	u64_stats_update_begin(&bstats->syncp);
-	_bstats_update(&bstats->bstats, bytes, packets);
-	u64_stats_update_end(&bstats->syncp);
-}
-
-static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
-				     const struct sk_buff *skb)
-{
-	u64_stats_update_begin(&bstats->syncp);
-	bstats_update(&bstats->bstats, skb);
-	u64_stats_update_end(&bstats->syncp);
-}
-
 static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
 					   const struct sk_buff *skb)
 {
-	bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
+	bstats_update(this_cpu_ptr(sch->cpu_bstats), skb);
 }
 
 static inline void qdisc_bstats_update(struct Qdisc *sch,
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:944 @ static inline void qdisc_qstats_qlen_bac
 					     __u32 *backlog)
 {
 	struct gnet_stats_queue qstats = { 0 };
-	__u32 len = qdisc_qlen_sum(sch);
 
-	__gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len);
-	*qlen = qstats.qlen;
+	gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats);
+	*qlen = qstats.qlen + qdisc_qlen(sch);
 	*backlog = qstats.backlog;
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1291 @ void psched_ppscfg_precompute(struct psc
 struct mini_Qdisc {
 	struct tcf_proto *filter_list;
 	struct tcf_block *block;
-	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+	struct gnet_stats_basic_sync __percpu *cpu_bstats;
 	struct gnet_stats_queue	__percpu *cpu_qstats;
 	struct rcu_head rcu;
 };
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1299 @ struct mini_Qdisc {
 static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
 						const struct sk_buff *skb)
 {
-	bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
+	bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb);
 }
 
 static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
Index: linux-5.15/init/Kconfig
===================================================================
--- linux-5.15.orig/init/Kconfig
+++ linux-5.15/init/Kconfig
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:922 @ config NUMA_BALANCING
 	bool "Memory placement aware NUMA scheduler"
 	depends on ARCH_SUPPORTS_NUMA_BALANCING
 	depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
-	depends on SMP && NUMA && MIGRATION
+	depends on SMP && NUMA && MIGRATION && !PREEMPT_RT
 	help
 	  This option adds support for automatic NUMA aware memory/task placement.
 	  The mechanism is quite primitive and is based on migrating memory when
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1917 @ choice
 
 config SLAB
 	bool "SLAB"
+	depends on !PREEMPT_RT
 	select HAVE_HARDENED_USERCOPY_ALLOCATOR
 	help
 	  The regular slab allocator that is established and known to work
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1938 @ config SLUB
 config SLOB
 	depends on EXPERT
 	bool "SLOB (Simple Allocator)"
+	depends on !PREEMPT_RT
 	help
 	   SLOB replaces the stock allocator with a drastically simpler
 	   allocator. SLOB is generally more space efficient but
Index: linux-5.15/init/main.c
===================================================================
--- linux-5.15.orig/init/main.c
+++ linux-5.15/init/main.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1609 @ static noinline void __init kernel_init_
 
 	rcu_init_tasks_generic();
 	do_pre_smp_initcalls();
+	rcu_tasks_initiate_self_tests();
 	lockup_detector_init();
 
 	smp_init();
Index: linux-5.15/kernel/Kconfig.preempt
===================================================================
--- linux-5.15.orig/kernel/Kconfig.preempt
+++ linux-5.15/kernel/Kconfig.preempt
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4 @
 # SPDX-License-Identifier: GPL-2.0-only
 
+config HAVE_PREEMPT_LAZY
+	bool
+
+config PREEMPT_LAZY
+	def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT
+
 choice
 	prompt "Preemption Model"
 	default PREEMPT_NONE
Index: linux-5.15/kernel/cgroup/rstat.c
===================================================================
--- linux-5.15.orig/kernel/cgroup/rstat.c
+++ linux-5.15/kernel/cgroup/rstat.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:159 @ static void cgroup_rstat_flush_locked(st
 		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
 						       cpu);
 		struct cgroup *pos = NULL;
+		unsigned long flags;
 
-		raw_spin_lock(cpu_lock);
+		raw_spin_lock_irqsave(cpu_lock, flags);
 		while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
 			struct cgroup_subsys_state *css;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:173 @ static void cgroup_rstat_flush_locked(st
 				css->ss->css_rstat_flush(css, cpu);
 			rcu_read_unlock();
 		}
-		raw_spin_unlock(cpu_lock);
+		raw_spin_unlock_irqrestore(cpu_lock, flags);
 
 		/* if @may_sleep, play nice and yield if necessary */
 		if (may_sleep && (need_resched() ||
Index: linux-5.15/kernel/debug/debug_core.c
===================================================================
--- linux-5.15.orig/kernel/debug/debug_core.c
+++ linux-5.15/kernel/debug/debug_core.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:242 @ NOKPROBE_SYMBOL(kgdb_call_nmi_hook);
 static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd) =
 	CSD_INIT(kgdb_call_nmi_hook, NULL);
 
-void __weak kgdb_roundup_cpus(void)
+void __weak kgdb_roundup_cpu(unsigned int cpu)
 {
 	call_single_data_t *csd;
+	int ret;
+
+	csd = &per_cpu(kgdb_roundup_csd, cpu);
+
+	/*
+	 * If it didn't round up last time, don't try again
+	 * since smp_call_function_single_async() will block.
+	 *
+	 * If rounding_up is false then we know that the
+	 * previous call must have at least started and that
+	 * means smp_call_function_single_async() won't block.
+	 */
+	if (kgdb_info[cpu].rounding_up)
+		return;
+	kgdb_info[cpu].rounding_up = true;
+
+	ret = smp_call_function_single_async(cpu, csd);
+	if (ret)
+		kgdb_info[cpu].rounding_up = false;
+}
+NOKPROBE_SYMBOL(kgdb_roundup_cpu);
+
+void __weak kgdb_roundup_cpus(void)
+{
 	int this_cpu = raw_smp_processor_id();
 	int cpu;
-	int ret;
 
 	for_each_online_cpu(cpu) {
 		/* No need to roundup ourselves */
 		if (cpu == this_cpu)
 			continue;
 
-		csd = &per_cpu(kgdb_roundup_csd, cpu);
-
-		/*
-		 * If it didn't round up last time, don't try again
-		 * since smp_call_function_single_async() will block.
-		 *
-		 * If rounding_up is false then we know that the
-		 * previous call must have at least started and that
-		 * means smp_call_function_single_async() won't block.
-		 */
-		if (kgdb_info[cpu].rounding_up)
-			continue;
-		kgdb_info[cpu].rounding_up = true;
-
-		ret = smp_call_function_single_async(cpu, csd);
-		if (ret)
-			kgdb_info[cpu].rounding_up = false;
+		kgdb_roundup_cpu(cpu);
 	}
 }
 NOKPROBE_SYMBOL(kgdb_roundup_cpus);
Index: linux-5.15/kernel/debug/kdb/kdb_io.c
===================================================================
--- linux-5.15.orig/kernel/debug/kdb/kdb_io.c
+++ linux-5.15/kernel/debug/kdb/kdb_io.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:562 @ static void kdb_msg_write(const char *ms
 		cp++;
 	}
 
+	/* mirror output on atomic consoles */
 	for_each_console(c) {
 		if (!(c->flags & CON_ENABLED))
 			continue;
 		if (c == dbg_io_ops->cons)
 			continue;
-		/*
-		 * Set oops_in_progress to encourage the console drivers to
-		 * disregard their internal spin locks: in the current calling
-		 * context the risk of deadlock is a bigger problem than risks
-		 * due to re-entering the console driver. We operate directly on
-		 * oops_in_progress rather than using bust_spinlocks() because
-		 * the calls bust_spinlocks() makes on exit are not appropriate
-		 * for this calling context.
-		 */
-		++oops_in_progress;
-		c->write(c, msg, msg_len);
-		--oops_in_progress;
+
+		if (!c->write_atomic)
+			continue;
+		c->write_atomic(c, msg, msg_len);
+
 		touch_nmi_watchdog();
 	}
 }
Index: linux-5.15/kernel/entry/common.c
===================================================================
--- linux-5.15.orig/kernel/entry/common.c
+++ linux-5.15/kernel/entry/common.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:162 @ static unsigned long exit_to_user_mode_l
 
 		local_irq_enable_exit_to_user(ti_work);
 
-		if (ti_work & _TIF_NEED_RESCHED)
+		if (ti_work & _TIF_NEED_RESCHED_MASK)
 			schedule();
 
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+		if (unlikely(current->forced_info.si_signo)) {
+			struct task_struct *t = current;
+			force_sig_info(&t->forced_info);
+			t->forced_info.si_signo = 0;
+		}
+#endif
+
 		if (ti_work & _TIF_UPROBE)
 			uprobe_notify_resume(regs);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:398 @ void irqentry_exit_cond_resched(void)
 		rcu_irq_exit_check_preempt();
 		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
 			WARN_ON_ONCE(!on_thread_stack());
-		if (need_resched())
+		if (should_resched(0))
 			preempt_schedule_irq();
 	}
 }
Index: linux-5.15/kernel/exit.c
===================================================================
--- linux-5.15.orig/kernel/exit.c
+++ linux-5.15/kernel/exit.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:67 @
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
 #include <linux/io_uring.h>
+#include <linux/kprobes.h>
 #include <linux/sysfs.h>
 
 #include <linux/uaccess.h>
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:219 @ static void delayed_put_task_struct(stru
 {
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
+	kprobe_flush_task(tsk);
 	perf_event_delayed_put(tsk);
 	trace_sched_process_free(tsk);
+
+	/* RT enabled kernels delay freeing the VMAP'ed task stack */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		put_task_stack(tsk);
+
 	put_task_struct(tsk);
 }
 
Index: linux-5.15/kernel/fork.c
===================================================================
--- linux-5.15.orig/kernel/fork.c
+++ linux-5.15/kernel/fork.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:297 @ static inline void free_thread_stack(str
 			return;
 		}
 
-		vfree_atomic(tsk->stack);
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+			vfree_atomic(tsk->stack);
+		else
+			vfree(tsk->stack);
 		return;
 	}
 #endif
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:719 @ void __mmdrop(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
 
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is
+ * by far the least expensive way to do that.
+ */
+void __mmdrop_delayed(struct rcu_head *rhp)
+{
+	struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
+
+	__mmdrop(mm);
+}
+#endif
+
 static void mmdrop_async_fn(struct work_struct *work)
 {
 	struct mm_struct *mm;
Index: linux-5.15/kernel/irq/irqdesc.c
===================================================================
--- linux-5.15.orig/kernel/irq/irqdesc.c
+++ linux-5.15/kernel/irq/irqdesc.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:667 @ int generic_handle_irq(unsigned int irq)
 }
 EXPORT_SYMBOL_GPL(generic_handle_irq);
 
+/**
+ * generic_handle_irq_safe - Invoke the handler for a particular irq from any
+ *			     context.
+ * @irq:	The irq number to handle
+ *
+ * Returns:	0 on success, a negative value on error.
+ *
+ * This function can be called from any context (IRQ or process context). It
+ * will report an error if not invoked from IRQ context and the irq has been
+ * marked to enforce IRQ-context only.
+ */
+int generic_handle_irq_safe(unsigned int irq)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = handle_irq_desc(irq_to_desc(irq));
+	local_irq_restore(flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
+
 #ifdef CONFIG_IRQ_DOMAIN
 /**
  * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
Index: linux-5.15/kernel/irq/manage.c
===================================================================
--- linux-5.15.orig/kernel/irq/manage.c
+++ linux-5.15/kernel/irq/manage.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1304 @ static int irq_thread(void *data)
 
 	irq_thread_set_ready(desc, action);
 
+	sched_set_fifo(current);
+
 	if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD,
 					   &action->thread_flags))
 		handler_fn = irq_forced_thread_fn;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1471 @ setup_irq_thread(struct irqaction *new,
 	if (IS_ERR(t))
 		return PTR_ERR(t);
 
-	sched_set_fifo(t);
-
 	/*
 	 * We keep the reference to the task struct even if
 	 * the thread dies to avoid that the interrupt code
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2864 @ EXPORT_SYMBOL_GPL(irq_get_irqchip_state)
  *	This call sets the internal irqchip state of an interrupt,
  *	depending on the value of @which.
  *
- *	This function should be called with preemption disabled if the
+ *	This function should be called with migration disabled if the
  *	interrupt controller has per-cpu registers.
  */
 int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
Index: linux-5.15/kernel/irq/spurious.c
===================================================================
--- linux-5.15.orig/kernel/irq/spurious.c
+++ linux-5.15/kernel/irq/spurious.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:450 @ MODULE_PARM_DESC(noirqdebug, "Disable ir
 
 static int __init irqfixup_setup(char *str)
 {
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		pr_warn("irqfixup boot option not supported with PREEMPT_RT\n");
+		return 1;
+	}
 	irqfixup = 1;
 	printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
 	printk(KERN_WARNING "This may impact system performance.\n");
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:466 @ module_param(irqfixup, int, 0644);
 
 static int __init irqpoll_setup(char *str)
 {
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		pr_warn("irqpoll boot option not supported with PREEMPT_RT\n");
+		return 1;
+	}
 	irqfixup = 2;
 	printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
 				"enabled\n");
Index: linux-5.15/kernel/irq_work.c
===================================================================
--- linux-5.15.orig/kernel/irq_work.c
+++ linux-5.15/kernel/irq_work.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:21 @
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
+#include <linux/smpboot.h>
 #include <asm/processor.h>
 #include <linux/kasan.h>
 
 static DEFINE_PER_CPU(struct llist_head, raised_list);
 static DEFINE_PER_CPU(struct llist_head, lazy_list);
+static DEFINE_PER_CPU(struct task_struct *, irq_workd);
+
+static void wake_irq_workd(void)
+{
+	struct task_struct *tsk = __this_cpu_read(irq_workd);
+
+	if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
+		wake_up_process(tsk);
+}
+
+#ifdef CONFIG_SMP
+static void irq_work_wake(struct irq_work *entry)
+{
+	wake_irq_workd();
+}
+
+static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
+	IRQ_WORK_INIT_HARD(irq_work_wake);
+#endif
+
+static int irq_workd_should_run(unsigned int cpu)
+{
+	return !llist_empty(this_cpu_ptr(&lazy_list));
+}
 
 /*
  * Claim the entry so that no one else will poke at it.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:80 @ void __weak arch_irq_work_raise(void)
 /* Enqueue on current CPU, work must already be claimed and preempt disabled */
 static void __irq_work_queue_local(struct irq_work *work)
 {
+	struct llist_head *list;
+	bool rt_lazy_work = false;
+	bool lazy_work = false;
+	int work_flags;
+
+	work_flags = atomic_read(&work->node.a_flags);
+	if (work_flags & IRQ_WORK_LAZY)
+		lazy_work = true;
+	else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
+		 !(work_flags & IRQ_WORK_HARD_IRQ))
+		rt_lazy_work = true;
+
+	if (lazy_work || rt_lazy_work)
+		list = this_cpu_ptr(&lazy_list);
+	else
+		list = this_cpu_ptr(&raised_list);
+
+	if (!llist_add(&work->node.llist, list))
+		return;
+
 	/* If the work is "lazy", handle it from next tick if any */
-	if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) {
-		if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) &&
-		    tick_nohz_tick_stopped())
-			arch_irq_work_raise();
-	} else {
-		if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list)))
-			arch_irq_work_raise();
-	}
+	if (!lazy_work || tick_nohz_tick_stopped())
+		arch_irq_work_raise();
 }
 
 /* Enqueue the irq work @work on the current CPU */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:146 @ bool irq_work_queue_on(struct irq_work *
 	if (cpu != smp_processor_id()) {
 		/* Arch remote IPI send/receive backend aren't NMI safe */
 		WARN_ON_ONCE(in_nmi());
+
+		/*
+		 * On PREEMPT_RT the items which are not marked as
+		 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
+		 * item is used on the remote CPU to wake the thread.
+		 */
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
+		    !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
+
+			if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
+				goto out;
+
+			work = &per_cpu(irq_work_wakeup, cpu);
+			if (!irq_work_claim(work))
+				goto out;
+		}
+
 		__smp_call_single_queue(cpu, &work->node.llist);
 	} else {
 		__irq_work_queue_local(work);
 	}
+out:
 	preempt_enable();
 
 	return true;
 #endif /* CONFIG_SMP */
 }
 
-
 bool irq_work_needs_cpu(void)
 {
 	struct llist_head *raised, *lazy;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:219 @ void irq_work_single(void *arg)
 	 * else claimed it meanwhile.
 	 */
 	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
+
+	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+	    !arch_irq_work_has_interrupt())
+		rcuwait_wake_up(&work->irqwait);
 }
 
 static void irq_work_run_list(struct llist_head *list)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:230 @ static void irq_work_run_list(struct lli
 	struct irq_work *work, *tmp;
 	struct llist_node *llnode;
 
-	BUG_ON(!irqs_disabled());
+	/*
+	 * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
+	 * in a per-CPU thread in preemptible context. Only the items which are
+	 * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
+	 */
+	BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
 
 	if (llist_empty(list))
 		return;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:252 @ static void irq_work_run_list(struct lli
 void irq_work_run(void)
 {
 	irq_work_run_list(this_cpu_ptr(&raised_list));
-	irq_work_run_list(this_cpu_ptr(&lazy_list));
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		irq_work_run_list(this_cpu_ptr(&lazy_list));
+	else
+		wake_irq_workd();
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:265 @ void irq_work_tick(void)
 
 	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
 		irq_work_run_list(raised);
-	irq_work_run_list(this_cpu_ptr(&lazy_list));
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		irq_work_run_list(this_cpu_ptr(&lazy_list));
+	else
+		wake_irq_workd();
 }
 
 /*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:279 @ void irq_work_tick(void)
 void irq_work_sync(struct irq_work *work)
 {
 	lockdep_assert_irqs_enabled();
+	might_sleep();
+
+	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+	    !arch_irq_work_has_interrupt()) {
+		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
+				   TASK_UNINTERRUPTIBLE);
+		return;
+	}
 
 	while (irq_work_is_busy(work))
 		cpu_relax();
 }
 EXPORT_SYMBOL_GPL(irq_work_sync);
+
+static void run_irq_workd(unsigned int cpu)
+{
+	irq_work_run_list(this_cpu_ptr(&lazy_list));
+}
+
+static void irq_workd_setup(unsigned int cpu)
+{
+	sched_set_fifo_low(current);
+}
+
+static struct smp_hotplug_thread irqwork_threads = {
+	.store                  = &irq_workd,
+	.setup			= irq_workd_setup,
+	.thread_should_run      = irq_workd_should_run,
+	.thread_fn              = run_irq_workd,
+	.thread_comm            = "irq_work/%u",
+};
+
+static __init int irq_work_init_threads(void)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
+	return 0;
+}
+early_initcall(irq_work_init_threads);
Index: linux-5.15/kernel/kcov.c
===================================================================
--- linux-5.15.orig/kernel/kcov.c
+++ linux-5.15/kernel/kcov.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:91 @ static struct list_head kcov_remote_area
 
 struct kcov_percpu_data {
 	void			*irq_area;
+	local_lock_t		lock;
 
 	unsigned int		saved_mode;
 	unsigned int		saved_size;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:100 @ struct kcov_percpu_data {
 	int			saved_sequence;
 };
 
-static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data);
+static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+};
 
 /* Must be called with kcov_remote_lock locked. */
 static struct kcov_remote *kcov_remote_find(u64 handle)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:830 @ void kcov_remote_start(u64 handle)
 	if (!in_task() && !in_serving_softirq())
 		return;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&kcov_percpu_data.lock, flags);
 
 	/*
 	 * Check that kcov_remote_start() is not called twice in background
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:838 @ void kcov_remote_start(u64 handle)
 	 */
 	mode = READ_ONCE(t->kcov_mode);
 	if (WARN_ON(in_task() && kcov_mode_enabled(mode))) {
-		local_irq_restore(flags);
+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
 		return;
 	}
 	/*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:847 @ void kcov_remote_start(u64 handle)
 	 * happened while collecting coverage from a background thread.
 	 */
 	if (WARN_ON(in_serving_softirq() && t->kcov_softirq)) {
-		local_irq_restore(flags);
+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
 		return;
 	}
 
 	spin_lock(&kcov_remote_lock);
 	remote = kcov_remote_find(handle);
 	if (!remote) {
-		spin_unlock_irqrestore(&kcov_remote_lock, flags);
+		spin_unlock(&kcov_remote_lock);
+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
 		return;
 	}
 	kcov_debug("handle = %llx, context: %s\n", handle,
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:876 @ void kcov_remote_start(u64 handle)
 		size = CONFIG_KCOV_IRQ_AREA_SIZE;
 		area = this_cpu_ptr(&kcov_percpu_data)->irq_area;
 	}
-	spin_unlock_irqrestore(&kcov_remote_lock, flags);
+	spin_unlock(&kcov_remote_lock);
 
 	/* Can only happen when in_task(). */
 	if (!area) {
+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
 		area = vmalloc(size * sizeof(unsigned long));
 		if (!area) {
 			kcov_put(kcov);
 			return;
 		}
+		local_lock_irqsave(&kcov_percpu_data.lock, flags);
 	}
 
-	local_irq_save(flags);
-
 	/* Reset coverage size. */
 	*(u64 *)area = 0;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:898 @ void kcov_remote_start(u64 handle)
 	}
 	kcov_start(t, kcov, size, area, mode, sequence);
 
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
 
 }
 EXPORT_SYMBOL(kcov_remote_start);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:972 @ void kcov_remote_stop(void)
 	if (!in_task() && !in_serving_softirq())
 		return;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&kcov_percpu_data.lock, flags);
 
 	mode = READ_ONCE(t->kcov_mode);
 	barrier();
 	if (!kcov_mode_enabled(mode)) {
-		local_irq_restore(flags);
+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
 		return;
 	}
 	/*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:985 @ void kcov_remote_stop(void)
 	 * actually found the remote handle and started collecting coverage.
 	 */
 	if (in_serving_softirq() && !t->kcov_softirq) {
-		local_irq_restore(flags);
+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
 		return;
 	}
 	/* Make sure that kcov_softirq is only set when in softirq. */
 	if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) {
-		local_irq_restore(flags);
+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
 		return;
 	}
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1020 @ void kcov_remote_stop(void)
 		spin_unlock(&kcov_remote_lock);
 	}
 
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
 
 	/* Get in kcov_remote_start(). */
 	kcov_put(kcov);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1041 @ static int __init kcov_init(void)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		void *area = vmalloc(CONFIG_KCOV_IRQ_AREA_SIZE *
-				sizeof(unsigned long));
+		void *area = vmalloc_node(CONFIG_KCOV_IRQ_AREA_SIZE *
+				sizeof(unsigned long), cpu_to_node(cpu));
 		if (!area)
 			return -ENOMEM;
 		per_cpu_ptr(&kcov_percpu_data, cpu)->irq_area = area;
Index: linux-5.15/kernel/kprobes.c
===================================================================
--- linux-5.15.orig/kernel/kprobes.c
+++ linux-5.15/kernel/kprobes.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1251 @ void kprobe_busy_end(void)
 }
 
 /*
- * This function is called from finish_task_switch when task tk becomes dead,
- * so that we can recycle any function-return probe instances associated
- * with this task. These left over instances represent probed functions
- * that have been called but will never return.
+ * This function is called from delayed_put_task_struct() when a task is
+ * dead and cleaned up to recycle any function-return probe instances
+ * associated with this task. These left over instances represent probed
+ * functions that have been called but will never return.
  */
 void kprobe_flush_task(struct task_struct *tk)
 {
Index: linux-5.15/kernel/ksysfs.c
===================================================================
--- linux-5.15.orig/kernel/ksysfs.c
+++ linux-5.15/kernel/ksysfs.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:141 @ KERNEL_ATTR_RO(vmcoreinfo);
 
 #endif /* CONFIG_CRASH_CORE */
 
+#if defined(CONFIG_PREEMPT_RT)
+static ssize_t realtime_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", 1);
+}
+KERNEL_ATTR_RO(realtime);
+#endif
+
 /* whether file capabilities are enabled */
 static ssize_t fscaps_show(struct kobject *kobj,
 				  struct kobj_attribute *attr, char *buf)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:241 @ static struct attribute * kernel_attrs[]
 	&rcu_expedited_attr.attr,
 	&rcu_normal_attr.attr,
 #endif
+#ifdef CONFIG_PREEMPT_RT
+	&realtime_attr.attr,
+#endif
 	NULL
 };
 
Index: linux-5.15/kernel/kthread.c
===================================================================
--- linux-5.15.orig/kernel/kthread.c
+++ linux-5.15/kernel/kthread.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:273 @ EXPORT_SYMBOL_GPL(kthread_parkme);
 
 static int kthread(void *_create)
 {
+	static const struct sched_param param = { .sched_priority = 0 };
 	/* Copy data: it's on kthread's stack */
 	struct kthread_create_info *create = _create;
 	int (*threadfn)(void *data) = create->threadfn;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:304 @ static int kthread(void *_create)
 	init_completion(&self->parked);
 	current->vfork_done = &self->exited;
 
+	/*
+	 * The new thread inherited kthreadd's priority and CPU mask. Reset
+	 * back to default in case they have been changed.
+	 */
+	sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
+	set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD));
+
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
 	create->result = current;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:420 @ struct task_struct *__kthread_create_on_
 ready:
 	task = create->result;
 	if (!IS_ERR(task)) {
-		static const struct sched_param param = { .sched_priority = 0 };
 		char name[TASK_COMM_LEN];
 
 		/*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:428 @ ready:
 		 */
 		vsnprintf(name, sizeof(name), namefmt, args);
 		set_task_comm(task, name);
-		/*
-		 * root may have changed our (kthreadd's) priority or CPU mask.
-		 * The kernel thread should not inherit these properties.
-		 */
-		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
-		set_cpus_allowed_ptr(task,
-				     housekeeping_cpumask(HK_FLAG_KTHREAD));
 	}
 	kfree(create);
 	return task;
Index: linux-5.15/kernel/locking/lockdep.c
===================================================================
--- linux-5.15.orig/kernel/locking/lockdep.c
+++ linux-5.15/kernel/locking/lockdep.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5475 @ static noinstr void check_flags(unsigned
 		}
 	}
 
+#ifndef CONFIG_PREEMPT_RT
 	/*
 	 * We dont accurately track softirq state in e.g.
 	 * hardirq contexts (such as on 4KSTACKS), so only
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5490 @ static noinstr void check_flags(unsigned
 			DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
 		}
 	}
+#endif
 
 	if (!debug_locks)
 		print_irqtrace_events(current);
Index: linux-5.15/kernel/locking/rtmutex.c
===================================================================
--- linux-5.15.orig/kernel/locking/rtmutex.c
+++ linux-5.15/kernel/locking/rtmutex.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1101 @ static int __sched task_blocks_on_rt_mut
 	 * which is wrong, as the other waiter is not in a deadlock
 	 * situation.
 	 */
-	if (owner == task)
+	if (owner == task) {
+#if defined(DEBUG_WW_MUTEXES) && defined(CONFIG_DEBUG_LOCKING_API_SELFTESTS)
+		/*
+		 * The lockdep selftest for ww-mutex assumes in a few cases
+		 * the ww_ctx->contending_lock assignment via
+		 * __ww_mutex_check_kill() which does not happen if the rtmutex
+		 * detects the deadlock early.
+		 */
+		if (build_ww_mutex() && ww_ctx) {
+			struct rt_mutex *rtm;
+
+			/* Check whether the waiter should backout immediately */
+			rtm = container_of(lock, struct rt_mutex, rtmutex);
+
+			__ww_mutex_add_waiter(waiter, rtm, ww_ctx);
+			__ww_mutex_check_kill(rtm, waiter, ww_ctx);
+		}
+#endif
 		return -EDEADLK;
+	}
 
 	raw_spin_lock(&task->pi_lock);
 	waiter->task = task;
Index: linux-5.15/kernel/locking/rtmutex_api.c
===================================================================
--- linux-5.15.orig/kernel/locking/rtmutex_api.c
+++ linux-5.15/kernel/locking/rtmutex_api.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:24 @ int max_lock_depth = 1024;
  */
 static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock,
 						  unsigned int state,
+						  struct lockdep_map *nest_lock,
 						  unsigned int subclass)
 {
 	int ret;
 
 	might_sleep();
-	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_);
 	ret = __rt_mutex_lock(&lock->rtmutex, state);
 	if (ret)
 		mutex_release(&lock->dep_map, _RET_IP_);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:52 @ EXPORT_SYMBOL(rt_mutex_base_init);
  */
 void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
 {
-	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
+	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
 
+void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock)
+{
+	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0);
+}
+EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock);
+
 #else /* !CONFIG_DEBUG_LOCK_ALLOC */
 
 /**
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:71 @ EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
  */
 void __sched rt_mutex_lock(struct rt_mutex *lock)
 {
-	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
+	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock);
 #endif
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:87 @ EXPORT_SYMBOL_GPL(rt_mutex_lock);
  */
 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
 {
-	return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
+	return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
 
 /**
+ * rt_mutex_lock_killable - lock a rt_mutex killable
+ *
+ * @lock:		the rt_mutex to be locked
+ *
+ * Returns:
+ *  0		on success
+ * -EINTR	when interrupted by a signal
+ */
+int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
+{
+	return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
+
+/**
  * rt_mutex_trylock - try to lock a rt_mutex
  *
  * @lock:	the rt_mutex to be locked
Index: linux-5.15/kernel/locking/spinlock_rt.c
===================================================================
--- linux-5.15.orig/kernel/locking/spinlock_rt.c
+++ linux-5.15/kernel/locking/spinlock_rt.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:27 @
 #define RT_MUTEX_BUILD_SPINLOCKS
 #include "rtmutex.c"
 
+/*
+ * __might_resched() skips the state check as rtlocks are state
+ * preserving. Take RCU nesting into account as spin/read/write_lock() can
+ * legitimately nest into an RCU read side critical section.
+ */
+#define RTLOCK_RESCHED_OFFSETS						\
+	(rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT)
+
+#define rtlock_might_resched()						\
+	__might_resched(__FILE__, __LINE__, RTLOCK_RESCHED_OFFSETS)
+
 static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
 {
 	if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:46 @ static __always_inline void rtlock_lock(
 
 static __always_inline void __rt_spin_lock(spinlock_t *lock)
 {
-	___might_sleep(__FILE__, __LINE__, 0);
+	rtlock_might_resched();
 	rtlock_lock(&lock->lock);
 	rcu_read_lock();
 	migrate_disable();
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:224 @ EXPORT_SYMBOL(rt_write_trylock);
 
 void __sched rt_read_lock(rwlock_t *rwlock)
 {
-	___might_sleep(__FILE__, __LINE__, 0);
+	rtlock_might_resched();
 	rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
 	rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
 	rcu_read_lock();
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:234 @ EXPORT_SYMBOL(rt_read_lock);
 
 void __sched rt_write_lock(rwlock_t *rwlock)
 {
-	___might_sleep(__FILE__, __LINE__, 0);
+	rtlock_might_resched();
 	rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
 	rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
 	rcu_read_lock();
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:260 @ void __sched rt_write_unlock(rwlock_t *r
 }
 EXPORT_SYMBOL(rt_write_unlock);
 
-int __sched rt_rwlock_is_contended(rwlock_t *rwlock)
-{
-	return rw_base_is_contended(&rwlock->rwbase);
-}
-EXPORT_SYMBOL(rt_rwlock_is_contended);
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
 		      struct lock_class_key *key)
Index: linux-5.15/kernel/panic.c
===================================================================
--- linux-5.15.orig/kernel/panic.c
+++ linux-5.15/kernel/panic.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:248 @ void check_panic_on_warn(const char *ori
 void panic(const char *fmt, ...)
 {
 	static char buf[1024];
+	va_list args2;
 	va_list args;
 	long i, i_next = 0, len;
 	int state = 0;
 	int old_cpu, this_cpu;
 	bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
 
+	console_verbose();
+	pr_emerg("Kernel panic - not syncing:\n");
+	va_start(args2, fmt);
+	va_copy(args, args2);
+	vprintk(fmt, args2);
+	va_end(args2);
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+	/*
+	 * Avoid nested stack-dumping if a panic occurs during oops processing
+	 */
+	if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
+		dump_stack();
+#endif
+	pr_flush(1000, true);
 	if (panic_on_warn) {
 		/*
 		 * This thread may hit another WARN() in the panic path.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:309 @ void panic(const char *fmt, ...)
 	if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
 		panic_smp_self_stop();
 
-	console_verbose();
 	bust_spinlocks(1);
-	va_start(args, fmt);
 	len = vscnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
 
 	if (len && buf[len - 1] == '\n')
 		buf[len - 1] = '\0';
 
-	pr_emerg("Kernel panic - not syncing: %s\n", buf);
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-	/*
-	 * Avoid nested stack-dumping if a panic occurs during oops processing
-	 */
-	if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
-		dump_stack();
-#endif
-
 	/*
 	 * If kgdb is enabled, give it a chance to run before we stop all
 	 * the other CPUs or else we won't be able to debug processes left
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:624 @ static u64 oops_id;
 
 static int init_oops_id(void)
 {
+#ifndef CONFIG_PREEMPT_RT
 	if (!oops_id)
 		get_random_bytes(&oops_id, sizeof(oops_id));
 	else
+#endif
 		oops_id++;
 
 	return 0;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:639 @ static void print_oops_end_marker(void)
 {
 	init_oops_id();
 	pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
+	pr_flush(1000, true);
 }
 
 /*
Index: linux-5.15/kernel/printk/printk.c
===================================================================
--- linux-5.15.orig/kernel/printk/printk.c
+++ linux-5.15/kernel/printk/printk.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:47 @
 #include <linux/irq_work.h>
 #include <linux/ctype.h>
 #include <linux/uio.h>
+#include <linux/kgdb.h>
+#include <linux/kthread.h>
+#include <linux/clocksource.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:275 @ static void __up_console_sem(unsigned lo
 static int console_locked, console_suspended;
 
 /*
- * If exclusive_console is non-NULL then only this console is to be printed to.
- */
-static struct console *exclusive_console;
-
-/*
  *	Array of consoles built from command line options (console=)
  */
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:353 @ static int console_msg_format = MSG_FORM
  * non-prinatable characters are escaped in the "\xff" notation.
  */
 
+#ifdef CONFIG_PRINTK
 /* syslog_lock protects syslog_* variables and write access to clear_seq. */
 static DEFINE_MUTEX(syslog_lock);
 
-#ifdef CONFIG_PRINTK
+/* Set to enable sync mode. Once set, it is never cleared. */
+static bool sync_mode;
+
 DECLARE_WAIT_QUEUE_HEAD(log_wait);
 /* All 3 protected by @syslog_lock. */
 /* the next printk record to read by syslog(READ) or /proc/kmsg */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:367 @ static u64 syslog_seq;
 static size_t syslog_partial;
 static bool syslog_time;
 
-/* All 3 protected by @console_sem. */
-/* the next printk record to write to the console */
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
-
-struct latched_seq {
-	seqcount_latch_t	latch;
-	u64			val[2];
-};
-
 /*
  * The next printk record to read after the last 'clear' command. There are
  * two copies (updated with seqcount_latch) so that reads can locklessly
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:384 @ static struct latched_seq clear_seq = {
 #define PREFIX_MAX		32
 #endif
 
-/* the maximum size of a formatted record (i.e. with prefix added per line) */
-#define CONSOLE_LOG_MAX		1024
-
 /* the maximum size allowed to be reserved for a record */
 #define LOG_LINE_MAX		(CONSOLE_LOG_MAX - PREFIX_MAX)
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:427 @ bool printk_percpu_data_ready(void)
 	return __printk_percpu_data_ready;
 }
 
-/* Must be called under syslog_lock. */
+/* Must be called under associated write-protection lock. */
 static void latched_seq_write(struct latched_seq *ls, u64 val)
 {
 	raw_write_seqcount_latch(&ls->latch);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1761 @ SYSCALL_DEFINE3(syslog, int, type, char
 	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
 }
 
-/*
- * Special console_lock variants that help to reduce the risk of soft-lockups.
- * They allow to pass console_lock to another printk() call using a busy wait.
- */
+int printk_delay_msec __read_mostly;
 
-#ifdef CONFIG_LOCKDEP
-static struct lockdep_map console_owner_dep_map = {
-	.name = "console_owner"
-};
-#endif
+static inline void printk_delay(int level)
+{
+	boot_delay_msec(level);
 
-static DEFINE_RAW_SPINLOCK(console_owner_lock);
-static struct task_struct *console_owner;
-static bool console_waiter;
+	if (unlikely(printk_delay_msec)) {
+		int m = printk_delay_msec;
 
-/**
- * console_lock_spinning_enable - mark beginning of code where another
- *	thread might safely busy wait
- *
- * This basically converts console_lock into a spinlock. This marks
- * the section where the console_lock owner can not sleep, because
- * there may be a waiter spinning (like a spinlock). Also it must be
- * ready to hand over the lock at the end of the section.
- */
-static void console_lock_spinning_enable(void)
-{
-	raw_spin_lock(&console_owner_lock);
-	console_owner = current;
-	raw_spin_unlock(&console_owner_lock);
+		while (m--) {
+			mdelay(1);
+			touch_nmi_watchdog();
+		}
+	}
+}
 
-	/* The waiter may spin on us after setting console_owner */
-	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
+static bool kernel_sync_mode(void)
+{
+	return (oops_in_progress || sync_mode);
 }
 
-/**
- * console_lock_spinning_disable_and_check - mark end of code where another
- *	thread was able to busy wait and check if there is a waiter
- *
- * This is called at the end of the section where spinning is allowed.
- * It has two functions. First, it is a signal that it is no longer
- * safe to start busy waiting for the lock. Second, it checks if
- * there is a busy waiter and passes the lock rights to her.
- *
- * Important: Callers lose the lock if there was a busy waiter.
- *	They must not touch items synchronized by console_lock
- *	in this case.
- *
- * Return: 1 if the lock rights were passed, 0 otherwise.
- */
-static int console_lock_spinning_disable_and_check(void)
+static bool console_may_sync(struct console *con)
 {
-	int waiter;
+	if (!(con->flags & CON_ENABLED))
+		return false;
+	if (con->write_atomic && kernel_sync_mode())
+		return true;
+	if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread)
+		return true;
+	if (con->write && (con->flags & CON_BOOT) && !con->thread)
+		return true;
+	return false;
+}
 
-	raw_spin_lock(&console_owner_lock);
-	waiter = READ_ONCE(console_waiter);
-	console_owner = NULL;
-	raw_spin_unlock(&console_owner_lock);
+static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len)
+{
+	if (!(con->flags & CON_ENABLED))
+		return false;
 
-	if (!waiter) {
-		spin_release(&console_owner_dep_map, _THIS_IP_);
-		return 0;
+	if (con->write_atomic && kernel_sync_mode()) {
+		con->write_atomic(con, text, text_len);
+		return true;
 	}
 
-	/* The waiter is now free to continue */
-	WRITE_ONCE(console_waiter, false);
+	if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) {
+		if (console_trylock()) {
+			con->write_atomic(con, text, text_len);
+			console_unlock();
+			return true;
+		}
 
-	spin_release(&console_owner_dep_map, _THIS_IP_);
+	} else if (con->write && (con->flags & CON_BOOT) && !con->thread) {
+		if (console_trylock()) {
+			con->write(con, text, text_len);
+			console_unlock();
+			return true;
+		}
+	}
 
-	/*
-	 * Hand off console_lock to waiter. The waiter will perform
-	 * the up(). After this, the waiter is the console_lock owner.
-	 */
-	mutex_release(&console_lock_dep_map, _THIS_IP_);
-	return 1;
+	return false;
 }
 
-/**
- * console_trylock_spinning - try to get console_lock by busy waiting
- *
- * This allows to busy wait for the console_lock when the current
- * owner is running in specially marked sections. It means that
- * the current owner is running and cannot reschedule until it
- * is ready to lose the lock.
- *
- * Return: 1 if we got the lock, 0 othrewise
- */
-static int console_trylock_spinning(void)
+static bool have_atomic_console(void)
 {
-	struct task_struct *owner = NULL;
-	bool waiter;
-	bool spin = false;
-	unsigned long flags;
+	struct console *con;
 
-	if (console_trylock())
-		return 1;
+	for_each_console(con) {
+		if (!(con->flags & CON_ENABLED))
+			continue;
+		if (con->write_atomic)
+			return true;
+	}
+	return false;
+}
 
-	printk_safe_enter_irqsave(flags);
+static bool print_sync(struct console *con, u64 *seq)
+{
+	struct printk_info info;
+	struct printk_record r;
+	size_t text_len;
 
-	raw_spin_lock(&console_owner_lock);
-	owner = READ_ONCE(console_owner);
-	waiter = READ_ONCE(console_waiter);
-	if (!waiter && owner && owner != current) {
-		WRITE_ONCE(console_waiter, true);
-		spin = true;
-	}
-	raw_spin_unlock(&console_owner_lock);
-
-	/*
-	 * If there is an active printk() writing to the
-	 * consoles, instead of having it write our data too,
-	 * see if we can offload that load from the active
-	 * printer, and do some printing ourselves.
-	 * Go into a spin only if there isn't already a waiter
-	 * spinning, and there is an active printer, and
-	 * that active printer isn't us (recursive printk?).
-	 */
-	if (!spin) {
-		printk_safe_exit_irqrestore(flags);
-		return 0;
-	}
+	prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf));
 
-	/* We spin waiting for the owner to release us */
-	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
-	/* Owner will clear console_waiter on hand off */
-	while (READ_ONCE(console_waiter))
-		cpu_relax();
-	spin_release(&console_owner_dep_map, _THIS_IP_);
+	if (!prb_read_valid(prb, *seq, &r))
+		return false;
 
-	printk_safe_exit_irqrestore(flags);
-	/*
-	 * The owner passed the console lock to us.
-	 * Since we did not spin on console lock, annotate
-	 * this as a trylock. Otherwise lockdep will
-	 * complain.
-	 */
-	mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
+	text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
 
-	return 1;
+	if (!call_sync_console_driver(con, &con->sync_buf[0], text_len))
+		return false;
+
+	*seq = r.info->seq;
+
+	touch_softlockup_watchdog_sync();
+	clocksource_touch_watchdog();
+	rcu_cpu_stall_reset();
+	touch_nmi_watchdog();
+
+	if (text_len)
+		printk_delay(r.info->level);
+
+	return true;
 }
 
-/*
- * Call the console drivers, asking them to write out
- * log_buf[start] to log_buf[end - 1].
- * The console_lock must be held.
- */
-static void call_console_drivers(const char *ext_text, size_t ext_len,
-				 const char *text, size_t len)
+static u64 read_console_seq(struct console *con)
 {
-	static char dropped_text[64];
-	size_t dropped_len = 0;
-	struct console *con;
+	u64 seq2;
+	u64 seq;
 
-	trace_console_rcuidle(text, len);
+	seq = latched_seq_read_nolock(&con->printk_seq);
+	seq2 = latched_seq_read_nolock(&con->printk_sync_seq);
+	if (seq2 > seq)
+		seq = seq2;
+#ifdef CONFIG_HAVE_NMI
+	seq2 = latched_seq_read_nolock(&con->printk_sync_nmi_seq);
+	if (seq2 > seq)
+		seq = seq2;
+#endif
+	return seq;
+}
 
-	if (!console_drivers)
-		return;
+static void print_sync_until(struct console *con, u64 seq, bool is_locked)
+{
+	u64 printk_seq;
 
-	if (console_dropped) {
-		dropped_len = snprintf(dropped_text, sizeof(dropped_text),
-				       "** %lu printk messages dropped **\n",
-				       console_dropped);
-		console_dropped = 0;
-	}
+	while (!__printk_cpu_trylock())
+		cpu_relax();
 
-	for_each_console(con) {
-		if (exclusive_console && con != exclusive_console)
-			continue;
-		if (!(con->flags & CON_ENABLED))
-			continue;
-		if (!con->write)
-			continue;
-		if (!cpu_online(smp_processor_id()) &&
-		    !(con->flags & CON_ANYTIME))
-			continue;
-		if (con->flags & CON_EXTENDED)
-			con->write(con, ext_text, ext_len);
-		else {
-			if (dropped_len)
-				con->write(con, dropped_text, dropped_len);
-			con->write(con, text, len);
-		}
+	for (;;) {
+		printk_seq = read_console_seq(con);
+		if (printk_seq >= seq)
+			break;
+		if (!print_sync(con, &printk_seq))
+			break;
+
+		if (is_locked)
+			latched_seq_write(&con->printk_seq, printk_seq + 1);
+#ifdef CONFIG_PRINTK_NMI
+		else if (in_nmi())
+			latched_seq_write(&con->printk_sync_nmi_seq, printk_seq + 1);
+#endif
+		else
+			latched_seq_write(&con->printk_sync_seq, printk_seq + 1);
 	}
+
+	__printk_cpu_unlock();
 }
 
 /*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1979 @ static u8 *__printk_recursion_counter(vo
 		local_irq_restore(flags);		\
 	} while (0)
 
-int printk_delay_msec __read_mostly;
-
-static inline void printk_delay(void)
-{
-	if (unlikely(printk_delay_msec)) {
-		int m = printk_delay_msec;
-
-		while (m--) {
-			mdelay(1);
-			touch_nmi_watchdog();
-		}
-	}
-}
-
 static inline u32 printk_caller_id(void)
 {
 	return in_task() ? task_pid_nr(current) :
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2066 @ int vprintk_store(int facility, int leve
 	const u32 caller_id = printk_caller_id();
 	struct prb_reserved_entry e;
 	enum printk_info_flags flags = 0;
+	bool final_commit = false;
 	struct printk_record r;
 	unsigned long irqflags;
 	u16 trunc_msg_len = 0;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2077 @ int vprintk_store(int facility, int leve
 	u16 text_len;
 	int ret = 0;
 	u64 ts_nsec;
+	u64 seq;
 
 	/*
 	 * Since the duration of printk() can vary depending on the message
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2116 @ int vprintk_store(int facility, int leve
 	if (flags & LOG_CONT) {
 		prb_rec_init_wr(&r, reserve_size);
 		if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
+			seq = r.info->seq;
 			text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
 						 facility, &flags, fmt, args);
 			r.info->text_len += text_len;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2124 @ int vprintk_store(int facility, int leve
 			if (flags & LOG_NEWLINE) {
 				r.info->flags |= LOG_NEWLINE;
 				prb_final_commit(&e);
+				final_commit = true;
 			} else {
 				prb_commit(&e);
 			}
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2148 @ int vprintk_store(int facility, int leve
 		if (!prb_reserve(&e, prb, &r))
 			goto out;
 	}
+	seq = r.info->seq;
 
 	/* fill message */
 	text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2164 @ int vprintk_store(int facility, int leve
 		memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
 
 	/* A message without a trailing newline can be continued. */
-	if (!(flags & LOG_NEWLINE))
+	if (!(flags & LOG_NEWLINE)) {
 		prb_commit(&e);
-	else
+	} else {
 		prb_final_commit(&e);
+		final_commit = true;
+	}
 
 	ret = text_len + trunc_msg_len;
 out:
+	/* only the kernel may perform synchronous printing */
+	if (facility == 0 && final_commit) {
+		struct console *con;
+
+		for_each_console(con) {
+			if (console_may_sync(con))
+				print_sync_until(con, seq + 1, false);
+		}
+	}
+
 	printk_exit_irqrestore(recursion_ptr, irqflags);
 	return ret;
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2192 @ asmlinkage int vprintk_emit(int facility
 			    const char *fmt, va_list args)
 {
 	int printed_len;
-	bool in_sched = false;
 
 	/* Suppress unimportant messages after panic happens */
 	if (unlikely(suppress_printk))
 		return 0;
 
-	if (level == LOGLEVEL_SCHED) {
+	if (level == LOGLEVEL_SCHED)
 		level = LOGLEVEL_DEFAULT;
-		in_sched = true;
-	}
-
-	boot_delay_msec(level);
-	printk_delay();
 
 	printed_len = vprintk_store(facility, level, dev_info, fmt, args);
 
-	/* If called from the scheduler, we can not call up(). */
-	if (!in_sched) {
-		/*
-		 * Disable preemption to avoid being preempted while holding
-		 * console_sem which would prevent anyone from printing to
-		 * console
-		 */
-		preempt_disable();
-		/*
-		 * Try to acquire and then immediately release the console
-		 * semaphore.  The release will print out buffers and wake up
-		 * /dev/kmsg and syslog() users.
-		 */
-		if (console_trylock_spinning())
-			console_unlock();
-		preempt_enable();
-	}
-
 	wake_up_klogd();
 	return printed_len;
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2226 @ asmlinkage __visible int _printk(const c
 }
 EXPORT_SYMBOL(_printk);
 
-#else /* CONFIG_PRINTK */
+static int printk_kthread_func(void *data)
+{
+	struct console *con = data;
+	unsigned long dropped = 0;
+	char *dropped_text = NULL;
+	struct printk_info info;
+	struct printk_record r;
+	char *ext_text = NULL;
+	size_t dropped_len;
+	int ret = -ENOMEM;
+	char *text = NULL;
+	char *write_text;
+	size_t len;
+	int error;
+	u64 seq;
+
+	if (con->flags & CON_EXTENDED) {
+		ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
+		if (!ext_text)
+			goto out;
+	}
+	text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+	dropped_text = kmalloc(64, GFP_KERNEL);
+	if (!text || !dropped_text)
+		goto out;
+	if (con->flags & CON_EXTENDED)
+		write_text = ext_text;
+	else
+		write_text = text;
+
+	seq = read_console_seq(con);
 
-#define CONSOLE_LOG_MAX		0
-#define printk_time		false
+	prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
 
-#define prb_read_valid(rb, seq, r)	false
-#define prb_first_valid_seq(rb)		0
+	for (;;) {
+		error = wait_event_interruptible(log_wait,
+						 prb_read_valid(prb, seq, &r) || kthread_should_stop());
 
-static u64 syslog_seq;
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
+		if (kthread_should_stop())
+			break;
+
+		if (error)
+			continue;
+
+		if (seq != r.info->seq) {
+			dropped += r.info->seq - seq;
+			seq = r.info->seq;
+		}
+
+		seq++;
+
+		if (!(con->flags & CON_ENABLED))
+			continue;
+
+		if (suppress_message_printing(r.info->level))
+			continue;
+
+		if (con->flags & CON_EXTENDED) {
+			len = info_print_ext_header(ext_text,
+						    CONSOLE_EXT_LOG_MAX,
+						    r.info);
+			len += msg_print_ext_body(ext_text + len,
+						  CONSOLE_EXT_LOG_MAX - len,
+						  &r.text_buf[0], r.info->text_len,
+						  &r.info->dev_info);
+		} else {
+			len = record_print_text(&r,
+						console_msg_format & MSG_FORMAT_SYSLOG,
+						printk_time);
+		}
+
+		console_lock();
+
+		/*
+		 * Even though the printk kthread is always preemptible, it is
+		 * still not allowed to call cond_resched() from within
+		 * console drivers. The task may become non-preemptible in the
+		 * console driver call chain. For example, vt_console_print()
+		 * takes a spinlock and then can call into fbcon_redraw(),
+		 * which can conditionally invoke cond_resched().
+		 */
+		console_may_schedule = 0;
+
+		if (kernel_sync_mode() && con->write_atomic) {
+			console_unlock();
+			break;
+		}
+
+		if (!(con->flags & CON_EXTENDED) && dropped) {
+			dropped_len = snprintf(dropped_text, 64,
+					       "** %lu printk messages dropped **\n",
+					       dropped);
+			dropped = 0;
+
+			con->write(con, dropped_text, dropped_len);
+			printk_delay(r.info->level);
+		}
+
+		con->write(con, write_text, len);
+		if (len)
+			printk_delay(r.info->level);
+
+		latched_seq_write(&con->printk_seq, seq);
 
-static size_t record_print_text(const struct printk_record *r,
-				bool syslog, bool time)
+		console_unlock();
+	}
+	ret = 0;
+out:
+	kfree(dropped_text);
+	kfree(text);
+	kfree(ext_text);
+	pr_info("%sconsole [%s%d]: printing thread stopped\n",
+		(con->flags & CON_BOOT) ? "boot" : "",
+		con->name, con->index);
+	return ret;
+}
+
+/* Must be called within console_lock(). */
+static void start_printk_kthread(struct console *con)
 {
-	return 0;
+	con->thread = kthread_run(printk_kthread_func, con,
+				  "pr/%s%d", con->name, con->index);
+	if (IS_ERR(con->thread)) {
+		pr_err("%sconsole [%s%d]: unable to start printing thread\n",
+		       (con->flags & CON_BOOT) ? "boot" : "",
+		       con->name, con->index);
+		return;
+	}
+	pr_info("%sconsole [%s%d]: printing thread started\n",
+		(con->flags & CON_BOOT) ? "boot" : "",
+		con->name, con->index);
 }
-static ssize_t info_print_ext_header(char *buf, size_t size,
-				     struct printk_info *info)
+
+/* protected by console_lock */
+static bool kthreads_started;
+
+/* Must be called within console_lock(). */
+static void console_try_thread(struct console *con)
 {
-	return 0;
+	if (kthreads_started) {
+		start_printk_kthread(con);
+		return;
+	}
+
+	/*
+	 * The printing threads have not been started yet. If this console
+	 * can print synchronously, print all unprinted messages.
+	 */
+	if (console_may_sync(con)) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		print_sync_until(con, prb_next_seq(prb), true);
+		local_irq_restore(flags);
+	}
 }
-static ssize_t msg_print_ext_body(char *buf, size_t size,
-				  char *text, size_t text_len,
-				  struct dev_printk_info *dev_info) { return 0; }
-static void console_lock_spinning_enable(void) { }
-static int console_lock_spinning_disable_and_check(void) { return 0; }
-static void call_console_drivers(const char *ext_text, size_t ext_len,
-				 const char *text, size_t len) {}
-static bool suppress_message_printing(int level) { return false; }
 
 #endif /* CONFIG_PRINTK */
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2638 @ int is_console_locked(void)
 }
 EXPORT_SYMBOL(is_console_locked);
 
-/*
- * Check if we have any console that is capable of printing while cpu is
- * booting or shutting down. Requires console_sem.
- */
-static int have_callable_console(void)
-{
-	struct console *con;
-
-	for_each_console(con)
-		if ((con->flags & CON_ENABLED) &&
-				(con->flags & CON_ANYTIME))
-			return 1;
-
-	return 0;
-}
-
-/*
- * Can we actually use the console at this time on this cpu?
- *
- * Console drivers may assume that per-cpu resources have been allocated. So
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
- * call them until this CPU is officially up.
- */
-static inline int can_use_console(void)
-{
-	return cpu_online(raw_smp_processor_id()) || have_callable_console();
-}
-
 /**
  * console_unlock - unlock the console system
  *
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2654 @ static inline int can_use_console(void)
  */
 void console_unlock(void)
 {
-	static char ext_text[CONSOLE_EXT_LOG_MAX];
-	static char text[CONSOLE_LOG_MAX];
-	unsigned long flags;
-	bool do_cond_resched, retry;
-	struct printk_info info;
-	struct printk_record r;
-	u64 __maybe_unused next_seq;
-
 	if (console_suspended) {
 		up_console_sem();
 		return;
 	}
 
-	prb_rec_init_rd(&r, &info, text, sizeof(text));
-
-	/*
-	 * Console drivers are called with interrupts disabled, so
-	 * @console_may_schedule should be cleared before; however, we may
-	 * end up dumping a lot of lines, for example, if called from
-	 * console registration path, and should invoke cond_resched()
-	 * between lines if allowable.  Not doing so can cause a very long
-	 * scheduling stall on a slow console leading to RCU stall and
-	 * softlockup warnings which exacerbate the issue with more
-	 * messages practically incapacitating the system.
-	 *
-	 * console_trylock() is not able to detect the preemptive
-	 * context reliably. Therefore the value must be stored before
-	 * and cleared after the "again" goto label.
-	 */
-	do_cond_resched = console_may_schedule;
-again:
-	console_may_schedule = 0;
-
-	/*
-	 * We released the console_sem lock, so we need to recheck if
-	 * cpu is online and (if not) is there at least one CON_ANYTIME
-	 * console.
-	 */
-	if (!can_use_console()) {
-		console_locked = 0;
-		up_console_sem();
-		return;
-	}
-
-	for (;;) {
-		size_t ext_len = 0;
-		int handover;
-		size_t len;
-
-skip:
-		if (!prb_read_valid(prb, console_seq, &r))
-			break;
-
-		if (console_seq != r.info->seq) {
-			console_dropped += r.info->seq - console_seq;
-			console_seq = r.info->seq;
-		}
-
-		if (suppress_message_printing(r.info->level)) {
-			/*
-			 * Skip record we have buffered and already printed
-			 * directly to the console when we received it, and
-			 * record that has level above the console loglevel.
-			 */
-			console_seq++;
-			goto skip;
-		}
-
-		/* Output to all consoles once old messages replayed. */
-		if (unlikely(exclusive_console &&
-			     console_seq >= exclusive_console_stop_seq)) {
-			exclusive_console = NULL;
-		}
-
-		/*
-		 * Handle extended console text first because later
-		 * record_print_text() will modify the record buffer in-place.
-		 */
-		if (nr_ext_console_drivers) {
-			ext_len = info_print_ext_header(ext_text,
-						sizeof(ext_text),
-						r.info);
-			ext_len += msg_print_ext_body(ext_text + ext_len,
-						sizeof(ext_text) - ext_len,
-						&r.text_buf[0],
-						r.info->text_len,
-						&r.info->dev_info);
-		}
-		len = record_print_text(&r,
-				console_msg_format & MSG_FORMAT_SYSLOG,
-				printk_time);
-		console_seq++;
-
-		/*
-		 * While actively printing out messages, if another printk()
-		 * were to occur on another CPU, it may wait for this one to
-		 * finish. This task can not be preempted if there is a
-		 * waiter waiting to take over.
-		 *
-		 * Interrupts are disabled because the hand over to a waiter
-		 * must not be interrupted until the hand over is completed
-		 * (@console_waiter is cleared).
-		 */
-		printk_safe_enter_irqsave(flags);
-		console_lock_spinning_enable();
-
-		stop_critical_timings();	/* don't trace print latency */
-		call_console_drivers(ext_text, ext_len, text, len);
-		start_critical_timings();
-
-		handover = console_lock_spinning_disable_and_check();
-		printk_safe_exit_irqrestore(flags);
-		if (handover)
-			return;
-
-		if (do_cond_resched)
-			cond_resched();
-	}
-
-	/* Get consistent value of the next-to-be-used sequence number. */
-	next_seq = console_seq;
-
 	console_locked = 0;
 	up_console_sem();
-
-	/*
-	 * Someone could have filled up the buffer again, so re-check if there's
-	 * something to flush. In case we cannot trylock the console_sem again,
-	 * there's a new owner and the console_unlock() from them will do the
-	 * flush, no worries.
-	 */
-	retry = prb_read_valid(prb, next_seq, NULL);
-	if (retry && console_trylock())
-		goto again;
 }
 EXPORT_SYMBOL(console_unlock);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2710 @ void console_unblank(void)
  */
 void console_flush_on_panic(enum con_flush_mode mode)
 {
-	/*
-	 * If someone else is holding the console lock, trylock will fail
-	 * and may_schedule may be set.  Ignore and proceed to unlock so
-	 * that messages are flushed out.  As this can be called from any
-	 * context and we don't want to get preempted while flushing,
-	 * ensure may_schedule is cleared.
-	 */
-	console_trylock();
-	console_may_schedule = 0;
+	if (!console_trylock())
+		return;
+
+#ifdef CONFIG_PRINTK
+	if (mode == CONSOLE_REPLAY_ALL) {
+		struct console *c;
+		u64 seq;
+
+		seq = prb_first_valid_seq(prb);
+		for_each_console(c)
+			latched_seq_write(&c->printk_seq, seq);
+	}
+#endif
 
-	if (mode == CONSOLE_REPLAY_ALL)
-		console_seq = prb_first_valid_seq(prb);
 	console_unlock();
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2859 @ static int try_enable_new_console(struct
 void register_console(struct console *newcon)
 {
 	struct console *bcon = NULL;
+	u64 __maybe_unused seq = 0;
 	int err;
 
 	for_each_console(bcon) {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2882 @ void register_console(struct console *ne
 		}
 	}
 
+	newcon->thread = NULL;
+
 	if (console_drivers && console_drivers->flags & CON_BOOT)
 		bcon = console_drivers;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2925 @ void register_console(struct console *ne
 	 * the real console are the same physical device, it's annoying to
 	 * see the beginning boot messages twice
 	 */
-	if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV))
+	if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
 		newcon->flags &= ~CON_PRINTBUFFER;
+		newcon->flags |= CON_HANDOVER;
+	}
 
 	/*
 	 *	Put this console in the list - keep the
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2950 @ void register_console(struct console *ne
 	if (newcon->flags & CON_EXTENDED)
 		nr_ext_console_drivers++;
 
-	if (newcon->flags & CON_PRINTBUFFER) {
-		/*
-		 * console_unlock(); will print out the buffered messages
-		 * for us.
-		 *
-		 * We're about to replay the log buffer.  Only do this to the
-		 * just-registered console to avoid excessive message spam to
-		 * the already-registered consoles.
-		 *
-		 * Set exclusive_console with disabled interrupts to reduce
-		 * race window with eventual console_flush_on_panic() that
-		 * ignores console_lock.
-		 */
-		exclusive_console = newcon;
-		exclusive_console_stop_seq = console_seq;
+#ifdef CONFIG_PRINTK
+	if (!(newcon->flags & CON_PRINTBUFFER))
+		seq = prb_next_seq(prb);
 
-		/* Get a consistent copy of @syslog_seq. */
-		mutex_lock(&syslog_lock);
-		console_seq = syslog_seq;
-		mutex_unlock(&syslog_lock);
-	}
+	seqcount_latch_init(&newcon->printk_seq.latch);
+	latched_seq_write(&newcon->printk_seq, seq);
+	seqcount_latch_init(&newcon->printk_sync_seq.latch);
+	latched_seq_write(&newcon->printk_sync_seq, seq);
+#ifdef CONFIG_HAVE_NMI
+	seqcount_latch_init(&newcon->printk_sync_nmi_seq.latch);
+	latched_seq_write(&newcon->printk_sync_nmi_seq, seq);
+#endif
+
+	console_try_thread(newcon);
+#endif /* CONFIG_PRINTK */
 	console_unlock();
 	console_sysfs_notify();
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3038 @ int unregister_console(struct console *c
 	console_unlock();
 	console_sysfs_notify();
 
+	if (console->thread && !IS_ERR(console->thread))
+		kthread_stop(console->thread);
+
 	if (console->exit)
 		res = console->exit(console);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3123 @ static int __init printk_late_init(void)
 			unregister_console(con);
 		}
 	}
+
+#ifdef CONFIG_PRINTK
+	console_lock();
+	for_each_console(con)
+		start_printk_kthread(con);
+	kthreads_started = true;
+	console_unlock();
+#endif
+
 	ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
 					console_cpu_notify);
 	WARN_ON(ret < 0);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3155 @ static void wake_up_klogd_work_func(stru
 {
 	int pending = this_cpu_xchg(printk_pending, 0);
 
-	if (pending & PRINTK_PENDING_OUTPUT) {
-		/* If trylock fails, someone else is doing the printing */
-		if (console_trylock())
-			console_unlock();
-	}
-
 	if (pending & PRINTK_PENDING_WAKEUP)
-		wake_up_interruptible(&log_wait);
+		wake_up_interruptible_all(&log_wait);
 }
 
 static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3203 @ void defer_console_output(void)
 
 void printk_trigger_flush(void)
 {
-	defer_console_output();
+	wake_up_klogd();
 }
 
 int vprintk_deferred(const char *fmt, va_list args)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3354 @ void kmsg_dump(enum kmsg_dump_reason rea
 {
 	struct kmsg_dumper *dumper;
 
+	if (!oops_in_progress) {
+		/*
+		 * If atomic consoles are available, activate kernel sync mode
+		 * to make sure any final messages are visible. The trailing
+		 * printk message is important to flush any pending messages.
+		 */
+		if (have_atomic_console()) {
+			sync_mode = true;
+			pr_info("enabled sync mode\n");
+		}
+
+		/*
+		 * Give the printing threads time to flush, allowing up to
+		 * 1s of no printing forward progress before giving up.
+		 */
+		pr_flush(1000, true);
+	}
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(dumper, &dump_list, list) {
 		enum kmsg_dump_reason max_reason = dumper->max_reason;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3554 @ EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
 #ifdef CONFIG_SMP
 static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1);
 static atomic_t printk_cpulock_nested = ATOMIC_INIT(0);
+static unsigned int kgdb_cpu = -1;
 
 /**
  * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3634 @ EXPORT_SYMBOL(__printk_cpu_trylock);
  */
 void __printk_cpu_unlock(void)
 {
+	bool trigger_kgdb = false;
+	unsigned int cpu;
+
 	if (atomic_read(&printk_cpulock_nested)) {
 		atomic_dec(&printk_cpulock_nested);
 		return;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3647 @ void __printk_cpu_unlock(void)
 	 * LMM(__printk_cpu_unlock:A)
 	 */
 
+	cpu = smp_processor_id();
+	if (kgdb_cpu == cpu) {
+		trigger_kgdb = true;
+		kgdb_cpu = -1;
+	}
+
 	/*
 	 * Guarantee loads and stores from this CPU when it was the
 	 * lock owner are visible to the next lock owner. This pairs
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3673 @ void __printk_cpu_unlock(void)
 	 */
 	atomic_set_release(&printk_cpulock_owner,
 			   -1); /* LMM(__printk_cpu_unlock:B) */
+
+	if (trigger_kgdb) {
+		pr_warn("re-triggering kgdb roundup for CPU#%d\n", cpu);
+		kgdb_roundup_cpu(cpu);
+	}
 }
 EXPORT_SYMBOL(__printk_cpu_unlock);
+
+bool kgdb_roundup_delay(unsigned int cpu)
+{
+	if (cpu != atomic_read(&printk_cpulock_owner))
+		return false;
+
+	kgdb_cpu = cpu;
+	return true;
+}
+EXPORT_SYMBOL(kgdb_roundup_delay);
 #endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PRINTK
+static void pr_msleep(bool may_sleep, int ms)
+{
+	if (may_sleep) {
+		msleep(ms);
+	} else {
+		while (ms--)
+			udelay(1000);
+	}
+}
+
+/**
+ * pr_flush() - Wait for printing threads to catch up.
+ *
+ * @timeout_ms:        The maximum time (in ms) to wait.
+ * @reset_on_progress: Reset the timeout if forward progress is seen.
+ *
+ * A value of 0 for @timeout_ms means no waiting will occur. A value of -1
+ * represents infinite waiting.
+ *
+ * If @reset_on_progress is true, the timeout will be reset whenever any
+ * printer has been seen to make some forward progress.
+ *
+ * Context: Any context.
+ * Return: true if all enabled printers are caught up.
+ */
+bool pr_flush(int timeout_ms, bool reset_on_progress)
+{
+	int remaining = timeout_ms;
+	struct console *con;
+	u64 last_diff = 0;
+	bool may_sleep;
+	u64 printk_seq;
+	u64 diff;
+	u64 seq;
+
+	may_sleep = (preemptible() &&
+		     !in_softirq() &&
+		     system_state >= SYSTEM_RUNNING);
+
+	seq = prb_next_seq(prb);
+
+	for (;;) {
+		diff = 0;
+
+		for_each_console(con) {
+			if (!(con->flags & CON_ENABLED))
+				continue;
+			printk_seq = read_console_seq(con);
+			if (printk_seq < seq)
+				diff += seq - printk_seq;
+		}
+
+		if (diff != last_diff && reset_on_progress)
+			remaining = timeout_ms;
+
+		if (diff == 0 || remaining == 0)
+			break;
+
+		if (remaining < 0) {
+			pr_msleep(may_sleep, 100);
+		} else if (remaining < 100) {
+			pr_msleep(may_sleep, remaining);
+			remaining = 0;
+		} else {
+			pr_msleep(may_sleep, 100);
+			remaining -= 100;
+		}
+
+		last_diff = diff;
+	}
+
+	return (diff == 0);
+}
+EXPORT_SYMBOL(pr_flush);
+#endif /* CONFIG_PRINTK */
Index: linux-5.15/kernel/ptrace.c
===================================================================
--- linux-5.15.orig/kernel/ptrace.c
+++ linux-5.15/kernel/ptrace.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:200 @ static bool ptrace_freeze_traced(struct
 	spin_lock_irq(&task->sighand->siglock);
 	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
 	    !__fatal_signal_pending(task)) {
+#ifdef CONFIG_PREEMPT_RT
+		unsigned long flags;
+
+		raw_spin_lock_irqsave(&task->pi_lock, flags);
+		if (READ_ONCE(task->__state) & __TASK_TRACED)
+			WRITE_ONCE(task->__state, __TASK_TRACED);
+		else
+			task->saved_state = __TASK_TRACED;
+		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+#else
 		WRITE_ONCE(task->__state, __TASK_TRACED);
+#endif
 		ret = true;
 	}
 	spin_unlock_irq(&task->sighand->siglock);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:221 @ static bool ptrace_freeze_traced(struct
 
 static void ptrace_unfreeze_traced(struct task_struct *task)
 {
-	if (READ_ONCE(task->__state) != __TASK_TRACED)
+	unsigned long flags;
+	bool frozen = true;
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
+	    READ_ONCE(task->__state) != __TASK_TRACED)
 		return;
 
 	WARN_ON(!task->ptrace || task->parent != current);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:235 @ static void ptrace_unfreeze_traced(struc
 	 * Recheck state under the lock to close this race.
 	 */
 	spin_lock_irq(&task->sighand->siglock);
-	if (READ_ONCE(task->__state) == __TASK_TRACED) {
-		if (__fatal_signal_pending(task))
-			wake_up_state(task, __TASK_TRACED);
-		else
-			WRITE_ONCE(task->__state, TASK_TRACED);
-	}
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	if (READ_ONCE(task->__state) == __TASK_TRACED)
+		WRITE_ONCE(task->__state, TASK_TRACED);
+
+#ifdef CONFIG_PREEMPT_RT
+	else if (task->saved_state == __TASK_TRACED)
+		task->saved_state = TASK_TRACED;
+#endif
+	else
+		frozen = false;
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+	if (frozen && __fatal_signal_pending(task))
+		wake_up_state(task, __TASK_TRACED);
+
 	spin_unlock_irq(&task->sighand->siglock);
 }
 
Index: linux-5.15/kernel/rcu/tasks.h
===================================================================
--- linux-5.15.orig/kernel/rcu/tasks.h
+++ linux-5.15/kernel/rcu/tasks.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1365 @ static void test_rcu_tasks_callback(stru
 	rttd->notrun = true;
 }
 
-static void rcu_tasks_initiate_self_tests(void)
+void rcu_tasks_initiate_self_tests(void)
 {
 	pr_info("Running RCU-tasks wait API self tests\n");
 #ifdef CONFIG_TASKS_RCU
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1402 @ static int rcu_tasks_verify_self_tests(v
 	return ret;
 }
 late_initcall(rcu_tasks_verify_self_tests);
-#else /* #ifdef CONFIG_PROVE_RCU */
-static void rcu_tasks_initiate_self_tests(void) { }
-#endif /* #else #ifdef CONFIG_PROVE_RCU */
+#endif /* #ifdef CONFIG_PROVE_RCU */
 
 void __init rcu_init_tasks_generic(void)
 {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1417 @ void __init rcu_init_tasks_generic(void)
 #ifdef CONFIG_TASKS_TRACE_RCU
 	rcu_spawn_tasks_trace_kthread();
 #endif
-
-	// Run the self-tests.
-	rcu_tasks_initiate_self_tests();
 }
 
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
Index: linux-5.15/kernel/rcu/tree.c
===================================================================
--- linux-5.15.orig/kernel/rcu/tree.c
+++ linux-5.15/kernel/rcu/tree.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2282 @ rcu_report_qs_rdp(struct rcu_data *rdp)
 {
 	unsigned long flags;
 	unsigned long mask;
-	bool needwake = false;
-	const bool offloaded = rcu_rdp_is_offloaded(rdp);
+	bool offloaded, needwake = false;
 	struct rcu_node *rnp;
 
 	WARN_ON_ONCE(rdp->cpu != smp_processor_id());
 	rnp = rdp->mynode;
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	offloaded = rcu_rdp_is_offloaded(rdp);
 	if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
 	    rdp->gpwrap) {
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2450 @ static void rcu_do_batch(struct rcu_data
 	int div;
 	bool __maybe_unused empty;
 	unsigned long flags;
-	const bool offloaded = rcu_rdp_is_offloaded(rdp);
+	bool offloaded;
 	struct rcu_head *rhp;
 	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
 	long bl, count = 0;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2476 @ static void rcu_do_batch(struct rcu_data
 	rcu_nocb_lock(rdp);
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
 	pending = rcu_segcblist_n_cbs(&rdp->cblist);
+	offloaded = rcu_rdp_is_offloaded(rdp);
 	div = READ_ONCE(rcu_divisor);
 	div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
 	bl = max(rdp->blimit, pending >> div);
Index: linux-5.15/kernel/sched/core.c
===================================================================
--- linux-5.15.orig/kernel/sched/core.c
+++ linux-5.15/kernel/sched/core.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:78 @ __read_mostly int sysctl_resched_latency
  * Number of tasks to iterate in a single balance run.
  * Limited because this is done with IRQs disabled.
  */
+#ifdef CONFIG_PREEMPT_RT
+const_debug unsigned int sysctl_sched_nr_migrate = 8;
+#else
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
+#endif
 
 /*
  * period over which we measure -rt task CPU usage in us.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:990 @ void resched_curr(struct rq *rq)
 		trace_sched_wake_idle_without_ipi(cpu);
 }
 
+#ifdef CONFIG_PREEMPT_LAZY
+
+static int tsk_is_polling(struct task_struct *p)
+{
+#ifdef TIF_POLLING_NRFLAG
+	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
+#else
+	return 0;
+#endif
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+	struct task_struct *curr = rq->curr;
+	int cpu;
+
+	if (!sched_feat(PREEMPT_LAZY)) {
+		resched_curr(rq);
+		return;
+	}
+
+	if (test_tsk_need_resched(curr))
+		return;
+
+	if (test_tsk_need_resched_lazy(curr))
+		return;
+
+	set_tsk_need_resched_lazy(curr);
+
+	cpu = cpu_of(rq);
+	if (cpu == smp_processor_id())
+		return;
+
+	/* NEED_RESCHED_LAZY must be visible before we test polling */
+	smp_mb();
+	if (!tsk_is_polling(curr))
+		smp_send_reschedule(cpu);
+}
+#endif
+
 void resched_cpu(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2185 @ void migrate_disable(void)
 	preempt_disable();
 	this_rq()->nr_pinned++;
 	p->migration_disabled = 1;
+	preempt_lazy_disable();
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(migrate_disable);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2197 @ void migrate_enable(void)
 	if (p->migration_disabled > 1) {
 		p->migration_disabled--;
 		return;
+	} else if (WARN_ON_ONCE(p->migration_disabled == 0)) {
+		return;
 	}
 
 	/*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2216 @ void migrate_enable(void)
 	barrier();
 	p->migration_disabled = 0;
 	this_rq()->nr_pinned--;
+	preempt_lazy_enable();
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(migrate_enable);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3283 @ unsigned long wait_task_inactive(struct
 		 * is actually now running somewhere else!
 		 */
 		while (task_running(rq, p)) {
-			if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
+			if (match_state && !task_match_state_lock(p, match_state))
 				return 0;
 			cpu_relax();
 		}
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3298 @ unsigned long wait_task_inactive(struct
 		running = task_running(rq, p);
 		queued = task_on_rq_queued(p);
 		ncsw = 0;
-		if (!match_state || READ_ONCE(p->__state) == match_state)
+		if (!match_state || task_match_state_or_saved(p, match_state))
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
 		task_rq_unlock(rq, p, &rf);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3332 @ unsigned long wait_task_inactive(struct
 			ktime_t to = NSEC_PER_SEC / HZ;
 
 			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_hrtimeout(&to, HRTIMER_MODE_REL);
+			schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
 			continue;
 		}
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4486 @ int sched_fork(unsigned long clone_flags
 	p->on_cpu = 0;
 #endif
 	init_task_preempt_count(p);
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+	task_thread_info(p)->preempt_lazy_count = 0;
+#endif
 #ifdef CONFIG_SMP
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
 	RB_CLEAR_NODE(&p->pushable_dl_tasks);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4984 @ static struct rq *finish_task_switch(str
 	 */
 	if (mm) {
 		membarrier_mm_sync_core_before_usermode(mm);
-		mmdrop(mm);
+		mmdrop_sched(mm);
 	}
 	if (unlikely(prev_state == TASK_DEAD)) {
 		if (prev->sched_class->task_dead)
 			prev->sched_class->task_dead(prev);
 
 		/*
-		 * Remove function-return probe instances associated with this
-		 * task and put them back on the free list.
+		 * Release VMAP'ed task stack immediate for reuse. On RT
+		 * enabled kernels this is delayed for latency reasons.
 		 */
-		kprobe_flush_task(prev);
-
-		/* Task is done with its stack. */
-		put_task_stack(prev);
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+			put_task_stack(prev);
 
 		put_task_struct_rcu_user(prev);
 	}
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6395 @ static void __sched notrace __schedule(u
 
 	next = pick_next_task(rq, prev, &rf);
 	clear_tsk_need_resched(prev);
+	clear_tsk_need_resched_lazy(prev);
 	clear_preempt_need_resched();
 #ifdef CONFIG_SCHED_DEBUG
 	rq->last_seen_need_resched_ns = 0;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6617 @ static void __sched notrace preempt_sche
 	} while (need_resched());
 }
 
+#ifdef CONFIG_PREEMPT_LAZY
+/*
+ * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is
+ * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as
+ * preempt_lazy_count counter >0.
+ */
+static __always_inline int preemptible_lazy(void)
+{
+	if (test_thread_flag(TIF_NEED_RESCHED))
+		return 1;
+	if (current_thread_info()->preempt_lazy_count)
+		return 0;
+	return 1;
+}
+
+#else
+
+static inline int preemptible_lazy(void)
+{
+	return 1;
+}
+
+#endif
+
 #ifdef CONFIG_PREEMPTION
 /*
  * This is the entry point to schedule() from in-kernel preemption
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6654 @ asmlinkage __visible void __sched notrac
 	 */
 	if (likely(!preemptible()))
 		return;
-
+	if (!preemptible_lazy())
+		return;
 	preempt_schedule_common();
 }
 NOKPROBE_SYMBOL(preempt_schedule);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6688 @ asmlinkage __visible void __sched notrac
 	if (likely(!preemptible()))
 		return;
 
+	if (!preemptible_lazy())
+		return;
+
 	do {
 		/*
 		 * Because the function tracer can trace preempt_count_sub()
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:8844 @ void __init init_idle(struct task_struct
 
 	/* Set the preempt count _outside_ the spinlocks! */
 	init_idle_preempt_count(idle, cpu);
-
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+	task_thread_info(idle)->preempt_lazy_count = 0;
+#endif
 	/*
 	 * The idle tasks have their own, simple scheduling class:
 	 */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:9647 @ void __init sched_init(void)
 }
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-static inline int preempt_count_equals(int preempt_offset)
-{
-	int nested = preempt_count() + rcu_preempt_depth();
-
-	return (nested == preempt_offset);
-}
 
-void __might_sleep(const char *file, int line, int preempt_offset)
+void __might_sleep(const char *file, int line)
 {
 	unsigned int state = get_current_state();
 	/*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:9662 @ void __might_sleep(const char *file, int
 			(void *)current->task_state_change,
 			(void *)current->task_state_change);
 
-	___might_sleep(file, line, preempt_offset);
+	__might_resched(file, line, 0);
 }
 EXPORT_SYMBOL(__might_sleep);
 
-void ___might_sleep(const char *file, int line, int preempt_offset)
+static void print_preempt_disable_ip(int preempt_offset, unsigned long ip)
+{
+	if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT))
+		return;
+
+	if (preempt_count() == preempt_offset)
+		return;
+
+	pr_err("Preemption disabled at:");
+	print_ip_sym(KERN_ERR, ip);
+}
+
+static inline bool resched_offsets_ok(unsigned int offsets)
+{
+	unsigned int nested = preempt_count();
+
+	nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT;
+
+	return nested == offsets;
+}
+
+void __might_resched(const char *file, int line, unsigned int offsets)
 {
 	/* Ratelimiting timestamp: */
 	static unsigned long prev_jiffy;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:9697 @ void ___might_sleep(const char *file, in
 	/* WARN_ON_ONCE() by default, no rate limit required: */
 	rcu_sleep_check();
 
-	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
+	if ((resched_offsets_ok(offsets) && !irqs_disabled() &&
 	     !is_idle_task(current) && !current->non_block_count) ||
 	    system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
 	    oops_in_progress)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:9710 @ void ___might_sleep(const char *file, in
 	/* Save this before calling printk(), since that will clobber it: */
 	preempt_disable_ip = get_preempt_disable_ip(current);
 
-	printk(KERN_ERR
-		"BUG: sleeping function called from invalid context at %s:%d\n",
-			file, line);
-	printk(KERN_ERR
-		"in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
-			in_atomic(), irqs_disabled(), current->non_block_count,
-			current->pid, current->comm);
+	pr_err("BUG: sleeping function called from invalid context at %s:%d\n",
+	       file, line);
+	pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
+	       in_atomic(), irqs_disabled(), current->non_block_count,
+	       current->pid, current->comm);
+	pr_err("preempt_count: %x, expected: %x\n", preempt_count(),
+	       offsets & MIGHT_RESCHED_PREEMPT_MASK);
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
+		pr_err("RCU nest depth: %d, expected: %u\n",
+		       rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT);
+	}
 
 	if (task_stack_end_corrupted(current))
-		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
+		pr_emerg("Thread overran stack, or stack corrupted\n");
 
 	debug_show_held_locks(current);
 	if (irqs_disabled())
 		print_irqtrace_events(current);
-	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
-	    && !preempt_count_equals(preempt_offset)) {
-		pr_err("Preemption disabled at:");
-		print_ip_sym(KERN_ERR, preempt_disable_ip);
-	}
+
+	print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK,
+				 preempt_disable_ip);
+
 	dump_stack();
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
-EXPORT_SYMBOL(___might_sleep);
+EXPORT_SYMBOL(__might_resched);
 
 void __cant_sleep(const char *file, int line, int preempt_offset)
 {
Index: linux-5.15/kernel/sched/fair.c
===================================================================
--- linux-5.15.orig/kernel/sched/fair.c
+++ linux-5.15/kernel/sched/fair.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4587 @ check_preempt_tick(struct cfs_rq *cfs_rq
 	ideal_runtime = sched_slice(cfs_rq, curr);
 	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
 	if (delta_exec > ideal_runtime) {
-		resched_curr(rq_of(cfs_rq));
+		resched_curr_lazy(rq_of(cfs_rq));
 		/*
 		 * The current task ran long enough, ensure it doesn't get
 		 * re-elected due to buddy favours.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4611 @ check_preempt_tick(struct cfs_rq *cfs_rq
 		return;
 
 	if (delta > ideal_runtime)
-		resched_curr(rq_of(cfs_rq));
+		resched_curr_lazy(rq_of(cfs_rq));
 }
 
 static void
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4754 @ entity_tick(struct cfs_rq *cfs_rq, struc
 	 * validating it and just reschedule.
 	 */
 	if (queued) {
-		resched_curr(rq_of(cfs_rq));
+		resched_curr_lazy(rq_of(cfs_rq));
 		return;
 	}
 	/*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4894 @ static void __account_cfs_rq_runtime(str
 	 * hierarchy can be throttled
 	 */
 	if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
-		resched_curr(rq_of(cfs_rq));
+		resched_curr_lazy(rq_of(cfs_rq));
 }
 
 static __always_inline
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5657 @ static void hrtick_start_fair(struct rq
 
 		if (delta < 0) {
 			if (task_current(rq, p))
-				resched_curr(rq);
+				resched_curr_lazy(rq);
 			return;
 		}
 		hrtick_start(rq, delta);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:7363 @ static void check_preempt_wakeup(struct
 	return;
 
 preempt:
-	resched_curr(rq);
+	resched_curr_lazy(rq);
 	/*
 	 * Only set the backward buddy when the current task is still
 	 * on the rq. This can happen when a wakeup gets interleaved
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:11444 @ static void task_fork_fair(struct task_s
 		 * 'current' within the tree based on its new key value.
 		 */
 		swap(curr->vruntime, se->vruntime);
-		resched_curr(rq);
+		resched_curr_lazy(rq);
 	}
 
 	se->vruntime -= cfs_rq->min_vruntime;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:11471 @ prio_changed_fair(struct rq *rq, struct
 	 */
 	if (task_current(rq, p)) {
 		if (p->prio > oldprio)
-			resched_curr(rq);
+			resched_curr_lazy(rq);
 	} else
 		check_preempt_curr(rq, p, 0);
 }
Index: linux-5.15/kernel/sched/features.h
===================================================================
--- linux-5.15.orig/kernel/sched/features.h
+++ linux-5.15/kernel/sched/features.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:49 @ SCHED_FEAT(DOUBLE_TICK, false)
  */
 SCHED_FEAT(NONTASK_CAPACITY, true)
 
+#ifdef CONFIG_PREEMPT_RT
+SCHED_FEAT(TTWU_QUEUE, false)
+# ifdef CONFIG_PREEMPT_LAZY
+SCHED_FEAT(PREEMPT_LAZY, true)
+# endif
+#else
+
 /*
  * Queue remote wakeups on the target CPU and process them
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  */
 SCHED_FEAT(TTWU_QUEUE, true)
+#endif
 
 /*
  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
Index: linux-5.15/kernel/sched/sched.h
===================================================================
--- linux-5.15.orig/kernel/sched/sched.h
+++ linux-5.15/kernel/sched/sched.h
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2330 @ extern void reweight_task(struct task_st
 extern void resched_curr(struct rq *rq);
 extern void resched_cpu(int cpu);
 
+#ifdef CONFIG_PREEMPT_LAZY
+extern void resched_curr_lazy(struct rq *rq);
+#else
+static inline void resched_curr_lazy(struct rq *rq)
+{
+	resched_curr(rq);
+}
+#endif
+
 extern struct rt_bandwidth def_rt_bandwidth;
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
 
Index: linux-5.15/kernel/sched/swait.c
===================================================================
--- linux-5.15.orig/kernel/sched/swait.c
+++ linux-5.15/kernel/sched/swait.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:67 @ void swake_up_all(struct swait_queue_hea
 	struct swait_queue *curr;
 	LIST_HEAD(tmp);
 
+	WARN_ON(irqs_disabled());
 	raw_spin_lock_irq(&q->lock);
 	list_splice_init(&q->task_list, &tmp);
 	while (!list_empty(&tmp)) {
Index: linux-5.15/kernel/sched/topology.c
===================================================================
--- linux-5.15.orig/kernel/sched/topology.c
+++ linux-5.15/kernel/sched/topology.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:529 @ static int init_rootdomain(struct root_d
 #ifdef HAVE_RT_PUSH_IPI
 	rd->rto_cpu = -1;
 	raw_spin_lock_init(&rd->rto_lock);
-	init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
+	rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func);
 #endif
 
 	rd->visit_gen = 0;
Index: linux-5.15/kernel/signal.c
===================================================================
--- linux-5.15.orig/kernel/signal.c
+++ linux-5.15/kernel/signal.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1327 @ force_sig_info_to_task(struct kernel_sig
 	struct k_sigaction *action;
 	int sig = info->si_signo;
 
+	/*
+	 * On some archs, PREEMPT_RT has to delay sending a signal from a trap
+	 * since it can not enable preemption, and the signal code's spin_locks
+	 * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
+	 * send the signal on exit of the trap.
+	 */
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+	if (in_atomic()) {
+		struct task_struct *t = current;
+
+		if (WARN_ON_ONCE(t->forced_info.si_signo))
+			return 0;
+
+		if (is_si_special(info)) {
+			WARN_ON_ONCE(info != SEND_SIG_PRIV);
+			t->forced_info.si_signo = info->si_signo;
+			t->forced_info.si_errno = 0;
+			t->forced_info.si_code = SI_KERNEL;
+			t->forced_info.si_pid = 0;
+			t->forced_info.si_uid = 0;
+		} else {
+			t->forced_info = *info;
+		}
+
+		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+		return 0;
+	}
+#endif
 	spin_lock_irqsave(&t->sighand->siglock, flags);
 	action = &t->sighand->action[sig-1];
 	ignored = action->sa.sa_handler == SIG_IGN;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2339 @ static void ptrace_stop(int exit_code, i
 		if (gstop_done && ptrace_reparented(current))
 			do_notify_parent_cldstop(current, false, why);
 
-		/*
-		 * Don't want to allow preemption here, because
-		 * sys_ptrace() needs this task to be inactive.
-		 *
-		 * XXX: implement read_unlock_no_resched().
-		 */
-		preempt_disable();
 		read_unlock(&tasklist_lock);
 		cgroup_enter_frozen();
-		preempt_enable_no_resched();
 		freezable_schedule();
 		cgroup_leave_frozen(true);
 	} else {
Index: linux-5.15/kernel/smp.c
===================================================================
--- linux-5.15.orig/kernel/smp.c
+++ linux-5.15/kernel/smp.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:693 @ void flush_smp_call_function_from_idle(v
 
 	cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
 		      smp_processor_id(), CFD_SEQ_IDLE);
+
 	local_irq_save(flags);
 	flush_smp_call_function_queue(true);
-	if (local_softirq_pending())
-		do_softirq();
+
+	if (local_softirq_pending()) {
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+			do_softirq();
+		} else {
+			struct task_struct *ksoftirqd = this_cpu_ksoftirqd();
+
+			if (ksoftirqd && !task_is_running(ksoftirqd))
+				wake_up_process(ksoftirqd);
+		}
+	}
 
 	local_irq_restore(flags);
 }
Index: linux-5.15/kernel/trace/trace.c
===================================================================
--- linux-5.15.orig/kernel/trace/trace.c
+++ linux-5.15/kernel/trace/trace.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2633 @ unsigned int tracing_gen_ctx_irq_test(un
 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
 	if (test_preempt_need_resched())
 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
-	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+#ifdef CONFIG_PREEMPT_LAZY
+	if (need_resched_lazy())
+		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
+#endif
+
+	return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+		(preempt_lazy_count() & 0xff) << 16 |
 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4220 @ unsigned long trace_total_entries(struct
 
 static void print_lat_help_header(struct seq_file *m)
 {
-	seq_puts(m, "#                    _------=> CPU#            \n"
-		    "#                   / _-----=> irqs-off        \n"
-		    "#                  | / _----=> need-resched    \n"
-		    "#                  || / _---=> hardirq/softirq \n"
-		    "#                  ||| / _--=> preempt-depth   \n"
-		    "#                  |||| / _-=> migrate-disable \n"
-		    "#                  ||||| /     delay           \n"
-		    "#  cmd     pid     |||||| time  |   caller     \n"
-		    "#     \\   /        ||||||  \\    |    /       \n");
+	seq_puts(m, "#                    _--------=> CPU#            \n"
+		    "#                   / _-------=> irqs-off        \n"
+		    "#                  | / _------=> need-resched    \n"
+		    "#                  || / _-----=> need-resched-lazy\n"
+		    "#                  ||| / _----=> hardirq/softirq \n"
+		    "#                  |||| / _---=> preempt-depth   \n"
+		    "#                  ||||| / _--=> preempt-lazy-depth\n"
+		    "#                  |||||| / _-=> migrate-disable \n"
+		    "#                  ||||||| /     delay           \n"
+		    "#  cmd     pid     |||||||| time  |   caller     \n"
+		    "#     \\   /        ||||||||  \\    |    /       \n");
 }
 
 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4264 @ static void print_func_help_header_irq(s
 
 	print_event_info(buf, m);
 
-	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
-	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
-	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
-	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
-	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
-	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
-	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
-	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
+	seq_printf(m, "#                            %.*s  _-------=> irqs-off\n", prec, space);
+	seq_printf(m, "#                            %.*s / _------=> need-resched\n", prec, space);
+	seq_printf(m, "#                            %.*s| / _-----=> need-resched-lazy\n", prec, space);
+	seq_printf(m, "#                            %.*s|| / _----=> hardirq/softirq\n", prec, space);
+	seq_printf(m, "#                            %.*s||| / _---=> preempt-depth\n", prec, space);
+	seq_printf(m, "#                            %.*s|||| / _--=> preempt-lazy-depth\n", prec, space);
+	seq_printf(m, "#                            %.*s||||| / _-=> migrate-disable\n", prec, space);
+	seq_printf(m, "#                            %.*s|||||| /     delay\n", prec, space);
+	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
+	seq_printf(m, "#              | |    %.*s   |   |||||||      |         |\n", prec, "       |    ");
 }
 
 void
Index: linux-5.15/kernel/trace/trace_events.c
===================================================================
--- linux-5.15.orig/kernel/trace/trace_events.c
+++ linux-5.15/kernel/trace/trace_events.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:196 @ static int trace_define_common_fields(vo
 	/* Holds both preempt_count and migrate_disable */
 	__common_field(unsigned char, preempt_count);
 	__common_field(int, pid);
+	__common_field(unsigned char, preempt_lazy_count);
 
 	return ret;
 }
Index: linux-5.15/kernel/trace/trace_output.c
===================================================================
--- linux-5.15.orig/kernel/trace/trace_output.c
+++ linux-5.15/kernel/trace/trace_output.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:454 @ int trace_print_lat_fmt(struct trace_seq
 {
 	char hardsoft_irq;
 	char need_resched;
+	char need_resched_lazy;
 	char irqs_off;
 	int hardirq;
 	int softirq;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:485 @ int trace_print_lat_fmt(struct trace_seq
 		break;
 	}
 
+	need_resched_lazy =
+		(entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
+
 	hardsoft_irq =
 		(nmi && hardirq)     ? 'Z' :
 		nmi                  ? 'z' :
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:496 @ int trace_print_lat_fmt(struct trace_seq
 		softirq              ? 's' :
 		                       '.' ;
 
-	trace_seq_printf(s, "%c%c%c",
-			 irqs_off, need_resched, hardsoft_irq);
+	trace_seq_printf(s, "%c%c%c%c",
+			 irqs_off, need_resched, need_resched_lazy,
+			 hardsoft_irq);
 
 	if (entry->preempt_count & 0xf)
 		trace_seq_printf(s, "%x", entry->preempt_count & 0xf);
 	else
 		trace_seq_putc(s, '.');
 
+	if (entry->preempt_lazy_count)
+		trace_seq_printf(s, "%x", entry->preempt_lazy_count);
+	else
+		trace_seq_putc(s, '.');
+
 	if (entry->preempt_count & 0xf0)
 		trace_seq_printf(s, "%x", entry->preempt_count >> 4);
 	else
Index: linux-5.15/lib/bug.c
===================================================================
--- linux-5.15.orig/lib/bug.c
+++ linux-5.15/lib/bug.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:208 @ enum bug_trap_type report_bug(unsigned l
 	else
 		pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
 			(void *)bugaddr);
+	pr_flush(1000, true);
 
 	return BUG_TRAP_TYPE_BUG;
 }
Index: linux-5.15/lib/dump_stack.c
===================================================================
--- linux-5.15.orig/lib/dump_stack.c
+++ linux-5.15/lib/dump_stack.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:105 @ asmlinkage __visible void dump_stack_lvl
 	 * Permit this cpu to perform nested stack dumps while serialising
 	 * against other CPUs
 	 */
-	printk_cpu_lock_irqsave(flags);
+	raw_printk_cpu_lock_irqsave(flags);
 	__dump_stack(log_lvl);
-	printk_cpu_unlock_irqrestore(flags);
+	raw_printk_cpu_unlock_irqrestore(flags);
 }
 EXPORT_SYMBOL(dump_stack_lvl);
 
Index: linux-5.15/lib/irq_poll.c
===================================================================
--- linux-5.15.orig/lib/irq_poll.c
+++ linux-5.15/lib/irq_poll.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:194 @ static int irq_poll_cpu_dead(unsigned in
 	 * If a CPU goes away, splice its entries to the current CPU
 	 * and trigger a run of the softirq
 	 */
+	local_bh_disable();
 	local_irq_disable();
 	list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
 			 this_cpu_ptr(&blk_cpu_iopoll));
 	__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
 	local_irq_enable();
+	local_bh_enable();
 
 	return 0;
 }
Index: linux-5.15/lib/locking-selftest.c
===================================================================
--- linux-5.15.orig/lib/locking-selftest.c
+++ linux-5.15/lib/locking-selftest.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:29 @
 #include <linux/rtmutex.h>
 #include <linux/local_lock.h>
 
+#ifdef CONFIG_PREEMPT_RT
+# define NON_RT(...)
+#else
+# define NON_RT(...)	__VA_ARGS__
+#endif
+
 /*
  * Change this to 1 if you want to see the failure printouts:
  */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:148 @ static DEFINE_RT_MUTEX(rtmutex_Z2);
 
 #endif
 
-static local_lock_t local_A = INIT_LOCAL_LOCK(local_A);
+static DEFINE_PER_CPU(local_lock_t, local_A);
 
 /*
  * non-inlined runtime initializers, to let separate locks share
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:721 @ GENERATE_TESTCASE(ABCDBCDA_rtmutex);
 
 #undef E
 
+#ifdef CONFIG_PREEMPT_RT
+# define RT_PREPARE_DBL_UNLOCK()	{ migrate_disable(); rcu_read_lock(); }
+#else
+# define RT_PREPARE_DBL_UNLOCK()
+#endif
 /*
  * Double unlock:
  */
 #define E()					\
 						\
 	LOCK(A);				\
+	RT_PREPARE_DBL_UNLOCK();		\
 	UNLOCK(A);				\
 	UNLOCK(A); /* fail */
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:817 @ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_
 #include "locking-selftest-wlock-hardirq.h"
 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock)
 
+#ifndef CONFIG_PREEMPT_RT
 #include "locking-selftest-spin-softirq.h"
 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin)
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:826 @ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_
 
 #include "locking-selftest-wlock-softirq.h"
 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
+#endif
 
 #undef E1
 #undef E2
 
+#ifndef CONFIG_PREEMPT_RT
 /*
  * Enabling hardirqs with a softirq-safe lock held:
  */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:864 @ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A
 #undef E1
 #undef E2
 
+#endif
+
 /*
  * Enabling irqs with an irq-safe lock held:
  */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:895 @ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B
 #include "locking-selftest-wlock-hardirq.h"
 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock)
 
+#ifndef CONFIG_PREEMPT_RT
 #include "locking-selftest-spin-softirq.h"
 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin)
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:904 @ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B
 
 #include "locking-selftest-wlock-softirq.h"
 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
+#endif
 
 #undef E1
 #undef E2
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:943 @ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_
 #include "locking-selftest-wlock-hardirq.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock)
 
+#ifndef CONFIG_PREEMPT_RT
 #include "locking-selftest-spin-softirq.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin)
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:952 @ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_
 
 #include "locking-selftest-wlock-softirq.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
+#endif
 
 #undef E1
 #undef E2
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:993 @ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_
 #include "locking-selftest-wlock-hardirq.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock)
 
+#ifndef CONFIG_PREEMPT_RT
 #include "locking-selftest-spin-softirq.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin)
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1002 @ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_
 
 #include "locking-selftest-wlock-softirq.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
+#endif
 
 #undef E1
 #undef E2
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1057 @ GENERATE_PERMUTATIONS_3_EVENTS(irq_inver
 #include "locking-selftest-wlock-hardirq.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock)
 
+#ifndef CONFIG_PREEMPT_RT
 #include "locking-selftest-spin-softirq.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin)
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1066 @ GENERATE_PERMUTATIONS_3_EVENTS(irq_inver
 
 #include "locking-selftest-wlock-softirq.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
+#endif
 
 #undef E1
 #undef E2
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1234 @ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_
 #include "locking-selftest-wlock.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_wlock)
 
+#ifndef CONFIG_PREEMPT_RT
 #include "locking-selftest-softirq.h"
 #include "locking-selftest-rlock.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_rlock)
 
 #include "locking-selftest-wlock.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock)
+#endif
 
 #undef E1
 #undef E2
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1282 @ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_
 #include "locking-selftest-wlock.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_wlock)
 
+#ifndef CONFIG_PREEMPT_RT
 #include "locking-selftest-softirq.h"
 #include "locking-selftest-rlock.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_rlock)
 
 #include "locking-selftest-wlock.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_wlock)
+#endif
 
 #undef E1
 #undef E2
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1338 @ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_
 #include "locking-selftest-wlock.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_wlock)
 
+#ifndef CONFIG_PREEMPT_RT
 #include "locking-selftest-softirq.h"
 #include "locking-selftest-rlock.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_rlock)
 
 #include "locking-selftest-wlock.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock)
+#endif
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define I_SPINLOCK(x)	lockdep_reset_lock(&lock_##x.dep_map)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1354 @ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_
 # define I_MUTEX(x)	lockdep_reset_lock(&mutex_##x.dep_map)
 # define I_RWSEM(x)	lockdep_reset_lock(&rwsem_##x.dep_map)
 # define I_WW(x)	lockdep_reset_lock(&x.dep_map)
-# define I_LOCAL_LOCK(x) lockdep_reset_lock(&local_##x.dep_map)
+# define I_LOCAL_LOCK(x) lockdep_reset_lock(this_cpu_ptr(&local_##x.dep_map))
 #ifdef CONFIG_RT_MUTEXES
 # define I_RTMUTEX(x)	lockdep_reset_lock(&rtmutex_##x.dep_map)
 #endif
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1414 @ static void reset_locks(void)
 	init_shared_classes();
 	raw_spin_lock_init(&raw_lock_A);
 	raw_spin_lock_init(&raw_lock_B);
-	local_lock_init(&local_A);
+	local_lock_init(this_cpu_ptr(&local_A));
 
 	ww_mutex_init(&o, &ww_lockdep); ww_mutex_init(&o2, &ww_lockdep); ww_mutex_init(&o3, &ww_lockdep);
 	memset(&t, 0, sizeof(t)); memset(&t2, 0, sizeof(t2));
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1432 @ static int unexpected_testcase_failures;
 
 static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
 {
-	unsigned long saved_preempt_count = preempt_count();
+	int saved_preempt_count = preempt_count();
+#ifdef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SMP
+	int saved_mgd_count = current->migration_disabled;
+#endif
+	int saved_rcu_count = current->rcu_read_lock_nesting;
+#endif
 
 	WARN_ON(irqs_disabled());
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1472 @ static void dotest(void (*testcase_fn)(v
 	 * count, so restore it:
 	 */
 	preempt_count_set(saved_preempt_count);
+
+#ifdef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SMP
+	while (current->migration_disabled > saved_mgd_count)
+		migrate_enable();
+#endif
+
+	while (current->rcu_read_lock_nesting > saved_rcu_count)
+		rcu_read_unlock();
+	WARN_ON_ONCE(current->rcu_read_lock_nesting < saved_rcu_count);
+#endif
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 	if (softirq_count())
 		current->softirqs_enabled = 0;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1551 @ static inline void print_testname(const
 
 #define DO_TESTCASE_2x2RW(desc, name, nr)			\
 	DO_TESTCASE_2RW("hard-"desc, name##_hard, nr)		\
-	DO_TESTCASE_2RW("soft-"desc, name##_soft, nr)		\
+	NON_RT(DO_TESTCASE_2RW("soft-"desc, name##_soft, nr))	\
 
 #define DO_TESTCASE_6x2x2RW(desc, name)				\
 	DO_TESTCASE_2x2RW(desc, name, 123);			\
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1599 @ static inline void print_testname(const
 
 #define DO_TESTCASE_2I(desc, name, nr)				\
 	DO_TESTCASE_1("hard-"desc, name##_hard, nr);		\
-	DO_TESTCASE_1("soft-"desc, name##_soft, nr);
+	NON_RT(DO_TESTCASE_1("soft-"desc, name##_soft, nr));
 
 #define DO_TESTCASE_2IB(desc, name, nr)				\
 	DO_TESTCASE_1B("hard-"desc, name##_hard, nr);		\
-	DO_TESTCASE_1B("soft-"desc, name##_soft, nr);
+	NON_RT(DO_TESTCASE_1B("soft-"desc, name##_soft, nr));
 
 #define DO_TESTCASE_6I(desc, name, nr)				\
 	DO_TESTCASE_3("hard-"desc, name##_hard, nr);		\
-	DO_TESTCASE_3("soft-"desc, name##_soft, nr);
+	NON_RT(DO_TESTCASE_3("soft-"desc, name##_soft, nr));
 
 #define DO_TESTCASE_6IRW(desc, name, nr)			\
 	DO_TESTCASE_3RW("hard-"desc, name##_hard, nr);		\
-	DO_TESTCASE_3RW("soft-"desc, name##_soft, nr);
+	NON_RT(DO_TESTCASE_3RW("soft-"desc, name##_soft, nr));
 
 #define DO_TESTCASE_2x3(desc, name)				\
 	DO_TESTCASE_3(desc, name, 12);				\
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1703 @ static void ww_test_fail_acquire(void)
 #endif
 }
 
+#ifdef CONFIG_PREEMPT_RT
+#define ww_mutex_base_lock(b)			rt_mutex_lock(b)
+#define ww_mutex_base_lock_nest_lock(b, b2)	rt_mutex_lock_nest_lock(b, b2)
+#define ww_mutex_base_lock_interruptible(b)	rt_mutex_lock_interruptible(b)
+#define ww_mutex_base_lock_killable(b)		rt_mutex_lock_killable(b)
+#define ww_mutex_base_unlock(b)			rt_mutex_unlock(b)
+#else
+#define ww_mutex_base_lock(b)			mutex_lock(b)
+#define ww_mutex_base_lock_nest_lock(b, b2)	mutex_lock_nest_lock(b, b2)
+#define ww_mutex_base_lock_interruptible(b)	mutex_lock_interruptible(b)
+#define ww_mutex_base_lock_killable(b)		mutex_lock_killable(b)
+#define ww_mutex_base_unlock(b)			mutex_unlock(b)
+#endif
+
 static void ww_test_normal(void)
 {
 	int ret;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1731 @ static void ww_test_normal(void)
 
 	/* mutex_lock (and indirectly, mutex_lock_nested) */
 	o.ctx = (void *)~0UL;
-	mutex_lock(&o.base);
-	mutex_unlock(&o.base);
+	ww_mutex_base_lock(&o.base);
+	ww_mutex_base_unlock(&o.base);
 	WARN_ON(o.ctx != (void *)~0UL);
 
 	/* mutex_lock_interruptible (and *_nested) */
 	o.ctx = (void *)~0UL;
-	ret = mutex_lock_interruptible(&o.base);
+	ret = ww_mutex_base_lock_interruptible(&o.base);
 	if (!ret)
-		mutex_unlock(&o.base);
+		ww_mutex_base_unlock(&o.base);
 	else
 		WARN_ON(1);
 	WARN_ON(o.ctx != (void *)~0UL);
 
 	/* mutex_lock_killable (and *_nested) */
 	o.ctx = (void *)~0UL;
-	ret = mutex_lock_killable(&o.base);
+	ret = ww_mutex_base_lock_killable(&o.base);
 	if (!ret)
-		mutex_unlock(&o.base);
+		ww_mutex_base_unlock(&o.base);
 	else
 		WARN_ON(1);
 	WARN_ON(o.ctx != (void *)~0UL);
 
 	/* trylock, succeeding */
 	o.ctx = (void *)~0UL;
-	ret = mutex_trylock(&o.base);
+	ret = ww_mutex_base_trylock(&o.base);
 	WARN_ON(!ret);
 	if (ret)
-		mutex_unlock(&o.base);
+		ww_mutex_base_unlock(&o.base);
 	else
 		WARN_ON(1);
 	WARN_ON(o.ctx != (void *)~0UL);
 
 	/* trylock, failing */
 	o.ctx = (void *)~0UL;
-	mutex_lock(&o.base);
-	ret = mutex_trylock(&o.base);
+	ww_mutex_base_lock(&o.base);
+	ret = ww_mutex_base_trylock(&o.base);
 	WARN_ON(ret);
-	mutex_unlock(&o.base);
+	ww_mutex_base_unlock(&o.base);
 	WARN_ON(o.ctx != (void *)~0UL);
 
 	/* nest_lock */
 	o.ctx = (void *)~0UL;
-	mutex_lock_nest_lock(&o.base, &t);
-	mutex_unlock(&o.base);
+	ww_mutex_base_lock_nest_lock(&o.base, &t);
+	ww_mutex_base_unlock(&o.base);
 	WARN_ON(o.ctx != (void *)~0UL);
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1787 @ static void ww_test_two_contexts(void)
 static void ww_test_diff_class(void)
 {
 	WWAI(&t);
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef DEBUG_WW_MUTEXES
 	t.ww_class = NULL;
 #endif
 	WWL(&o, &t);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1851 @ static void ww_test_edeadlk_normal(void)
 {
 	int ret;
 
-	mutex_lock(&o2.base);
+	ww_mutex_base_lock(&o2.base);
 	o2.ctx = &t2;
 	mutex_release(&o2.base.dep_map, _THIS_IP_);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1867 @ static void ww_test_edeadlk_normal(void)
 
 	o2.ctx = NULL;
 	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
-	mutex_unlock(&o2.base);
+	ww_mutex_base_unlock(&o2.base);
 	WWU(&o);
 
 	WWL(&o2, &t);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1877 @ static void ww_test_edeadlk_normal_slow(
 {
 	int ret;
 
-	mutex_lock(&o2.base);
+	ww_mutex_base_lock(&o2.base);
 	mutex_release(&o2.base.dep_map, _THIS_IP_);
 	o2.ctx = &t2;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1893 @ static void ww_test_edeadlk_normal_slow(
 
 	o2.ctx = NULL;
 	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
-	mutex_unlock(&o2.base);
+	ww_mutex_base_unlock(&o2.base);
 	WWU(&o);
 
 	ww_mutex_lock_slow(&o2, &t);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1903 @ static void ww_test_edeadlk_no_unlock(vo
 {
 	int ret;
 
-	mutex_lock(&o2.base);
+	ww_mutex_base_lock(&o2.base);
 	o2.ctx = &t2;
 	mutex_release(&o2.base.dep_map, _THIS_IP_);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1919 @ static void ww_test_edeadlk_no_unlock(vo
 
 	o2.ctx = NULL;
 	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
-	mutex_unlock(&o2.base);
+	ww_mutex_base_unlock(&o2.base);
 
 	WWL(&o2, &t);
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1928 @ static void ww_test_edeadlk_no_unlock_sl
 {
 	int ret;
 
-	mutex_lock(&o2.base);
+	ww_mutex_base_lock(&o2.base);
 	mutex_release(&o2.base.dep_map, _THIS_IP_);
 	o2.ctx = &t2;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1944 @ static void ww_test_edeadlk_no_unlock_sl
 
 	o2.ctx = NULL;
 	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
-	mutex_unlock(&o2.base);
+	ww_mutex_base_unlock(&o2.base);
 
 	ww_mutex_lock_slow(&o2, &t);
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1953 @ static void ww_test_edeadlk_acquire_more
 {
 	int ret;
 
-	mutex_lock(&o2.base);
+	ww_mutex_base_lock(&o2.base);
 	mutex_release(&o2.base.dep_map, _THIS_IP_);
 	o2.ctx = &t2;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1974 @ static void ww_test_edeadlk_acquire_more
 {
 	int ret;
 
-	mutex_lock(&o2.base);
+	ww_mutex_base_lock(&o2.base);
 	mutex_release(&o2.base.dep_map, _THIS_IP_);
 	o2.ctx = &t2;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1995 @ static void ww_test_edeadlk_acquire_more
 {
 	int ret;
 
-	mutex_lock(&o2.base);
+	ww_mutex_base_lock(&o2.base);
 	mutex_release(&o2.base.dep_map, _THIS_IP_);
 	o2.ctx = &t2;
 
-	mutex_lock(&o3.base);
+	ww_mutex_base_lock(&o3.base);
 	mutex_release(&o3.base.dep_map, _THIS_IP_);
 	o3.ctx = &t2;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2021 @ static void ww_test_edeadlk_acquire_more
 {
 	int ret;
 
-	mutex_lock(&o2.base);
+	ww_mutex_base_lock(&o2.base);
 	mutex_release(&o2.base.dep_map, _THIS_IP_);
 	o2.ctx = &t2;
 
-	mutex_lock(&o3.base);
+	ww_mutex_base_lock(&o3.base);
 	mutex_release(&o3.base.dep_map, _THIS_IP_);
 	o3.ctx = &t2;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2046 @ static void ww_test_edeadlk_acquire_wron
 {
 	int ret;
 
-	mutex_lock(&o2.base);
+	ww_mutex_base_lock(&o2.base);
 	mutex_release(&o2.base.dep_map, _THIS_IP_);
 	o2.ctx = &t2;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2071 @ static void ww_test_edeadlk_acquire_wron
 {
 	int ret;
 
-	mutex_lock(&o2.base);
+	ww_mutex_base_lock(&o2.base);
 	mutex_release(&o2.base.dep_map, _THIS_IP_);
 	o2.ctx = &t2;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2712 @ static void wait_context_tests(void)
 
 static void local_lock_2(void)
 {
-	local_lock_acquire(&local_A);	/* IRQ-ON */
-	local_lock_release(&local_A);
+	local_lock(&local_A);	/* IRQ-ON */
+	local_unlock(&local_A);
 
 	HARDIRQ_ENTER();
 	spin_lock(&lock_A);		/* IN-IRQ */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2722 @ static void local_lock_2(void)
 
 	HARDIRQ_DISABLE();
 	spin_lock(&lock_A);
-	local_lock_acquire(&local_A);	/* IN-IRQ <-> IRQ-ON cycle, false */
-	local_lock_release(&local_A);
+	local_lock(&local_A);	/* IN-IRQ <-> IRQ-ON cycle, false */
+	local_unlock(&local_A);
 	spin_unlock(&lock_A);
 	HARDIRQ_ENABLE();
 }
 
 static void local_lock_3A(void)
 {
-	local_lock_acquire(&local_A);	/* IRQ-ON */
+	local_lock(&local_A);	/* IRQ-ON */
 	spin_lock(&lock_B);		/* IRQ-ON */
 	spin_unlock(&lock_B);
-	local_lock_release(&local_A);
+	local_unlock(&local_A);
 
 	HARDIRQ_ENTER();
 	spin_lock(&lock_A);		/* IN-IRQ */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2742 @ static void local_lock_3A(void)
 
 	HARDIRQ_DISABLE();
 	spin_lock(&lock_A);
-	local_lock_acquire(&local_A);	/* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
-	local_lock_release(&local_A);
+	local_lock(&local_A);	/* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
+	local_unlock(&local_A);
 	spin_unlock(&lock_A);
 	HARDIRQ_ENABLE();
 }
 
 static void local_lock_3B(void)
 {
-	local_lock_acquire(&local_A);	/* IRQ-ON */
+	local_lock(&local_A);	/* IRQ-ON */
 	spin_lock(&lock_B);		/* IRQ-ON */
 	spin_unlock(&lock_B);
-	local_lock_release(&local_A);
+	local_unlock(&local_A);
 
 	HARDIRQ_ENTER();
 	spin_lock(&lock_A);		/* IN-IRQ */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2762 @ static void local_lock_3B(void)
 
 	HARDIRQ_DISABLE();
 	spin_lock(&lock_A);
-	local_lock_acquire(&local_A);	/* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
-	local_lock_release(&local_A);
+	local_lock(&local_A);	/* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
+	local_unlock(&local_A);
 	spin_unlock(&lock_A);
 	HARDIRQ_ENABLE();
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2878 @ void locking_selftest(void)
 	printk("------------------------\n");
 	printk("| Locking API testsuite:\n");
 	printk("----------------------------------------------------------------------------\n");
-	printk("                                 | spin |wlock |rlock |mutex | wsem | rsem |\n");
+	printk("                                 | spin |wlock |rlock |mutex | wsem | rsem |rtmutex\n");
 	printk("  --------------------------------------------------------------------------\n");
 
 	init_shared_classes();
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2951 @ void locking_selftest(void)
 	DO_TESTCASE_6x1RR("rlock W1R2/R2R3/W3W1", W1R2_R2R3_W3W1);
 
 	printk("  --------------------------------------------------------------------------\n");
-
 	/*
 	 * irq-context testcases:
 	 */
 	DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
-	DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
+	NON_RT(DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A));
 	DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
 	DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
 	DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
Index: linux-5.15/lib/nmi_backtrace.c
===================================================================
--- linux-5.15.orig/lib/nmi_backtrace.c
+++ linux-5.15/lib/nmi_backtrace.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:102 @ bool nmi_cpu_backtrace(struct pt_regs *r
 		 * Allow nested NMI backtraces while serializing
 		 * against other CPUs.
 		 */
-		printk_cpu_lock_irqsave(flags);
+		raw_printk_cpu_lock_irqsave(flags);
 		if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) {
 			pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
 				cpu, (void *)instruction_pointer(regs));
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:113 @ bool nmi_cpu_backtrace(struct pt_regs *r
 			else
 				dump_stack();
 		}
-		printk_cpu_unlock_irqrestore(flags);
+		raw_printk_cpu_unlock_irqrestore(flags);
 		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
 		return true;
 	}
Index: linux-5.15/lib/scatterlist.c
===================================================================
--- linux-5.15.orig/lib/scatterlist.c
+++ linux-5.15/lib/scatterlist.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:831 @ static bool sg_miter_get_next_page(struc
  *   stops @miter.
  *
  * Context:
- *   Don't care if @miter is stopped, or not proceeded yet.
- *   Otherwise, preemption disabled if the SG_MITER_ATOMIC is set.
+ *   Don't care.
  *
  * Returns:
  *   true if @miter contains the valid mapping.  false if end of sg
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:867 @ EXPORT_SYMBOL(sg_miter_skip);
  *   @miter->addr and @miter->length point to the current mapping.
  *
  * Context:
- *   Preemption disabled if SG_MITER_ATOMIC.  Preemption must stay disabled
- *   till @miter is stopped.  May sleep if !SG_MITER_ATOMIC.
+ *   May sleep if !SG_MITER_ATOMIC.
  *
  * Returns:
  *   true if @miter contains the next mapping.  false if end of sg
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:907 @ EXPORT_SYMBOL(sg_miter_next);
  *   need to be released during iteration.
  *
  * Context:
- *   Preemption disabled if the SG_MITER_ATOMIC is set.  Don't care
- *   otherwise.
+ *   Don't care otherwise.
  */
 void sg_miter_stop(struct sg_mapping_iter *miter)
 {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:922 @ void sg_miter_stop(struct sg_mapping_ite
 			flush_dcache_page(miter->page);
 
 		if (miter->__flags & SG_MITER_ATOMIC) {
-			WARN_ON_ONCE(preemptible());
+			WARN_ON_ONCE(!pagefault_disabled());
 			kunmap_atomic(miter->addr);
 		} else
 			kunmap(miter->page);
Index: linux-5.15/localversion-rt
===================================================================
--- /dev/null
+++ linux-5.15/localversion-rt
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1 @
+-rt61
Index: linux-5.15/mm/Kconfig
===================================================================
--- linux-5.15.orig/mm/Kconfig
+++ linux-5.15/mm/Kconfig
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:374 @ config NOMMU_INITIAL_TRIM_EXCESS
 
 config TRANSPARENT_HUGEPAGE
 	bool "Transparent Hugepage Support"
-	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT
 	select COMPACTION
 	select XARRAY_MULTI
 	help
Index: linux-5.15/mm/memcontrol.c
===================================================================
--- linux-5.15.orig/mm/memcontrol.c
+++ linux-5.15/mm/memcontrol.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:657 @ static u64 flush_next_time;
 
 #define FLUSH_TIME (2UL*HZ)
 
+/*
+ * Accessors to ensure that preemption is disabled on PREEMPT_RT because it can
+ * not rely on this as part of an acquired spinlock_t lock. These functions are
+ * never used in hardirq context on PREEMPT_RT and therefore disabling preemtion
+ * is sufficient.
+ */
+static void memcg_stats_lock(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+      preempt_disable();
+#else
+      VM_BUG_ON(!irqs_disabled());
+#endif
+}
+
+static void __memcg_stats_lock(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+      preempt_disable();
+#endif
+}
+
+static void memcg_stats_unlock(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+      preempt_enable();
+#endif
+}
+
 static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
 {
 	unsigned int x;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:769 @ void __mod_memcg_lruvec_state(struct lru
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 	memcg = pn->memcg;
 
+	/*
+	 * The caller from rmap relay on disabled preemption becase they never
+	 * update their counter from in-interrupt context. For these two
+	 * counters we check that the update is never performed from an
+	 * interrupt context while other caller need to have disabled interrupt.
+	 */
+	__memcg_stats_lock();
+	if (IS_ENABLED(CONFIG_DEBUG_VM) && !IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		switch (idx) {
+		case NR_ANON_MAPPED:
+		case NR_FILE_MAPPED:
+		case NR_ANON_THPS:
+		case NR_SHMEM_PMDMAPPED:
+		case NR_FILE_PMDMAPPED:
+			WARN_ON_ONCE(!in_task());
+			break;
+		default:
+			WARN_ON_ONCE(!irqs_disabled());
+		}
+	}
+
 	/* Update memcg */
 	__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:797 @ void __mod_memcg_lruvec_state(struct lru
 	__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
 
 	memcg_rstat_updated(memcg, val);
+	memcg_stats_unlock();
 }
 
 /**
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:898 @ void __count_memcg_events(struct mem_cgr
 	if (mem_cgroup_disabled())
 		return;
 
+	memcg_stats_lock();
 	__this_cpu_add(memcg->vmstats_percpu->events[idx], count);
 	memcg_rstat_updated(memcg, count);
+	memcg_stats_unlock();
 }
 
 static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:965 @ static bool mem_cgroup_event_ratelimit(s
  */
 static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 {
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		return;
+
 	/* threshold event is triggered in finer grain than soft limit */
 	if (unlikely(mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_THRESH))) {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2161 @ void unlock_page_memcg(struct page *page
 }
 EXPORT_SYMBOL(unlock_page_memcg);
 
-struct obj_stock {
+struct memcg_stock_pcp {
+	local_lock_t stock_lock;
+	struct mem_cgroup *cached; /* this never be root cgroup */
+	unsigned int nr_pages;
+
 #ifdef CONFIG_MEMCG_KMEM
 	struct obj_cgroup *cached_objcg;
 	struct pglist_data *cached_pgdat;
 	unsigned int nr_bytes;
 	int nr_slab_reclaimable_b;
 	int nr_slab_unreclaimable_b;
-#else
-	int dummy[0];
 #endif
-};
-
-struct memcg_stock_pcp {
-	struct mem_cgroup *cached; /* this never be root cgroup */
-	unsigned int nr_pages;
-	struct obj_stock task_obj;
-	struct obj_stock irq_obj;
 
 	struct work_struct work;
 	unsigned long flags;
 #define FLUSHING_CACHED_CHARGE	0
 };
-static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
+static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
+	.stock_lock = INIT_LOCAL_LOCK(stock_lock),
+};
 static DEFINE_MUTEX(percpu_charge_mutex);
 
 #ifdef CONFIG_MEMCG_KMEM
-static void drain_obj_stock(struct obj_stock *stock);
+static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock);
 static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 				     struct mem_cgroup *root_memcg);
 
 #else
-static inline void drain_obj_stock(struct obj_stock *stock)
+static inline struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
 {
+	return NULL;
 }
 static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 				     struct mem_cgroup *root_memcg)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2201 @ static bool obj_stock_flush_required(str
 #endif
 
 /*
- * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
- * sequence used in this case to access content from object stock is slow.
- * To optimize for user context access, there are now two object stocks for
- * task context and interrupt context access respectively.
- *
- * The task context object stock can be accessed by disabling preemption only
- * which is cheap in non-preempt kernel. The interrupt context object stock
- * can only be accessed after disabling interrupt. User context code can
- * access interrupt object stock, but not vice versa.
- */
-static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
-{
-	struct memcg_stock_pcp *stock;
-
-	if (likely(in_task())) {
-		*pflags = 0UL;
-		preempt_disable();
-		stock = this_cpu_ptr(&memcg_stock);
-		return &stock->task_obj;
-	}
-
-	local_irq_save(*pflags);
-	stock = this_cpu_ptr(&memcg_stock);
-	return &stock->irq_obj;
-}
-
-static inline void put_obj_stock(unsigned long flags)
-{
-	if (likely(in_task()))
-		preempt_enable();
-	else
-		local_irq_restore(flags);
-}
-
-/**
  * consume_stock: Try to consume stocked charge on this cpu.
  * @memcg: memcg to consume from.
  * @nr_pages: how many pages to charge.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2220 @ static bool consume_stock(struct mem_cgr
 	if (nr_pages > MEMCG_CHARGE_BATCH)
 		return ret;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2228 @ static bool consume_stock(struct mem_cgr
 		ret = true;
 	}
 
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 
 	return ret;
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2257 @ static void drain_stock(struct memcg_sto
 static void drain_local_stock(struct work_struct *dummy)
 {
 	struct memcg_stock_pcp *stock;
+	struct obj_cgroup *old = NULL;
 	unsigned long flags;
 
 	/*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2265 @ static void drain_local_stock(struct wor
 	 * drain_stock races is that we always operate on local CPU stock
 	 * here with IRQ disabled
 	 */
-	local_irq_save(flags);
+	local_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	drain_obj_stock(&stock->irq_obj);
-	if (in_task())
-		drain_obj_stock(&stock->task_obj);
+	old = drain_obj_stock(stock);
 	drain_stock(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
 
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	if (old)
+		obj_cgroup_put(old);
 }
 
 /*
  * Cache charges(val) to local per_cpu area.
  * This will be consumed by consume_stock() function, later.
  */
-static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	struct memcg_stock_pcp *stock;
-	unsigned long flags;
-
-	local_irq_save(flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (stock->cached != memcg) { /* reset if necessary */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2295 @ static void refill_stock(struct mem_cgro
 
 	if (stock->nr_pages > MEMCG_CHARGE_BATCH)
 		drain_stock(stock);
+}
 
-	local_irq_restore(flags);
+static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	unsigned long flags;
+
+	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	__refill_stock(memcg, nr_pages);
+	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 }
 
 /*
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2323 @ static void drain_all_stock(struct mem_c
 	 * as well as workers from this path always operate on the local
 	 * per-cpu data. CPU up doesn't touch memcg_stock at all.
 	 */
-	curcpu = get_cpu();
+	migrate_disable();
+	curcpu = smp_processor_id();
 	for_each_online_cpu(cpu) {
 		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
 		struct mem_cgroup *memcg;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2347 @ static void drain_all_stock(struct mem_c
 				schedule_work_on(cpu, &stock->work);
 		}
 	}
-	put_cpu();
+	migrate_enable();
 	mutex_unlock(&percpu_charge_mutex);
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3112 @ void __memcg_kmem_uncharge_page(struct p
 void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 		     enum node_stat_item idx, int nr)
 {
+	struct memcg_stock_pcp *stock;
+	struct obj_cgroup *old = NULL;
 	unsigned long flags;
-	struct obj_stock *stock = get_obj_stock(&flags);
 	int *bytes;
 
+	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	stock = this_cpu_ptr(&memcg_stock);
+
 	/*
 	 * Save vmstat data in stock and skip vmstat array update unless
 	 * accumulating over a page of vmstat data or when pgdat or idx
 	 * changes.
 	 */
 	if (stock->cached_objcg != objcg) {
-		drain_obj_stock(stock);
+		old = drain_obj_stock(stock);
 		obj_cgroup_get(objcg);
 		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
 				? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3170 @ void mod_objcg_state(struct obj_cgroup *
 	if (nr)
 		mod_objcg_mlstate(objcg, pgdat, idx, nr);
 
-	put_obj_stock(flags);
+	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	if (old)
+		obj_cgroup_put(old);
 }
 
 static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 {
+	struct memcg_stock_pcp *stock;
 	unsigned long flags;
-	struct obj_stock *stock = get_obj_stock(&flags);
 	bool ret = false;
 
+	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+
+	stock = this_cpu_ptr(&memcg_stock);
 	if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
 		stock->nr_bytes -= nr_bytes;
 		ret = true;
 	}
 
-	put_obj_stock(flags);
+	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 
 	return ret;
 }
 
-static void drain_obj_stock(struct obj_stock *stock)
+static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
 {
 	struct obj_cgroup *old = stock->cached_objcg;
 
 	if (!old)
-		return;
+		return NULL;
 
 	if (stock->nr_bytes) {
 		unsigned int nr_pages = stock->nr_bytes >> PAGE_SHIFT;
 		unsigned int nr_bytes = stock->nr_bytes & (PAGE_SIZE - 1);
 
-		if (nr_pages)
-			obj_cgroup_uncharge_pages(old, nr_pages);
+		if (nr_pages) {
+			struct mem_cgroup *memcg;
+
+			memcg = get_mem_cgroup_from_objcg(old);
+
+			if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+				page_counter_uncharge(&memcg->kmem, nr_pages);
+
+			__refill_stock(memcg, nr_pages);
+
+			css_put(&memcg->css);
+		}
 
 		/*
 		 * The leftover is flushed to the centralized per-memcg value.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3251 @ static void drain_obj_stock(struct obj_s
 		stock->cached_pgdat = NULL;
 	}
 
-	obj_cgroup_put(old);
 	stock->cached_objcg = NULL;
+	/*
+	 * The `old' objects needs to be released by the caller via
+	 * obj_cgroup_put() outside of memcg_stock_pcp::stock_lock.
+	 */
+	return old;
 }
 
 static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3264 @ static bool obj_stock_flush_required(str
 {
 	struct mem_cgroup *memcg;
 
-	if (in_task() && stock->task_obj.cached_objcg) {
-		memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg);
-		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
-			return true;
-	}
-	if (stock->irq_obj.cached_objcg) {
-		memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg);
+	if (stock->cached_objcg) {
+		memcg = obj_cgroup_memcg(stock->cached_objcg);
 		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
 			return true;
 	}
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3276 @ static bool obj_stock_flush_required(str
 static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 			     bool allow_uncharge)
 {
+	struct memcg_stock_pcp *stock;
+	struct obj_cgroup *old = NULL;
 	unsigned long flags;
-	struct obj_stock *stock = get_obj_stock(&flags);
 	unsigned int nr_pages = 0;
 
+	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+
+	stock = this_cpu_ptr(&memcg_stock);
 	if (stock->cached_objcg != objcg) { /* reset if necessary */
-		drain_obj_stock(stock);
+		old = drain_obj_stock(stock);
 		obj_cgroup_get(objcg);
 		stock->cached_objcg = objcg;
 		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3299 @ static void refill_obj_stock(struct obj_
 		stock->nr_bytes &= (PAGE_SIZE - 1);
 	}
 
-	put_obj_stock(flags);
+	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	if (old)
+		obj_cgroup_put(old);
 
 	if (nr_pages)
 		obj_cgroup_uncharge_pages(objcg, nr_pages);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3868 @ static ssize_t mem_cgroup_write(struct k
 		}
 		break;
 	case RES_SOFT_LIMIT:
-		memcg->soft_limit = nr_pages;
-		ret = 0;
+		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+			ret = -EOPNOTSUPP;
+		} else {
+			memcg->soft_limit = nr_pages;
+			ret = 0;
+		}
 		break;
 	}
 	return ret ?: nbytes;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4854 @ static ssize_t memcg_write_event_control
 	char *endp;
 	int ret;
 
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		return -EOPNOTSUPP;
+
 	buf = strstrip(buf);
 
 	efd = simple_strtoul(buf, &endp, 10);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6948 @ static void uncharge_page(struct page *p
 	unsigned long nr_pages;
 	struct mem_cgroup *memcg;
 	struct obj_cgroup *objcg;
-	bool use_objcg = PageMemcgKmem(page);
 
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6956 @ static void uncharge_page(struct page *p
 	 * page memcg or objcg at this point, we have fully
 	 * exclusive access to the page.
 	 */
-	if (use_objcg) {
+	if (PageMemcgKmem(page)) {
 		objcg = __page_objcg(page);
 		/*
 		 * This get matches the put at the end of the function and
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6984 @ static void uncharge_page(struct page *p
 
 	nr_pages = compound_nr(page);
 
-	if (use_objcg) {
+	if (PageMemcgKmem(page)) {
 		ug->nr_memory += nr_pages;
 		ug->nr_kmem += nr_pages;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:7314 @ void mem_cgroup_swapout(struct page *pag
 	 * important here to have the interrupts disabled because it is the
 	 * only synchronisation we have for updating the per-CPU variables.
 	 */
-	VM_BUG_ON(!irqs_disabled());
+	memcg_stats_lock();
 	mem_cgroup_charge_statistics(memcg, page, -nr_entries);
+	memcg_stats_unlock();
 	memcg_check_events(memcg, page);
 
 	css_put(&memcg->css);
Index: linux-5.15/mm/memory.c
===================================================================
--- linux-5.15.orig/mm/memory.c
+++ linux-5.15/mm/memory.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:5295 @ void __might_fault(const char *file, int
 		return;
 	if (pagefault_disabled())
 		return;
-	__might_sleep(file, line, 0);
+	__might_sleep(file, line);
 #if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
 	if (current->mm)
 		might_lock_read(&current->mm->mmap_lock);
Index: linux-5.15/mm/page_alloc.c
===================================================================
--- linux-5.15.orig/mm/page_alloc.c
+++ linux-5.15/mm/page_alloc.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3152 @ static void drain_local_pages_wq(struct
 	 * cpu which is alright but we also have to make sure to not move to
 	 * a different one.
 	 */
-	preempt_disable();
+	migrate_disable();
 	drain_local_pages(drain->zone);
-	preempt_enable();
+	migrate_enable();
 }
 
 /*
Index: linux-5.15/mm/vmalloc.c
===================================================================
--- linux-5.15.orig/mm/vmalloc.c
+++ linux-5.15/mm/vmalloc.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1921 @ static void *new_vmap_block(unsigned int
 		return ERR_PTR(err);
 	}
 
-	vbq = &get_cpu_var(vmap_block_queue);
+	get_cpu_light();
+	vbq = this_cpu_ptr(&vmap_block_queue);
 	spin_lock(&vbq->lock);
 	list_add_tail_rcu(&vb->free_list, &vbq->free);
 	spin_unlock(&vbq->lock);
-	put_cpu_var(vmap_block_queue);
+	put_cpu_light();
 
 	return vaddr;
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2005 @ static void *vb_alloc(unsigned long size
 	order = get_order(size);
 
 	rcu_read_lock();
-	vbq = &get_cpu_var(vmap_block_queue);
+	get_cpu_light();
+	vbq = this_cpu_ptr(&vmap_block_queue);
 	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
 		unsigned long pages_off;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2029 @ static void *vb_alloc(unsigned long size
 		break;
 	}
 
-	put_cpu_var(vmap_block_queue);
+	put_cpu_light();
 	rcu_read_unlock();
 
 	/* Allocate new block if nothing was found */
Index: linux-5.15/mm/workingset.c
===================================================================
--- linux-5.15.orig/mm/workingset.c
+++ linux-5.15/mm/workingset.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:436 @ static struct list_lru shadow_nodes;
 
 void workingset_update_node(struct xa_node *node)
 {
+	struct address_space *mapping;
+
 	/*
 	 * Track non-empty nodes that contain only shadow entries;
 	 * unlink those that contain pages or are being freed.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:446 @ void workingset_update_node(struct xa_no
 	 * already where they should be. The list_empty() test is safe
 	 * as node->private_list is protected by the i_pages lock.
 	 */
-	VM_WARN_ON_ONCE(!irqs_disabled());  /* For __inc_lruvec_page_state */
+	mapping = container_of(node->array, struct address_space, i_pages);
+	lockdep_assert_held(&mapping->i_pages.xa_lock);
 
 	if (node->count && node->count == node->nr_values) {
 		if (list_empty(&node->private_list)) {
Index: linux-5.15/mm/zsmalloc.c
===================================================================
--- linux-5.15.orig/mm/zsmalloc.c
+++ linux-5.15/mm/zsmalloc.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:60 @
 #include <linux/wait.h>
 #include <linux/pagemap.h>
 #include <linux/fs.h>
+#include <linux/local_lock.h>
 
 #define ZSPAGE_MAGIC	0x58
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:81 @
 
 #define ZS_HANDLE_SIZE (sizeof(unsigned long))
 
+#ifdef CONFIG_PREEMPT_RT
+
+struct zsmalloc_handle {
+	unsigned long addr;
+	spinlock_t lock;
+};
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
+
+#else
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
+#endif
+
 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
  * a single (unsigned long) handle value.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:311 @ struct zspage {
 };
 
 struct mapping_area {
+	local_lock_t lock;
 	char *vm_buf; /* copy buffer for objects that span pages */
 	char *vm_addr; /* address of kmap_atomic()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:341 @ static void SetZsPageMovable(struct zs_p
 
 static int create_cache(struct zs_pool *pool)
 {
-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
+	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
 					0, 0, NULL);
 	if (!pool->handle_cachep)
 		return 1;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:365 @ static void destroy_cache(struct zs_pool
 
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
-	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+	void *p;
+
+	p = kmem_cache_alloc(pool->handle_cachep,
+			     gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+#ifdef CONFIG_PREEMPT_RT
+	if (p) {
+		struct zsmalloc_handle *zh = p;
+
+		spin_lock_init(&zh->lock);
+	}
+#endif
+	return (unsigned long)p;
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
+{
+	return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1));
+}
+#endif
+
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
 {
 	kmem_cache_free(pool->handle_cachep, (void *)handle);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:404 @ static void cache_free_zspage(struct zs_
 
 static void record_obj(unsigned long handle, unsigned long obj)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	WRITE_ONCE(zh->addr, obj);
+#else
 	/*
 	 * lsb of @obj represents handle lock while other bits
 	 * represent object value the handle is pointing so
 	 * updating shouldn't do store tearing.
 	 */
 	WRITE_ONCE(*(unsigned long *)handle, obj);
+#endif
 }
 
 /* zpool driver */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:497 @ MODULE_ALIAS("zpool-zsmalloc");
 #endif /* CONFIG_ZPOOL */
 
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
+static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
+	.lock	= INIT_LOCAL_LOCK(lock),
+};
 
 static bool is_zspage_isolated(struct zspage *zspage)
 {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:906 @ static unsigned long location_to_obj(str
 
 static unsigned long handle_to_obj(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return zh->addr;
+#else
 	return *(unsigned long *)handle;
+#endif
 }
 
 static unsigned long obj_to_head(struct page *page, void *obj)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:926 @ static unsigned long obj_to_head(struct
 
 static inline int testpin_tag(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_is_locked(&zh->lock);
+#else
 	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static inline int trypin_tag(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_trylock(&zh->lock);
+#else
 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void pin_tag(unsigned long handle) __acquires(bitlock)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_lock(&zh->lock);
+#else
 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void unpin_tag(unsigned long handle) __releases(bitlock)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_unlock(&zh->lock);
+#else
 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void reset_page(struct page *page)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1348 @ void *zs_map_object(struct zs_pool *pool
 	class = pool->size_class[class_idx];
 	off = (class->size * obj_idx) & ~PAGE_MASK;
 
-	area = &get_cpu_var(zs_map_area);
+	local_lock(&zs_map_area.lock);
+	area = this_cpu_ptr(&zs_map_area);
 	area->vm_mm = mm;
 	if (off + class->size <= PAGE_SIZE) {
 		/* this object is contained entirely within a page */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1403 @ void zs_unmap_object(struct zs_pool *poo
 
 		__zs_unmap_object(area, pages, off, class->size);
 	}
-	put_cpu_var(zs_map_area);
+	local_unlock(&zs_map_area.lock);
 
 	migrate_read_unlock(zspage);
 	unpin_tag(handle);
Index: linux-5.15/net/Kconfig
===================================================================
--- linux-5.15.orig/net/Kconfig
+++ linux-5.15/net/Kconfig
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:297 @ config CGROUP_NET_CLASSID
 
 config NET_RX_BUSY_POLL
 	bool
-	default y
+	default y if !PREEMPT_RT
 
 config BQL
 	bool
Index: linux-5.15/net/core/dev.c
===================================================================
--- linux-5.15.orig/net/core/dev.c
+++ linux-5.15/net/core/dev.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:228 @ static inline struct hlist_head *dev_ind
 static inline void rps_lock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	spin_lock(&sd->input_pkt_queue.lock);
+	raw_spin_lock(&sd->input_pkt_queue.raw_lock);
 #endif
 }
 
 static inline void rps_unlock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	spin_unlock(&sd->input_pkt_queue.lock);
+	raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
 #endif
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3067 @ static void __netif_reschedule(struct Qd
 	sd->output_queue_tailp = &q->next_sched;
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_restore(flags);
+	preempt_check_resched_rt();
 }
 
 void __netif_schedule(struct Qdisc *q)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3130 @ void __dev_kfree_skb_irq(struct sk_buff
 	__this_cpu_write(softnet_data.completion_queue, skb);
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_restore(flags);
+	preempt_check_resched_rt();
 }
 EXPORT_SYMBOL(__dev_kfree_skb_irq);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:3862 @ no_lock_out:
 	 * This permits qdisc->running owner to get the lock more
 	 * often and dequeue packets faster.
 	 */
+#ifdef CONFIG_PREEMPT_RT
+	contended = true;
+#else
 	contended = qdisc_is_running(q);
+#endif
 	if (unlikely(contended))
 		spin_lock(&q->busylock);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4692 @ drop:
 	rps_unlock(sd);
 
 	local_irq_restore(flags);
+	preempt_check_resched_rt();
 
 	atomic_long_inc(&skb->dev->rx_dropped);
 	kfree_skb(skb);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4933 @ static int netif_rx_internal(struct sk_b
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
-		preempt_disable();
+		migrate_disable();
 		rcu_read_lock();
 
 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4943 @ static int netif_rx_internal(struct sk_b
 		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 
 		rcu_read_unlock();
-		preempt_enable();
+		migrate_enable();
 	} else
 #endif
 	{
 		unsigned int qtail;
 
-		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
-		put_cpu();
+		ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
+		put_cpu_light();
 	}
 	return ret;
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:4989 @ int netif_rx_ni(struct sk_buff *skb)
 
 	trace_netif_rx_ni_entry(skb);
 
-	preempt_disable();
+	local_bh_disable();
 	err = netif_rx_internal(skb);
-	if (local_softirq_pending())
-		do_softirq();
-	preempt_enable();
+	local_bh_enable();
 	trace_netif_rx_ni_exit(err);
 
 	return err;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6435 @ static void net_rps_action_and_irq_enabl
 		sd->rps_ipi_list = NULL;
 
 		local_irq_enable();
+		preempt_check_resched_rt();
 
 		/* Send pending IPI's to kick RPS processing on remote cpus. */
 		net_rps_send_ipi(remsd);
 	} else
 #endif
 		local_irq_enable();
+	preempt_check_resched_rt();
 }
 
 static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:6520 @ void __napi_schedule(struct napi_struct
 	local_irq_save(flags);
 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
 	local_irq_restore(flags);
+	preempt_check_resched_rt();
 }
 EXPORT_SYMBOL(__napi_schedule);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:11343 @ static int dev_cpu_dead(unsigned int old
 
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
+	preempt_check_resched_rt();
 
 #ifdef CONFIG_RPS
 	remsd = oldsd->rps_ipi_list;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:11357 @ static int dev_cpu_dead(unsigned int old
 		netif_rx_ni(skb);
 		input_queue_head_incr(oldsd);
 	}
-	while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx_ni(skb);
 		input_queue_head_incr(oldsd);
 	}
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:11672 @ static int __init net_dev_init(void)
 
 		INIT_WORK(flush, flush_backlog);
 
-		skb_queue_head_init(&sd->input_pkt_queue);
+		skb_queue_head_init_raw(&sd->input_pkt_queue);
 		skb_queue_head_init(&sd->process_queue);
 #ifdef CONFIG_XFRM_OFFLOAD
 		skb_queue_head_init(&sd->xfrm_backlog);
Index: linux-5.15/net/core/gen_estimator.c
===================================================================
--- linux-5.15.orig/net/core/gen_estimator.c
+++ linux-5.15/net/core/gen_estimator.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:43 @
  */
 
 struct net_rate_estimator {
-	struct gnet_stats_basic_packed	*bstats;
+	struct gnet_stats_basic_sync	*bstats;
 	spinlock_t		*stats_lock;
-	seqcount_t		*running;
-	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+	bool			running;
+	struct gnet_stats_basic_sync __percpu *cpu_bstats;
 	u8			ewma_log;
 	u8			intvl_log; /* period : (250ms << intvl_log) */
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:63 @ struct net_rate_estimator {
 };
 
 static void est_fetch_counters(struct net_rate_estimator *e,
-			       struct gnet_stats_basic_packed *b)
+			       struct gnet_stats_basic_sync *b)
 {
-	memset(b, 0, sizeof(*b));
+	gnet_stats_basic_sync_init(b);
 	if (e->stats_lock)
 		spin_lock(e->stats_lock);
 
-	__gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);
+	gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);
 
 	if (e->stats_lock)
 		spin_unlock(e->stats_lock);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:79 @ static void est_fetch_counters(struct ne
 static void est_timer(struct timer_list *t)
 {
 	struct net_rate_estimator *est = from_timer(est, t, timer);
-	struct gnet_stats_basic_packed b;
+	struct gnet_stats_basic_sync b;
+	u64 b_bytes, b_packets;
 	u64 rate, brate;
 
 	est_fetch_counters(est, &b);
-	brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
+	b_bytes = u64_stats_read(&b.bytes);
+	b_packets = u64_stats_read(&b.packets);
+
+	brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
 	brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
 
-	rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
+	rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
 	rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
 
 	write_seqcount_begin(&est->seq);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:98 @ static void est_timer(struct timer_list
 	est->avpps += rate;
 	write_seqcount_end(&est->seq);
 
-	est->last_bytes = b.bytes;
-	est->last_packets = b.packets;
+	est->last_bytes = b_bytes;
+	est->last_packets = b_packets;
 
 	est->next_jiffies += ((HZ/4) << est->intvl_log);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:116 @ static void est_timer(struct timer_list
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
  * @lock: lock for statistics and control path
- * @running: qdisc running seqcount
+ * @running: true if @bstats represents a running qdisc, thus @bstats'
+ *           internal values might change during basic reads. Only used
+ *           if @bstats_cpu is NULL
  * @opt: rate estimator configuration TLV
  *
  * Creates a new rate estimator with &bstats as source and &rate_est
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:130 @ static void est_timer(struct timer_list
  * Returns 0 on success or a negative error code.
  *
  */
-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
-		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
+		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
 		      struct net_rate_estimator __rcu **rate_est,
 		      spinlock_t *lock,
-		      seqcount_t *running,
+		      bool running,
 		      struct nlattr *opt)
 {
 	struct gnet_estimator *parm = nla_data(opt);
 	struct net_rate_estimator *old, *est;
-	struct gnet_stats_basic_packed b;
+	struct gnet_stats_basic_sync b;
 	int intvl_log;
 
 	if (nla_len(opt) < sizeof(*parm))
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:173 @ int gen_new_estimator(struct gnet_stats_
 	est_fetch_counters(est, &b);
 	if (lock)
 		local_bh_enable();
-	est->last_bytes = b.bytes;
-	est->last_packets = b.packets;
+	est->last_bytes = u64_stats_read(&b.bytes);
+	est->last_packets = u64_stats_read(&b.packets);
 
 	if (lock)
 		spin_lock_bh(lock);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:223 @ EXPORT_SYMBOL(gen_kill_estimator);
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
  * @lock: lock for statistics and control path
- * @running: qdisc running seqcount (might be NULL)
+ * @running: true if @bstats represents a running qdisc, thus @bstats'
+ *           internal values might change during basic reads. Only used
+ *           if @cpu_bstats is NULL
  * @opt: rate estimator configuration TLV
  *
  * Replaces the configuration of a rate estimator by calling
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:233 @ EXPORT_SYMBOL(gen_kill_estimator);
  *
  * Returns 0 on success or a negative error code.
  */
-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
-			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
 			  struct net_rate_estimator __rcu **rate_est,
 			  spinlock_t *lock,
-			  seqcount_t *running, struct nlattr *opt)
+			  bool running, struct nlattr *opt)
 {
 	return gen_new_estimator(bstats, cpu_bstats, rate_est,
 				 lock, running, opt);
Index: linux-5.15/net/core/gen_stats.c
===================================================================
--- linux-5.15.orig/net/core/gen_stats.c
+++ linux-5.15/net/core/gen_stats.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:21 @
 #include <linux/gen_stats.h>
 #include <net/netlink.h>
 #include <net/gen_stats.h>
-
+#include <net/sch_generic.h>
 
 static inline int
 gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:117 @ gnet_stats_start_copy(struct sk_buff *sk
 }
 EXPORT_SYMBOL(gnet_stats_start_copy);
 
-static void
-__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
-			    struct gnet_stats_basic_cpu __percpu *cpu)
+/* Must not be inlined, due to u64_stats seqcount_t lockdep key */
+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
 {
+	u64_stats_set(&b->bytes, 0);
+	u64_stats_set(&b->packets, 0);
+	u64_stats_init(&b->syncp);
+}
+EXPORT_SYMBOL(gnet_stats_basic_sync_init);
+
+static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
+				     struct gnet_stats_basic_sync __percpu *cpu)
+{
+	u64 t_bytes = 0, t_packets = 0;
 	int i;
 
 	for_each_possible_cpu(i) {
-		struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
+		struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
 		unsigned int start;
 		u64 bytes, packets;
 
 		do {
 			start = u64_stats_fetch_begin_irq(&bcpu->syncp);
-			bytes = bcpu->bstats.bytes;
-			packets = bcpu->bstats.packets;
+			bytes = u64_stats_read(&bcpu->bytes);
+			packets = u64_stats_read(&bcpu->packets);
 		} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
 
-		bstats->bytes += bytes;
-		bstats->packets += packets;
+		t_bytes += bytes;
+		t_packets += packets;
 	}
+	_bstats_update(bstats, t_bytes, t_packets);
 }
 
-void
-__gnet_stats_copy_basic(const seqcount_t *running,
-			struct gnet_stats_basic_packed *bstats,
-			struct gnet_stats_basic_cpu __percpu *cpu,
-			struct gnet_stats_basic_packed *b)
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+			  struct gnet_stats_basic_sync __percpu *cpu,
+			  struct gnet_stats_basic_sync *b, bool running)
 {
-	unsigned int seq;
+	unsigned int start;
+	u64 bytes = 0;
+	u64 packets = 0;
+
+	WARN_ON_ONCE((cpu || running) && in_hardirq());
 
 	if (cpu) {
-		__gnet_stats_copy_basic_cpu(bstats, cpu);
+		gnet_stats_add_basic_cpu(bstats, cpu);
 		return;
 	}
 	do {
 		if (running)
-			seq = read_seqcount_begin(running);
-		bstats->bytes = b->bytes;
-		bstats->packets = b->packets;
-	} while (running && read_seqcount_retry(running, seq));
+			start = u64_stats_fetch_begin_irq(&b->syncp);
+		bytes = u64_stats_read(&b->bytes);
+		packets = u64_stats_read(&b->packets);
+	} while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
+
+	_bstats_update(bstats, bytes, packets);
+}
+EXPORT_SYMBOL(gnet_stats_add_basic);
+
+static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets,
+				  struct gnet_stats_basic_sync __percpu *cpu,
+				  struct gnet_stats_basic_sync *b, bool running)
+{
+	unsigned int start;
+
+	if (cpu) {
+		u64 t_bytes = 0, t_packets = 0;
+		int i;
+
+		for_each_possible_cpu(i) {
+			struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
+			unsigned int start;
+			u64 bytes, packets;
+
+			do {
+				start = u64_stats_fetch_begin_irq(&bcpu->syncp);
+				bytes = u64_stats_read(&bcpu->bytes);
+				packets = u64_stats_read(&bcpu->packets);
+			} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
+
+			t_bytes += bytes;
+			t_packets += packets;
+		}
+		*ret_bytes = t_bytes;
+		*ret_packets = t_packets;
+		return;
+	}
+	do {
+		if (running)
+			start = u64_stats_fetch_begin_irq(&b->syncp);
+		*ret_bytes = u64_stats_read(&b->bytes);
+		*ret_packets = u64_stats_read(&b->packets);
+	} while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
 }
-EXPORT_SYMBOL(__gnet_stats_copy_basic);
 
 static int
-___gnet_stats_copy_basic(const seqcount_t *running,
-			 struct gnet_dump *d,
-			 struct gnet_stats_basic_cpu __percpu *cpu,
-			 struct gnet_stats_basic_packed *b,
-			 int type)
+___gnet_stats_copy_basic(struct gnet_dump *d,
+			 struct gnet_stats_basic_sync __percpu *cpu,
+			 struct gnet_stats_basic_sync *b,
+			 int type, bool running)
 {
-	struct gnet_stats_basic_packed bstats = {0};
+	u64 bstats_bytes, bstats_packets;
 
-	__gnet_stats_copy_basic(running, &bstats, cpu, b);
+	gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running);
 
 	if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
-		d->tc_stats.bytes = bstats.bytes;
-		d->tc_stats.packets = bstats.packets;
+		d->tc_stats.bytes = bstats_bytes;
+		d->tc_stats.packets = bstats_packets;
 	}
 
 	if (d->tail) {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:230 @ ___gnet_stats_copy_basic(const seqcount_
 		int res;
 
 		memset(&sb, 0, sizeof(sb));
-		sb.bytes = bstats.bytes;
-		sb.packets = bstats.packets;
+		sb.bytes = bstats_bytes;
+		sb.packets = bstats_packets;
 		res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD);
-		if (res < 0 || sb.packets == bstats.packets)
+		if (res < 0 || sb.packets == bstats_packets)
 			return res;
 		/* emit 64bit stats only if needed */
-		return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets,
-				       sizeof(bstats.packets), TCA_STATS_PAD);
+		return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets,
+				       sizeof(bstats_packets), TCA_STATS_PAD);
 	}
 	return 0;
 }
 
 /**
  * gnet_stats_copy_basic - copy basic statistics into statistic TLV
- * @running: seqcount_t pointer
  * @d: dumping handle
  * @cpu: copy statistic per cpu
  * @b: basic statistics
+ * @running: true if @b represents a running qdisc, thus @b's
+ *           internal values might change during basic reads.
+ *           Only used if @cpu is NULL
+ *
+ * Context: task; must not be run from IRQ or BH contexts
  *
  * Appends the basic statistics to the top level TLV created by
  * gnet_stats_start_copy().
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:260 @ ___gnet_stats_copy_basic(const seqcount_
  * if the room in the socket buffer was not sufficient.
  */
 int
-gnet_stats_copy_basic(const seqcount_t *running,
-		      struct gnet_dump *d,
-		      struct gnet_stats_basic_cpu __percpu *cpu,
-		      struct gnet_stats_basic_packed *b)
+gnet_stats_copy_basic(struct gnet_dump *d,
+		      struct gnet_stats_basic_sync __percpu *cpu,
+		      struct gnet_stats_basic_sync *b,
+		      bool running)
 {
-	return ___gnet_stats_copy_basic(running, d, cpu, b,
-					TCA_STATS_BASIC);
+	return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
 }
 EXPORT_SYMBOL(gnet_stats_copy_basic);
 
 /**
  * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
- * @running: seqcount_t pointer
  * @d: dumping handle
  * @cpu: copy statistic per cpu
  * @b: basic statistics
+ * @running: true if @b represents a running qdisc, thus @b's
+ *           internal values might change during basic reads.
+ *           Only used if @cpu is NULL
+ *
+ * Context: task; must not be run from IRQ or BH contexts
  *
  * Appends the basic statistics to the top level TLV created by
  * gnet_stats_start_copy().
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:287 @ EXPORT_SYMBOL(gnet_stats_copy_basic);
  * if the room in the socket buffer was not sufficient.
  */
 int
-gnet_stats_copy_basic_hw(const seqcount_t *running,
-			 struct gnet_dump *d,
-			 struct gnet_stats_basic_cpu __percpu *cpu,
-			 struct gnet_stats_basic_packed *b)
+gnet_stats_copy_basic_hw(struct gnet_dump *d,
+			 struct gnet_stats_basic_sync __percpu *cpu,
+			 struct gnet_stats_basic_sync *b,
+			 bool running)
 {
-	return ___gnet_stats_copy_basic(running, d, cpu, b,
-					TCA_STATS_BASIC_HW);
+	return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
 }
 EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:340 @ gnet_stats_copy_rate_est(struct gnet_dum
 }
 EXPORT_SYMBOL(gnet_stats_copy_rate_est);
 
-static void
-__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
-			    const struct gnet_stats_queue __percpu *q)
+static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats,
+				     const struct gnet_stats_queue __percpu *q)
 {
 	int i;
 
 	for_each_possible_cpu(i) {
 		const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
 
-		qstats->qlen = 0;
+		qstats->qlen += qcpu->backlog;
 		qstats->backlog += qcpu->backlog;
 		qstats->drops += qcpu->drops;
 		qstats->requeues += qcpu->requeues;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:356 @ __gnet_stats_copy_queue_cpu(struct gnet_
 	}
 }
 
-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
-			     const struct gnet_stats_queue __percpu *cpu,
-			     const struct gnet_stats_queue *q,
-			     __u32 qlen)
+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
+			  const struct gnet_stats_queue __percpu *cpu,
+			  const struct gnet_stats_queue *q)
 {
 	if (cpu) {
-		__gnet_stats_copy_queue_cpu(qstats, cpu);
+		gnet_stats_add_queue_cpu(qstats, cpu);
 	} else {
-		qstats->qlen = q->qlen;
-		qstats->backlog = q->backlog;
-		qstats->drops = q->drops;
-		qstats->requeues = q->requeues;
-		qstats->overlimits = q->overlimits;
+		qstats->qlen += q->qlen;
+		qstats->backlog += q->backlog;
+		qstats->drops += q->drops;
+		qstats->requeues += q->requeues;
+		qstats->overlimits += q->overlimits;
 	}
-
-	qstats->qlen = qlen;
 }
-EXPORT_SYMBOL(__gnet_stats_copy_queue);
+EXPORT_SYMBOL(gnet_stats_add_queue);
 
 /**
  * gnet_stats_copy_queue - copy queue statistics into statistics TLV
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:393 @ gnet_stats_copy_queue(struct gnet_dump *
 {
 	struct gnet_stats_queue qstats = {0};
 
-	__gnet_stats_copy_queue(&qstats, cpu_q, q, qlen);
+	gnet_stats_add_queue(&qstats, cpu_q, q);
+	qstats.qlen = qlen;
 
 	if (d->compat_tc_stats) {
 		d->tc_stats.drops = qstats.drops;
Index: linux-5.15/net/netfilter/xt_RATEEST.c
===================================================================
--- linux-5.15.orig/net/netfilter/xt_RATEEST.c
+++ linux-5.15/net/netfilter/xt_RATEEST.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:97 @ static unsigned int
 xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_rateest_target_info *info = par->targinfo;
-	struct gnet_stats_basic_packed *stats = &info->est->bstats;
+	struct gnet_stats_basic_sync *stats = &info->est->bstats;
 
 	spin_lock_bh(&info->est->lock);
-	stats->bytes += skb->len;
-	stats->packets++;
+	u64_stats_add(&stats->bytes, skb->len);
+	u64_stats_inc(&stats->packets);
 	spin_unlock_bh(&info->est->lock);
 
 	return XT_CONTINUE;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:146 @ static int xt_rateest_tg_checkentry(cons
 	if (!est)
 		goto err1;
 
+	gnet_stats_basic_sync_init(&est->bstats);
 	strlcpy(est->name, info->name, sizeof(est->name));
 	spin_lock_init(&est->lock);
 	est->refcnt		= 1;
Index: linux-5.15/net/sched/act_api.c
===================================================================
--- linux-5.15.orig/net/sched/act_api.c
+++ linux-5.15/net/sched/act_api.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:489 @ int tcf_idr_create(struct tc_action_net
 		atomic_set(&p->tcfa_bindcnt, 1);
 
 	if (cpustats) {
-		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
 		if (!p->cpu_bstats)
 			goto err1;
-		p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+		p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
 		if (!p->cpu_bstats_hw)
 			goto err2;
 		p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
 		if (!p->cpu_qstats)
 			goto err3;
 	}
+	gnet_stats_basic_sync_init(&p->tcfa_bstats);
+	gnet_stats_basic_sync_init(&p->tcfa_bstats_hw);
 	spin_lock_init(&p->tcfa_lock);
 	p->tcfa_index = index;
 	p->tcfa_tm.install = jiffies;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:510 @ int tcf_idr_create(struct tc_action_net
 	if (est) {
 		err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
 					&p->tcfa_rate_est,
-					&p->tcfa_lock, NULL, est);
+					&p->tcfa_lock, false, est);
 		if (err)
 			goto err4;
 	}
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1146 @ void tcf_action_update_stats(struct tc_a
 			     u64 drops, bool hw)
 {
 	if (a->cpu_bstats) {
-		_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+		_bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
 
 		this_cpu_ptr(a->cpu_qstats)->drops += drops;
 
 		if (hw)
-			_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
-					   bytes, packets);
+			_bstats_update(this_cpu_ptr(a->cpu_bstats_hw),
+				       bytes, packets);
 		return;
 	}
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1191 @ int tcf_action_copy_stats(struct sk_buff
 	if (err < 0)
 		goto errout;
 
-	if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
-	    gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
-				     &p->tcfa_bstats_hw) < 0 ||
+	if (gnet_stats_copy_basic(&d, p->cpu_bstats,
+				  &p->tcfa_bstats, false) < 0 ||
+	    gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
+				     &p->tcfa_bstats_hw, false) < 0 ||
 	    gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
 	    gnet_stats_copy_queue(&d, p->cpu_qstats,
 				  &p->tcfa_qstats,
Index: linux-5.15/net/sched/act_bpf.c
===================================================================
--- linux-5.15.orig/net/sched/act_bpf.c
+++ linux-5.15/net/sched/act_bpf.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:44 @ static int tcf_bpf_act(struct sk_buff *s
 	int action, filter_res;
 
 	tcf_lastuse_update(&prog->tcf_tm);
-	bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
+	bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
 
 	filter = rcu_dereference(prog->filter);
 	if (at_ingress) {
Index: linux-5.15/net/sched/act_ife.c
===================================================================
--- linux-5.15.orig/net/sched/act_ife.c
+++ linux-5.15/net/sched/act_ife.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:721 @ static int tcf_ife_decode(struct sk_buff
 	u8 *tlv_data;
 	u16 metalen;
 
-	bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
+	bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
 	tcf_lastuse_update(&ife->tcf_tm);
 
 	if (skb_at_tc_ingress(skb))
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:809 @ static int tcf_ife_encode(struct sk_buff
 			exceed_mtu = true;
 	}
 
-	bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
+	bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
 	tcf_lastuse_update(&ife->tcf_tm);
 
 	if (!metalen) {		/* no metadata to send */
Index: linux-5.15/net/sched/act_mpls.c
===================================================================
--- linux-5.15.orig/net/sched/act_mpls.c
+++ linux-5.15/net/sched/act_mpls.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:62 @ static int tcf_mpls_act(struct sk_buff *
 	int ret, mac_len;
 
 	tcf_lastuse_update(&m->tcf_tm);
-	bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
+	bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb);
 
 	/* Ensure 'data' points at mac_header prior calling mpls manipulating
 	 * functions.
Index: linux-5.15/net/sched/act_police.c
===================================================================
--- linux-5.15.orig/net/sched/act_police.c
+++ linux-5.15/net/sched/act_police.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:128 @ static int tcf_police_init(struct net *n
 					    police->common.cpu_bstats,
 					    &police->tcf_rate_est,
 					    &police->tcf_lock,
-					    NULL, est);
+					    false, est);
 		if (err)
 			goto failure;
 	} else if (tb[TCA_POLICE_AVRATE] &&
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:265 @ static int tcf_police_act(struct sk_buff
 	int ret;
 
 	tcf_lastuse_update(&police->tcf_tm);
-	bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb);
+	bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb);
 
 	ret = READ_ONCE(police->tcf_action);
 	p = rcu_dereference_bh(police->params);
Index: linux-5.15/net/sched/act_sample.c
===================================================================
--- linux-5.15.orig/net/sched/act_sample.c
+++ linux-5.15/net/sched/act_sample.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:166 @ static int tcf_sample_act(struct sk_buff
 	int retval;
 
 	tcf_lastuse_update(&s->tcf_tm);
-	bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
+	bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb);
 	retval = READ_ONCE(s->tcf_action);
 
 	psample_group = rcu_dereference_bh(s->psample_group);
Index: linux-5.15/net/sched/act_simple.c
===================================================================
--- linux-5.15.orig/net/sched/act_simple.c
+++ linux-5.15/net/sched/act_simple.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:39 @ static int tcf_simp_act(struct sk_buff *
 	 * then it would look like "hello_3" (without quotes)
 	 */
 	pr_info("simple: %s_%llu\n",
-	       (char *)d->tcfd_defdata, d->tcf_bstats.packets);
+		(char *)d->tcfd_defdata,
+		u64_stats_read(&d->tcf_bstats.packets));
 	spin_unlock(&d->tcf_lock);
 	return d->tcf_action;
 }
Index: linux-5.15/net/sched/act_skbedit.c
===================================================================
--- linux-5.15.orig/net/sched/act_skbedit.c
+++ linux-5.15/net/sched/act_skbedit.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:34 @ static int tcf_skbedit_act(struct sk_buf
 	int action;
 
 	tcf_lastuse_update(&d->tcf_tm);
-	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+	bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
 
 	params = rcu_dereference_bh(d->params);
 	action = READ_ONCE(d->tcf_action);
Index: linux-5.15/net/sched/act_skbmod.c
===================================================================
--- linux-5.15.orig/net/sched/act_skbmod.c
+++ linux-5.15/net/sched/act_skbmod.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:34 @ static int tcf_skbmod_act(struct sk_buff
 	u64 flags;
 
 	tcf_lastuse_update(&d->tcf_tm);
-	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+	bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
 
 	action = READ_ONCE(d->tcf_action);
 	if (unlikely(action == TC_ACT_SHOT))
Index: linux-5.15/net/sched/sch_api.c
===================================================================
--- linux-5.15.orig/net/sched/sch_api.c
+++ linux-5.15/net/sched/sch_api.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:887 @ static void qdisc_offload_graft_root(str
 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 			 u32 portid, u32 seq, u16 flags, int event)
 {
-	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
+	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:945 @ static int tc_fill_qdisc(struct sk_buff
 		cpu_qstats = q->cpu_qstats;
 	}
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
-				  &d, cpu_bstats, &q->bstats) < 0 ||
+	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
 		goto nla_put_failure;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1272 @ static struct Qdisc *qdisc_create(struct
 		rcu_assign_pointer(sch->stab, stab);
 	}
 	if (tca[TCA_RATE]) {
-		seqcount_t *running;
-
 		err = -EOPNOTSUPP;
 		if (sch->flags & TCQ_F_MQROOT) {
 			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
 			goto err_out4;
 		}
 
-		if (sch->parent != TC_H_ROOT &&
-		    !(sch->flags & TCQ_F_INGRESS) &&
-		    (!p || !(p->flags & TCQ_F_MQROOT)))
-			running = qdisc_root_sleeping_running(sch);
-		else
-			running = &sch->running;
-
 		err = gen_new_estimator(&sch->bstats,
 					sch->cpu_bstats,
 					&sch->rate_est,
 					NULL,
-					running,
+					true,
 					tca[TCA_RATE]);
 		if (err) {
 			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1358 @ static int qdisc_change(struct Qdisc *sc
 				      sch->cpu_bstats,
 				      &sch->rate_est,
 				      NULL,
-				      qdisc_root_sleeping_running(sch),
+				      true,
 				      tca[TCA_RATE]);
 	}
 out:
Index: linux-5.15/net/sched/sch_atm.c
===================================================================
--- linux-5.15.orig/net/sched/sch_atm.c
+++ linux-5.15/net/sched/sch_atm.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:55 @ struct atm_flow_data {
 	struct atm_qdisc_data	*parent;	/* parent qdisc */
 	struct socket		*sock;		/* for closing */
 	int			ref;		/* reference count */
-	struct gnet_stats_basic_packed	bstats;
+	struct gnet_stats_basic_sync	bstats;
 	struct gnet_stats_queue	qstats;
 	struct list_head	list;
 	struct atm_flow_data	*excess;	/* flow for excess traffic;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:554 @ static int atm_tc_init(struct Qdisc *sch
 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 	INIT_LIST_HEAD(&p->flows);
 	INIT_LIST_HEAD(&p->link.list);
+	gnet_stats_basic_sync_init(&p->link.bstats);
 	list_add(&p->link.list, &p->flows);
 	p->link.q = qdisc_create_dflt(sch->dev_queue,
 				      &pfifo_qdisc_ops, sch->handle, extack);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:658 @ atm_tc_dump_class_stats(struct Qdisc *sc
 {
 	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &flow->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
 		return -1;
 
Index: linux-5.15/net/sched/sch_cbq.c
===================================================================
--- linux-5.15.orig/net/sched/sch_cbq.c
+++ linux-5.15/net/sched/sch_cbq.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:119 @ struct cbq_class {
 	long			avgidle;
 	long			deficit;	/* Saved deficit for WRR */
 	psched_time_t		penalized;
-	struct gnet_stats_basic_packed bstats;
+	struct gnet_stats_basic_sync bstats;
 	struct gnet_stats_queue qstats;
 	struct net_rate_estimator __rcu *rate_est;
 	struct tc_cbq_xstats	xstats;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:568 @ cbq_update(struct cbq_sched_data *q)
 		long avgidle = cl->avgidle;
 		long idle;
 
-		cl->bstats.packets++;
-		cl->bstats.bytes += len;
+		_bstats_update(&cl->bstats, len, 1);
 
 		/*
 		 * (now - last) is total time between packet right edges.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1385 @ cbq_dump_class_stats(struct Qdisc *sch,
 	if (cl->undertime != PSCHED_PASTPERFECT)
 		cl->xstats.undertime = cl->undertime - q->now;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
 		return -1;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1519 @ cbq_change_class(struct Qdisc *sch, u32
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    qdisc_root_sleeping_running(sch),
+						    true,
 						    tca[TCA_RATE]);
 			if (err) {
 				NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1611 @ cbq_change_class(struct Qdisc *sch, u32
 	if (cl == NULL)
 		goto failure;
 
+	gnet_stats_basic_sync_init(&cl->bstats);
 	err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
 	if (err) {
 		kfree(cl);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1620 @ cbq_change_class(struct Qdisc *sch, u32
 
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-					NULL,
-					qdisc_root_sleeping_running(sch),
-					tca[TCA_RATE]);
+					NULL, true, tca[TCA_RATE]);
 		if (err) {
 			NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
 			tcf_block_put(cl->block);
Index: linux-5.15/net/sched/sch_drr.c
===================================================================
--- linux-5.15.orig/net/sched/sch_drr.c
+++ linux-5.15/net/sched/sch_drr.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:22 @ struct drr_class {
 	struct Qdisc_class_common	common;
 	unsigned int			filter_cnt;
 
-	struct gnet_stats_basic_packed		bstats;
+	struct gnet_stats_basic_sync		bstats;
 	struct gnet_stats_queue		qstats;
 	struct net_rate_estimator __rcu *rate_est;
 	struct list_head		alist;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:88 @ static int drr_change_class(struct Qdisc
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
-						    NULL,
-						    qdisc_root_sleeping_running(sch),
+						    NULL, true,
 						    tca[TCA_RATE]);
 			if (err) {
 				NL_SET_ERR_MSG(extack, "Failed to replace estimator");
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:108 @ static int drr_change_class(struct Qdisc
 	if (cl == NULL)
 		return -ENOBUFS;
 
+	gnet_stats_basic_sync_init(&cl->bstats);
 	cl->common.classid = classid;
 	cl->quantum	   = quantum;
 	cl->qdisc	   = qdisc_create_dflt(sch->dev_queue,
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:121 @ static int drr_change_class(struct Qdisc
 
 	if (tca[TCA_RATE]) {
 		err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
-					    NULL,
-					    qdisc_root_sleeping_running(sch),
-					    tca[TCA_RATE]);
+					    NULL, true, tca[TCA_RATE]);
 		if (err) {
 			NL_SET_ERR_MSG(extack, "Failed to replace estimator");
 			qdisc_put(cl->qdisc);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:268 @ static int drr_dump_class_stats(struct Q
 	if (qlen)
 		xstats.deficit = cl->deficit;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
 		return -1;
Index: linux-5.15/net/sched/sch_ets.c
===================================================================
--- linux-5.15.orig/net/sched/sch_ets.c
+++ linux-5.15/net/sched/sch_ets.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:44 @ struct ets_class {
 	struct Qdisc *qdisc;
 	u32 quantum;
 	u32 deficit;
-	struct gnet_stats_basic_packed bstats;
+	struct gnet_stats_basic_sync bstats;
 	struct gnet_stats_queue qstats;
 };
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:328 @ static int ets_class_dump_stats(struct Q
 	struct ets_class *cl = ets_class_from_arg(sch, arg);
 	struct Qdisc *cl_q = cl->qdisc;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl_q->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:663 @ static int ets_qdisc_change(struct Qdisc
 
 	q->nbands = nbands;
 	for (i = nstrict; i < q->nstrict; i++) {
-		INIT_LIST_HEAD(&q->classes[i].alist);
 		if (q->classes[i].qdisc->q.qlen) {
 			list_add_tail(&q->classes[i].alist, &q->active);
 			q->classes[i].deficit = quanta[i];
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:690 @ static int ets_qdisc_change(struct Qdisc
 	ets_offload_change(sch);
 	for (i = q->nbands; i < oldbands; i++) {
 		qdisc_put(q->classes[i].qdisc);
-		memset(&q->classes[i], 0, sizeof(q->classes[i]));
+		q->classes[i].qdisc = NULL;
+		q->classes[i].quantum = 0;
+		q->classes[i].deficit = 0;
+		gnet_stats_basic_sync_init(&q->classes[i].bstats);
+		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
 	}
 	return 0;
 }
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:703 @ static int ets_qdisc_init(struct Qdisc *
 			  struct netlink_ext_ack *extack)
 {
 	struct ets_sched *q = qdisc_priv(sch);
-	int err;
+	int err, i;
 
 	if (!opt)
 		return -EINVAL;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:713 @ static int ets_qdisc_init(struct Qdisc *
 		return err;
 
 	INIT_LIST_HEAD(&q->active);
+	for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
+		INIT_LIST_HEAD(&q->classes[i].alist);
+
 	return ets_qdisc_change(sch, opt, extack);
 }
 
Index: linux-5.15/net/sched/sch_generic.c
===================================================================
--- linux-5.15.orig/net/sched/sch_generic.c
+++ linux-5.15/net/sched/sch_generic.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:307 @ trace:
 
 /*
  * Transmit possibly several skbs, and handle the return status as
- * required. Owning running seqcount bit guarantees that
- * only one CPU can execute this function.
+ * required. Owning qdisc running bit guarantees that only one CPU
+ * can execute this function.
  *
  * Returns to the caller:
  *				false  - hardware queue frozen backoff
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:609 @ struct Qdisc noop_qdisc = {
 	.ops		=	&noop_qdisc_ops,
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
-	.running	=	SEQCNT_ZERO(noop_qdisc.running),
 	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
 	.gso_skb = {
 		.next = (struct sk_buff *)&noop_qdisc.gso_skb,
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:869 @ struct Qdisc_ops pfifo_fast_ops __read_m
 EXPORT_SYMBOL(pfifo_fast_ops);
 
 static struct lock_class_key qdisc_tx_busylock;
-static struct lock_class_key qdisc_running_key;
 
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  const struct Qdisc_ops *ops,
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:893 @ struct Qdisc *qdisc_alloc(struct netdev_
 	__skb_queue_head_init(&sch->gso_skb);
 	__skb_queue_head_init(&sch->skb_bad_txq);
 	qdisc_skb_head_init(&sch->q);
+	gnet_stats_basic_sync_init(&sch->bstats);
 	spin_lock_init(&sch->q.lock);
 
 	if (ops->static_flags & TCQ_F_CPUSTATS) {
 		sch->cpu_bstats =
-			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+			netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
 		if (!sch->cpu_bstats)
 			goto errout1;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:918 @ struct Qdisc *qdisc_alloc(struct netdev_
 	lockdep_set_class(&sch->seqlock,
 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
 
-	seqcount_init(&sch->running);
-	lockdep_set_class(&sch->running,
-			  dev->qdisc_running_key ?: &qdisc_running_key);
-
 	sch->ops = ops;
 	sch->flags = ops->static_flags;
 	sch->enqueue = ops->enqueue;
Index: linux-5.15/net/sched/sch_gred.c
===================================================================
--- linux-5.15.orig/net/sched/sch_gred.c
+++ linux-5.15/net/sched/sch_gred.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:59 @ struct gred_sched {
 	u32 		DPs;
 	u32 		def;
 	struct red_vars wred_set;
+	struct tc_gred_qopt_offload *opt;
 };
 
 static inline int gred_wred_mode(struct gred_sched *table)
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:315 @ static void gred_offload(struct Qdisc *s
 {
 	struct gred_sched *table = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
-	struct tc_gred_qopt_offload opt = {
-		.command	= command,
-		.handle		= sch->handle,
-		.parent		= sch->parent,
-	};
+	struct tc_gred_qopt_offload *opt = table->opt;
 
 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
 		return;
 
+	memset(opt, 0, sizeof(*opt));
+	opt->command = command;
+	opt->handle = sch->handle;
+	opt->parent = sch->parent;
+
 	if (command == TC_GRED_REPLACE) {
 		unsigned int i;
 
-		opt.set.grio_on = gred_rio_mode(table);
-		opt.set.wred_on = gred_wred_mode(table);
-		opt.set.dp_cnt = table->DPs;
-		opt.set.dp_def = table->def;
+		opt->set.grio_on = gred_rio_mode(table);
+		opt->set.wred_on = gred_wred_mode(table);
+		opt->set.dp_cnt = table->DPs;
+		opt->set.dp_def = table->def;
 
 		for (i = 0; i < table->DPs; i++) {
 			struct gred_sched_data *q = table->tab[i];
 
 			if (!q)
 				continue;
-			opt.set.tab[i].present = true;
-			opt.set.tab[i].limit = q->limit;
-			opt.set.tab[i].prio = q->prio;
-			opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
-			opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
-			opt.set.tab[i].is_ecn = gred_use_ecn(q);
-			opt.set.tab[i].is_harddrop = gred_use_harddrop(q);
-			opt.set.tab[i].probability = q->parms.max_P;
-			opt.set.tab[i].backlog = &q->backlog;
+			opt->set.tab[i].present = true;
+			opt->set.tab[i].limit = q->limit;
+			opt->set.tab[i].prio = q->prio;
+			opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
+			opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
+			opt->set.tab[i].is_ecn = gred_use_ecn(q);
+			opt->set.tab[i].is_harddrop = gred_use_harddrop(q);
+			opt->set.tab[i].probability = q->parms.max_P;
+			opt->set.tab[i].backlog = &q->backlog;
 		}
-		opt.set.qstats = &sch->qstats;
+		opt->set.qstats = &sch->qstats;
 	}
 
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt);
+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
 }
 
 static int gred_offload_dump_stats(struct Qdisc *sch)
 {
 	struct gred_sched *table = qdisc_priv(sch);
 	struct tc_gred_qopt_offload *hw_stats;
+	u64 bytes = 0, packets = 0;
 	unsigned int i;
 	int ret;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:370 @ static int gred_offload_dump_stats(struc
 	hw_stats->handle = sch->handle;
 	hw_stats->parent = sch->parent;
 
-	for (i = 0; i < MAX_DPs; i++)
+	for (i = 0; i < MAX_DPs; i++) {
+		gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]);
 		if (table->tab[i])
 			hw_stats->stats.xstats[i] = &table->tab[i]->stats;
+	}
 
 	ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
 	/* Even if driver returns failure adjust the stats - in case offload
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:383 @ static int gred_offload_dump_stats(struc
 	for (i = 0; i < MAX_DPs; i++) {
 		if (!table->tab[i])
 			continue;
-		table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets;
-		table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
+		table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets);
+		table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
 		table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
 
-		_bstats_update(&sch->bstats,
-			       hw_stats->stats.bstats[i].bytes,
-			       hw_stats->stats.bstats[i].packets);
+		bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
+		packets += u64_stats_read(&hw_stats->stats.bstats[i].packets);
 		sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
 		sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
 		sch->qstats.drops += hw_stats->stats.qstats[i].drops;
 		sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
 		sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
 	}
+	_bstats_update(&sch->bstats, bytes, packets);
 
 	kfree(hw_stats);
 	return ret;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:736 @ err_unlock_free:
 static int gred_init(struct Qdisc *sch, struct nlattr *opt,
 		     struct netlink_ext_ack *extack)
 {
+	struct gred_sched *table = qdisc_priv(sch);
 	struct nlattr *tb[TCA_GRED_MAX + 1];
 	int err;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:760 @ static int gred_init(struct Qdisc *sch,
 		sch->limit = qdisc_dev(sch)->tx_queue_len
 		             * psched_mtu(qdisc_dev(sch));
 
+	if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) {
+		table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL);
+		if (!table->opt)
+			return -ENOMEM;
+	}
+
 	return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
 }
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:922 @ static void gred_destroy(struct Qdisc *s
 			gred_destroy_vq(table->tab[i]);
 	}
 	gred_offload(sch, TC_GRED_DESTROY);
+	kfree(table->opt);
 }
 
 static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
Index: linux-5.15/net/sched/sch_hfsc.c
===================================================================
--- linux-5.15.orig/net/sched/sch_hfsc.c
+++ linux-5.15/net/sched/sch_hfsc.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:114 @ enum hfsc_class_flags {
 struct hfsc_class {
 	struct Qdisc_class_common cl_common;
 
-	struct gnet_stats_basic_packed bstats;
+	struct gnet_stats_basic_sync bstats;
 	struct gnet_stats_queue qstats;
 	struct net_rate_estimator __rcu *rate_est;
 	struct tcf_proto __rcu *filter_list; /* filter list */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:968 @ hfsc_change_class(struct Qdisc *sch, u32
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    qdisc_root_sleeping_running(sch),
+						    true,
 						    tca[TCA_RATE]);
 			if (err)
 				return err;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1036 @ hfsc_change_class(struct Qdisc *sch, u32
 
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-					NULL,
-					qdisc_root_sleeping_running(sch),
-					tca[TCA_RATE]);
+					NULL, true, tca[TCA_RATE]);
 		if (err) {
 			tcf_block_put(cl->block);
 			kfree(cl);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1329 @ hfsc_dump_class_stats(struct Qdisc *sch,
 	xstats.work    = cl->cl_total;
 	xstats.rtwork  = cl->cl_cumul;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
 		return -1;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1407 @ hfsc_init_qdisc(struct Qdisc *sch, struc
 	if (err)
 		return err;
 
+	gnet_stats_basic_sync_init(&q->root.bstats);
 	q->root.cl_common.classid = sch->handle;
 	q->root.sched   = q;
 	q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
Index: linux-5.15/net/sched/sch_htb.c
===================================================================
--- linux-5.15.orig/net/sched/sch_htb.c
+++ linux-5.15/net/sched/sch_htb.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:116 @ struct htb_class {
 	/*
 	 * Written often fields
 	 */
-	struct gnet_stats_basic_packed bstats;
-	struct gnet_stats_basic_packed bstats_bias;
+	struct gnet_stats_basic_sync bstats;
+	struct gnet_stats_basic_sync bstats_bias;
 	struct tc_htb_xstats	xstats;	/* our special stats */
 
 	/* token bucket parameters */
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1312 @ nla_put_failure:
 static void htb_offload_aggregate_stats(struct htb_sched *q,
 					struct htb_class *cl)
 {
+	u64 bytes = 0, packets = 0;
 	struct htb_class *c;
 	unsigned int i;
 
-	memset(&cl->bstats, 0, sizeof(cl->bstats));
+	gnet_stats_basic_sync_init(&cl->bstats);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1328 @ static void htb_offload_aggregate_stats(
 			if (p != cl)
 				continue;
 
-			cl->bstats.bytes += c->bstats_bias.bytes;
-			cl->bstats.packets += c->bstats_bias.packets;
+			bytes += u64_stats_read(&c->bstats_bias.bytes);
+			packets += u64_stats_read(&c->bstats_bias.packets);
 			if (c->level == 0) {
-				cl->bstats.bytes += c->leaf.q->bstats.bytes;
-				cl->bstats.packets += c->leaf.q->bstats.packets;
+				bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
+				packets += u64_stats_read(&c->leaf.q->bstats.packets);
 			}
 		}
 	}
+	_bstats_update(&cl->bstats, bytes, packets);
 }
 
 static int
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1363 @ htb_dump_class_stats(struct Qdisc *sch,
 			if (cl->leaf.q)
 				cl->bstats = cl->leaf.q->bstats;
 			else
-				memset(&cl->bstats, 0, sizeof(cl->bstats));
-			cl->bstats.bytes += cl->bstats_bias.bytes;
-			cl->bstats.packets += cl->bstats_bias.packets;
+				gnet_stats_basic_sync_init(&cl->bstats);
+			_bstats_update(&cl->bstats,
+				       u64_stats_read(&cl->bstats_bias.bytes),
+				       u64_stats_read(&cl->bstats_bias.packets));
 		} else {
 			htb_offload_aggregate_stats(q, cl);
 		}
 	}
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
 		return -1;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1587 @ static int htb_destroy_class_offload(str
 	}
 
 	if (cl->parent) {
-		cl->parent->bstats_bias.bytes += q->bstats.bytes;
-		cl->parent->bstats_bias.packets += q->bstats.packets;
+		_bstats_update(&cl->parent->bstats_bias,
+			       u64_stats_read(&q->bstats.bytes),
+			       u64_stats_read(&q->bstats.packets));
 	}
 
 	offload_opt = (struct tc_htb_qopt_offload) {
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1881 @ static int htb_change_class(struct Qdisc
 		if (!cl)
 			goto failure;
 
+		gnet_stats_basic_sync_init(&cl->bstats);
+		gnet_stats_basic_sync_init(&cl->bstats_bias);
+
 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
 		if (err) {
 			kfree(cl);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1893 @ static int htb_change_class(struct Qdisc
 			err = gen_new_estimator(&cl->bstats, NULL,
 						&cl->rate_est,
 						NULL,
-						qdisc_root_sleeping_running(sch),
+						true,
 						tca[TCA_RATE] ? : &est.nla);
 			if (err)
 				goto err_block_put;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1957 @ static int htb_change_class(struct Qdisc
 				htb_graft_helper(dev_queue, old_q);
 				goto err_kill_estimator;
 			}
-			parent->bstats_bias.bytes += old_q->bstats.bytes;
-			parent->bstats_bias.packets += old_q->bstats.packets;
+			_bstats_update(&parent->bstats_bias,
+				       u64_stats_read(&old_q->bstats.bytes),
+				       u64_stats_read(&old_q->bstats.packets));
 			qdisc_put(old_q);
 		}
 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2019 @ static int htb_change_class(struct Qdisc
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    qdisc_root_sleeping_running(sch),
+						    true,
 						    tca[TCA_RATE]);
 			if (err)
 				return err;
Index: linux-5.15/net/sched/sch_mq.c
===================================================================
--- linux-5.15.orig/net/sched/sch_mq.c
+++ linux-5.15/net/sched/sch_mq.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:156 @ static int mq_dump(struct Qdisc *sch, st
 	struct net_device *dev = qdisc_dev(sch);
 	struct Qdisc *qdisc;
 	unsigned int ntx;
-	__u32 qlen = 0;
 
 	sch->q.qlen = 0;
-	memset(&sch->bstats, 0, sizeof(sch->bstats));
+	gnet_stats_basic_sync_init(&sch->bstats);
 	memset(&sch->qstats, 0, sizeof(sch->qstats));
 
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:170 @ static int mq_dump(struct Qdisc *sch, st
 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
 		spin_lock_bh(qdisc_lock(qdisc));
 
-		if (qdisc_is_percpu_stats(qdisc)) {
-			qlen = qdisc_qlen_sum(qdisc);
-			__gnet_stats_copy_basic(NULL, &sch->bstats,
-						qdisc->cpu_bstats,
-						&qdisc->bstats);
-			__gnet_stats_copy_queue(&sch->qstats,
-						qdisc->cpu_qstats,
-						&qdisc->qstats, qlen);
-			sch->q.qlen		+= qlen;
-		} else {
-			sch->q.qlen		+= qdisc->q.qlen;
-			sch->bstats.bytes	+= qdisc->bstats.bytes;
-			sch->bstats.packets	+= qdisc->bstats.packets;
-			sch->qstats.qlen	+= qdisc->qstats.qlen;
-			sch->qstats.backlog	+= qdisc->qstats.backlog;
-			sch->qstats.drops	+= qdisc->qstats.drops;
-			sch->qstats.requeues	+= qdisc->qstats.requeues;
-			sch->qstats.overlimits	+= qdisc->qstats.overlimits;
-		}
+		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
+				     &qdisc->bstats, false);
+		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+				     &qdisc->qstats);
+		sch->q.qlen += qdisc_qlen(qdisc);
 
 		spin_unlock_bh(qdisc_lock(qdisc));
 	}
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:257 @ static int mq_dump_class_stats(struct Qd
 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
 
 	sch = dev_queue->qdisc_sleeping;
-	if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
-				  &sch->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, sch) < 0)
 		return -1;
 	return 0;
Index: linux-5.15/net/sched/sch_mqprio.c
===================================================================
--- linux-5.15.orig/net/sched/sch_mqprio.c
+++ linux-5.15/net/sched/sch_mqprio.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:448 @ static int mqprio_dump(struct Qdisc *sch
 	unsigned int ntx, tc;
 
 	sch->q.qlen = 0;
-	memset(&sch->bstats, 0, sizeof(sch->bstats));
+	gnet_stats_basic_sync_init(&sch->bstats);
 	memset(&sch->qstats, 0, sizeof(sch->qstats));
 
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:460 @ static int mqprio_dump(struct Qdisc *sch
 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
 		spin_lock_bh(qdisc_lock(qdisc));
 
-		if (qdisc_is_percpu_stats(qdisc)) {
-			__u32 qlen = qdisc_qlen_sum(qdisc);
-
-			__gnet_stats_copy_basic(NULL, &sch->bstats,
-						qdisc->cpu_bstats,
-						&qdisc->bstats);
-			__gnet_stats_copy_queue(&sch->qstats,
-						qdisc->cpu_qstats,
-						&qdisc->qstats, qlen);
-			sch->q.qlen		+= qlen;
-		} else {
-			sch->q.qlen		+= qdisc->q.qlen;
-			sch->bstats.bytes	+= qdisc->bstats.bytes;
-			sch->bstats.packets	+= qdisc->bstats.packets;
-			sch->qstats.backlog	+= qdisc->qstats.backlog;
-			sch->qstats.drops	+= qdisc->qstats.drops;
-			sch->qstats.requeues	+= qdisc->qstats.requeues;
-			sch->qstats.overlimits	+= qdisc->qstats.overlimits;
-		}
+		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
+				     &qdisc->bstats, false);
+		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+				     &qdisc->qstats);
+		sch->q.qlen += qdisc_qlen(qdisc);
 
 		spin_unlock_bh(qdisc_lock(qdisc));
 	}
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:560 @ static int mqprio_dump_class_stats(struc
 {
 	if (cl >= TC_H_MIN_PRIORITY) {
 		int i;
-		__u32 qlen = 0;
+		__u32 qlen;
 		struct gnet_stats_queue qstats = {0};
-		struct gnet_stats_basic_packed bstats = {0};
+		struct gnet_stats_basic_sync bstats;
 		struct net_device *dev = qdisc_dev(sch);
 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
 
+		gnet_stats_basic_sync_init(&bstats);
 		/* Drop lock here it will be reclaimed before touching
 		 * statistics this is required because the d->lock we
 		 * hold here is the look on dev_queue->qdisc_sleeping
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:581 @ static int mqprio_dump_class_stats(struc
 
 			spin_lock_bh(qdisc_lock(qdisc));
 
-			if (qdisc_is_percpu_stats(qdisc)) {
-				qlen = qdisc_qlen_sum(qdisc);
+			gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
+					     &qdisc->bstats, false);
+			gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
+					     &qdisc->qstats);
+			sch->q.qlen += qdisc_qlen(qdisc);
 
-				__gnet_stats_copy_basic(NULL, &bstats,
-							qdisc->cpu_bstats,
-							&qdisc->bstats);
-				__gnet_stats_copy_queue(&qstats,
-							qdisc->cpu_qstats,
-							&qdisc->qstats,
-							qlen);
-			} else {
-				qlen		+= qdisc->q.qlen;
-				bstats.bytes	+= qdisc->bstats.bytes;
-				bstats.packets	+= qdisc->bstats.packets;
-				qstats.backlog	+= qdisc->qstats.backlog;
-				qstats.drops	+= qdisc->qstats.drops;
-				qstats.requeues	+= qdisc->qstats.requeues;
-				qstats.overlimits += qdisc->qstats.overlimits;
-			}
 			spin_unlock_bh(qdisc_lock(qdisc));
 		}
+		qlen = qdisc_qlen(sch) + qstats.qlen;
 
 		/* Reclaim root sleeping lock before completing stats */
 		if (d->lock)
 			spin_lock_bh(d->lock);
-		if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
+		if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
 		    gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
 			return -1;
 	} else {
 		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
 
 		sch = dev_queue->qdisc_sleeping;
-		if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
-					  sch->cpu_bstats, &sch->bstats) < 0 ||
+		if (gnet_stats_copy_basic(d, sch->cpu_bstats,
+					  &sch->bstats, true) < 0 ||
 		    qdisc_qstats_copy(d, sch) < 0)
 			return -1;
 	}
Index: linux-5.15/net/sched/sch_multiq.c
===================================================================
--- linux-5.15.orig/net/sched/sch_multiq.c
+++ linux-5.15/net/sched/sch_multiq.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:340 @ static int multiq_dump_class_stats(struc
 	struct Qdisc *cl_q;
 
 	cl_q = q->queues[cl - 1];
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
 
Index: linux-5.15/net/sched/sch_prio.c
===================================================================
--- linux-5.15.orig/net/sched/sch_prio.c
+++ linux-5.15/net/sched/sch_prio.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:362 @ static int prio_dump_class_stats(struct
 	struct Qdisc *cl_q;
 
 	cl_q = q->queues[cl - 1];
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, cl_q->cpu_bstats,
+				  &cl_q->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
 
Index: linux-5.15/net/sched/sch_qfq.c
===================================================================
--- linux-5.15.orig/net/sched/sch_qfq.c
+++ linux-5.15/net/sched/sch_qfq.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:134 @ struct qfq_class {
 
 	unsigned int filter_cnt;
 
-	struct gnet_stats_basic_packed bstats;
+	struct gnet_stats_basic_sync bstats;
 	struct gnet_stats_queue qstats;
 	struct net_rate_estimator __rcu *rate_est;
 	struct Qdisc *qdisc;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:455 @ static int qfq_change_class(struct Qdisc
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    qdisc_root_sleeping_running(sch),
+						    true,
 						    tca[TCA_RATE]);
 			if (err)
 				return err;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:469 @ static int qfq_change_class(struct Qdisc
 	if (cl == NULL)
 		return -ENOBUFS;
 
+	gnet_stats_basic_sync_init(&cl->bstats);
 	cl->common.classid = classid;
 	cl->deficit = lmax;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:482 @ static int qfq_change_class(struct Qdisc
 		err = gen_new_estimator(&cl->bstats, NULL,
 					&cl->rate_est,
 					NULL,
-					qdisc_root_sleeping_running(sch),
+					true,
 					tca[TCA_RATE]);
 		if (err)
 			goto destroy_class;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:644 @ static int qfq_dump_class_stats(struct Q
 	xstats.weight = cl->agg->class_weight;
 	xstats.lmax = cl->agg->lmax;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    qdisc_qstats_copy(d, cl->qdisc) < 0)
 		return -1;
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:1238 @ static int qfq_enqueue(struct sk_buff *s
 		return err;
 	}
 
-	cl->bstats.bytes += len;
-	cl->bstats.packets += gso_segs;
+	_bstats_update(&cl->bstats, len, gso_segs);
 	sch->qstats.backlog += len;
 	++sch->q.qlen;
 
Index: linux-5.15/net/sched/sch_taprio.c
===================================================================
--- linux-5.15.orig/net/sched/sch_taprio.c
+++ linux-5.15/net/sched/sch_taprio.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2025 @ static int taprio_dump_class_stats(struc
 	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
 
 	sch = dev_queue->qdisc_sleeping;
-	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, sch) < 0)
 		return -1;
 	return 0;
Index: linux-5.15/net/sunrpc/svc_xprt.c
===================================================================
--- linux-5.15.orig/net/sunrpc/svc_xprt.c
+++ linux-5.15/net/sunrpc/svc_xprt.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:444 @ void svc_xprt_do_enqueue(struct svc_xprt
 	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
 		return;
 
-	cpu = get_cpu();
+	cpu = get_cpu_light();
 	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
 
 	atomic_long_inc(&pool->sp_stats.packets);
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:468 @ void svc_xprt_do_enqueue(struct svc_xprt
 	rqstp = NULL;
 out_unlock:
 	rcu_read_unlock();
-	put_cpu();
+	put_cpu_light();
 	trace_svc_xprt_do_enqueue(xprt, rqstp);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
Index: linux-5.15/samples/kfifo/bytestream-example.c
===================================================================
--- linux-5.15.orig/samples/kfifo/bytestream-example.c
+++ linux-5.15/samples/kfifo/bytestream-example.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:25 @
 #define	PROC_FIFO	"bytestream-fifo"
 
 /* lock for procfs read access */
-static DEFINE_MUTEX(read_lock);
+static DEFINE_MUTEX(read_access);
 
 /* lock for procfs write access */
-static DEFINE_MUTEX(write_lock);
+static DEFINE_MUTEX(write_access);
 
 /*
  * define DYNAMIC in this example for a dynamically allocated fifo.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:119 @ static ssize_t fifo_write(struct file *f
 	int ret;
 	unsigned int copied;
 
-	if (mutex_lock_interruptible(&write_lock))
+	if (mutex_lock_interruptible(&write_access))
 		return -ERESTARTSYS;
 
 	ret = kfifo_from_user(&test, buf, count, &copied);
 
-	mutex_unlock(&write_lock);
+	mutex_unlock(&write_access);
 	if (ret)
 		return ret;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:137 @ static ssize_t fifo_read(struct file *fi
 	int ret;
 	unsigned int copied;
 
-	if (mutex_lock_interruptible(&read_lock))
+	if (mutex_lock_interruptible(&read_access))
 		return -ERESTARTSYS;
 
 	ret = kfifo_to_user(&test, buf, count, &copied);
 
-	mutex_unlock(&read_lock);
+	mutex_unlock(&read_access);
 	if (ret)
 		return ret;
 
Index: linux-5.15/samples/kfifo/inttype-example.c
===================================================================
--- linux-5.15.orig/samples/kfifo/inttype-example.c
+++ linux-5.15/samples/kfifo/inttype-example.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:25 @
 #define	PROC_FIFO	"int-fifo"
 
 /* lock for procfs read access */
-static DEFINE_MUTEX(read_lock);
+static DEFINE_MUTEX(read_access);
 
 /* lock for procfs write access */
-static DEFINE_MUTEX(write_lock);
+static DEFINE_MUTEX(write_access);
 
 /*
  * define DYNAMIC in this example for a dynamically allocated fifo.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:112 @ static ssize_t fifo_write(struct file *f
 	int ret;
 	unsigned int copied;
 
-	if (mutex_lock_interruptible(&write_lock))
+	if (mutex_lock_interruptible(&write_access))
 		return -ERESTARTSYS;
 
 	ret = kfifo_from_user(&test, buf, count, &copied);
 
-	mutex_unlock(&write_lock);
+	mutex_unlock(&write_access);
 	if (ret)
 		return ret;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:130 @ static ssize_t fifo_read(struct file *fi
 	int ret;
 	unsigned int copied;
 
-	if (mutex_lock_interruptible(&read_lock))
+	if (mutex_lock_interruptible(&read_access))
 		return -ERESTARTSYS;
 
 	ret = kfifo_to_user(&test, buf, count, &copied);
 
-	mutex_unlock(&read_lock);
+	mutex_unlock(&read_access);
 	if (ret)
 		return ret;
 
Index: linux-5.15/samples/kfifo/record-example.c
===================================================================
--- linux-5.15.orig/samples/kfifo/record-example.c
+++ linux-5.15/samples/kfifo/record-example.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:25 @
 #define	PROC_FIFO	"record-fifo"
 
 /* lock for procfs read access */
-static DEFINE_MUTEX(read_lock);
+static DEFINE_MUTEX(read_access);
 
 /* lock for procfs write access */
-static DEFINE_MUTEX(write_lock);
+static DEFINE_MUTEX(write_access);
 
 /*
  * define DYNAMIC in this example for a dynamically allocated fifo.
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:126 @ static ssize_t fifo_write(struct file *f
 	int ret;
 	unsigned int copied;
 
-	if (mutex_lock_interruptible(&write_lock))
+	if (mutex_lock_interruptible(&write_access))
 		return -ERESTARTSYS;
 
 	ret = kfifo_from_user(&test, buf, count, &copied);
 
-	mutex_unlock(&write_lock);
+	mutex_unlock(&write_access);
 	if (ret)
 		return ret;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:144 @ static ssize_t fifo_read(struct file *fi
 	int ret;
 	unsigned int copied;
 
-	if (mutex_lock_interruptible(&read_lock))
+	if (mutex_lock_interruptible(&read_access))
 		return -ERESTARTSYS;
 
 	ret = kfifo_to_user(&test, buf, count, &copied);
 
-	mutex_unlock(&read_lock);
+	mutex_unlock(&read_access);
 	if (ret)
 		return ret;
 
Index: linux-5.15/security/smack/smack_lsm.c
===================================================================
--- linux-5.15.orig/security/smack/smack_lsm.c
+++ linux-5.15/security/smack/smack_lsm.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:54 @
 #define SMK_RECEIVING	1
 #define SMK_SENDING	2
 
+#ifdef SMACK_IPV6_PORT_LABELING
 static DEFINE_MUTEX(smack_ipv6_lock);
 static LIST_HEAD(smk_ipv6_port_list);
+#endif
 struct kmem_cache *smack_rule_cache;
 int smack_enabled __initdata;
 
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2601 @ static void smk_ipv6_port_label(struct s
 	mutex_unlock(&smack_ipv6_lock);
 	return;
 }
-#endif
 
 /**
  * smk_ipv6_port_check - check Smack port access
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2663 @ static int smk_ipv6_port_check(struct so
 
 	return smk_ipv6_check(skp, object, address, act);
 }
+#endif
 
 /**
  * smack_inode_setsecurity - set smack xattrs
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:2850 @ static int smack_socket_connect(struct s
 			rc = smk_ipv6_check(ssp->smk_out, rsp, sip,
 					    SMK_CONNECTING);
 		}
-		if (__is_defined(SMACK_IPV6_PORT_LABELING))
-			rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING);
+#ifdef SMACK_IPV6_PORT_LABELING
+		rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING);
+#endif
 
 		return rc;
 	}
Index: linux-5.15/sound/soc/mediatek/common/mtk-afe-fe-dai.c
===================================================================
--- linux-5.15.orig/sound/soc/mediatek/common/mtk-afe-fe-dai.c
+++ linux-5.15/sound/soc/mediatek/common/mtk-afe-fe-dai.c
@ linux-5.15/Documentation/admin-guide/cgroup-v1/memory.rst:291 @ const struct snd_soc_dai_ops mtk_afe_fe_
 };
 EXPORT_SYMBOL_GPL(mtk_afe_fe_ops);
 
-static DEFINE_MUTEX(irqs_lock);
 int mtk_dynamic_irq_acquire(struct mtk_base_afe *afe)
 {
 	int i;