From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 27 Apr 2023 13:19:34 +0200
Subject: [PATCH 1/4] sched/core: Provide schedule_rtmutex() and expose sched work
 helpers

schedule() invokes sched_submit_work() before scheduling and
sched_update_worker() afterwards to ensure that queued block requests are
flushed and the (IO)worker machineries can instantiate new workers if
required. This avoids deadlocks and starvation.

With rt_mutexes this can lead to a subtle problem:

  When an rtmutex blocks, current::pi_blocked_on points to the rtmutex it
  blocks on. When one of the functions in sched_submit/resume_work()
  contends on an rtmutex-based lock then that would corrupt
  current::pi_blocked_on.

Make it possible to let rtmutex issue the calls outside of the slowpath,
i.e. when it is guaranteed that current::pi_blocked_on is NULL, by:

  - Exposing sched_submit_work() and moving the task_is_running()
    condition into schedule()

  - Renaming sched_update_worker() to sched_resume_work() and exposing it
    too.

  - Providing schedule_rtmutex() which just does the inner loop of
    scheduling until need_resched() is no longer set. Split out the loop
    so this does not create yet another copy.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20230427111937.2745231-2-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/sched.h |    5 +++++
 kernel/sched/core.c   |   40 ++++++++++++++++++++++------------------
 2 files changed, 27 insertions(+), 18 deletions(-)

Index: linux-6.3.0-rt11/include/linux/sched.h
===================================================================
@ linux-6.3.0-rt11/include/linux/sched.h:306 @ extern long schedule_timeout_idle(long t
 asmlinkage void schedule(void);
 extern void schedule_preempt_disabled(void);
 asmlinkage void preempt_schedule_irq(void);
+
+extern void sched_submit_work(void);
+extern void sched_resume_work(void);
+extern void schedule_rtmutex(void);
+
 #ifdef CONFIG_PREEMPT_RT
  extern void schedule_rtlock(void);
 #endif
Index: linux-6.3.0-rt11/kernel/sched/core.c
===================================================================
--- linux-6.3.0-rt11.orig/kernel/sched/core.c
+++ linux-6.3.0-rt11/kernel/sched/core.c
@ linux-6.3.0-rt11/include/linux/sched.h:6722 @ void __noreturn do_task_dead(void)
 		cpu_relax();
 }
 
-static inline void sched_submit_work(struct task_struct *tsk)
+void sched_submit_work(void)
 {
-	unsigned int task_flags;
-
-	if (task_is_running(tsk))
-		return;
+	struct task_struct *tsk = current;
+	unsigned int task_flags = tsk->flags;
 
-	task_flags = tsk->flags;
 	/*
 	 * If a worker goes to sleep, notify and ask workqueue whether it
 	 * wants to wake up a task to maintain concurrency.
@ linux-6.3.0-rt11/include/linux/sched.h:6752 @ static inline void sched_submit_work(str
 	blk_flush_plug(tsk->plug, true);
 }
 
-static void sched_update_worker(struct task_struct *tsk)
+void sched_resume_work(void)
 {
+	struct task_struct *tsk = current;
+
 	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
 		if (tsk->flags & PF_WQ_WORKER)
 			wq_worker_running(tsk);
@ linux-6.3.0-rt11/include/linux/sched.h:6764 @ static void sched_update_worker(struct t
 	}
 }
 
-asmlinkage __visible void __sched schedule(void)
+static void schedule_loop(unsigned int sched_mode)
 {
-	struct task_struct *tsk = current;
-
-	sched_submit_work(tsk);
 	do {
 		preempt_disable();
-		__schedule(SM_NONE);
+		__schedule(sched_mode);
 		sched_preempt_enable_no_resched();
 	} while (need_resched());
-	sched_update_worker(tsk);
+}
+
+asmlinkage __visible void __sched schedule(void)
+{
+	if (!task_is_running(current))
+		sched_submit_work();
+	schedule_loop(SM_NONE);
+	sched_resume_work();
 }
 EXPORT_SYMBOL(schedule);
 
+void schedule_rtmutex(void)
+{
+	schedule_loop(SM_NONE);
+}
+
 /*
  * synchronize_rcu_tasks() makes sure that no task is stuck in preempted
  * state (have scheduled out non-voluntarily) by making sure that all
@ linux-6.3.0-rt11/include/linux/sched.h:6846 @ void __sched schedule_preempt_disabled(v
 #ifdef CONFIG_PREEMPT_RT
 void __sched notrace schedule_rtlock(void)
 {
-	do {
-		preempt_disable();
-		__schedule(SM_RTLOCK_WAIT);
-		sched_preempt_enable_no_resched();
-	} while (need_resched());
+	schedule_loop(SM_RTLOCK_WAIT);
 }
 NOKPROBE_SYMBOL(schedule_rtlock);
 #endif