Subject: printk: introduce kernel sync mode
From: John Ogness <john.ogness@linutronix.de>
Date: Mon Nov 30 01:42:06 2020 +0106

From: John Ogness <john.ogness@linutronix.de>

When the kernel performs an OOPS, enter into "sync mode":

- only atomic consoles (i.e. consoles implementing the write_atomic()
  callback; see the sketch below) will print
- printing occurs directly within vprintk_store() instead of in
  console_unlock()

CONSOLE_LOG_MAX is moved to printk.h to support the per-console
buffer used in sync mode.
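
For illustration, a console participates in sync mode simply by providing a
write_atomic() callback that can emit characters from any context (including
NMI) without locking or sleeping. The sketch below is only an example and is
not part of this patch: all my_uart_* names and register offsets are
invented, and write_atomic() is assumed to share the prototype of ->write().

  #include <linux/console.h>
  #include <linux/io.h>
  #include <asm/processor.h>	/* cpu_relax() */

  #define MY_UART_DATA		0x00	/* invented register offsets */
  #define MY_UART_STATUS	0x04
  #define MY_UART_TX_READY	0x01

  static void __iomem *my_uart_base;	/* mapped during probe */

  static void my_uart_putc(char c)
  {
  	/* Busy-wait until the TX register can accept another byte. */
  	while (!(readb(my_uart_base + MY_UART_STATUS) & MY_UART_TX_READY))
  		cpu_relax();
  	writeb(c, my_uart_base + MY_UART_DATA);
  }

  static void my_uart_write_atomic(struct console *con, const char *s,
  				   unsigned int count)
  {
  	unsigned int i;

  	/*
  	 * In sync mode this is called directly from vprintk_store(),
  	 * possibly in NMI context: only polled output, no locking.
  	 */
  	for (i = 0; i < count; i++) {
  		if (s[i] == '\n')
  			my_uart_putc('\r');
  		my_uart_putc(s[i]);
  	}
  }

  static struct console my_uart_console = {
  	.name		= "myuart",
  	/* a real driver would also provide a locking ->write() */
  	.write		= my_uart_write_atomic,
  	.write_atomic	= my_uart_write_atomic,
  	.flags		= CON_PRINTBUFFER,
  	.index		= -1,
  };

Such a console keeps printing in sync mode because console_may_sync() and
call_sync_console_driver() only require CON_ENABLED and a non-NULL
write_atomic() callback.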

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 include/linux/console.h |    4 +
 include/linux/printk.h  |    6 +
 kernel/printk/printk.c  |  178 +++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 178 insertions(+), 10 deletions(-)
---
--- a/include/linux/console.h
+++ b/include/linux/console.h
@ include/linux/console.h:19 @
 
 #include <linux/atomic.h>
 #include <linux/types.h>
+#include <linux/printk.h>
 
 struct vc_data;
 struct console_font_op;
@ include/linux/console.h:154 @ struct console {
 	short	flags;
 	short	index;
 	int	cflag;
+#ifdef CONFIG_PRINTK
+	char	sync_buf[CONSOLE_LOG_MAX];
+#endif
 	void	*data;
 	struct	 console *next;
 };
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@ include/linux/printk.h:50 @ static inline const char *printk_skip_he
 
 #define CONSOLE_EXT_LOG_MAX	8192
 
+/*
+ * The maximum size of a record formatted for console printing
+ * (i.e. with the prefix prepended to every line).
+ */
+#define CONSOLE_LOG_MAX		1024
+
 /* printk's without a loglevel use this.. */
 #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
 
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@ kernel/printk/printk.c:48 @
 #include <linux/ctype.h>
 #include <linux/uio.h>
 #include <linux/kgdb.h>
+#include <linux/clocksource.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
@ kernel/printk/printk.c:359 @ static int console_msg_format = MSG_FORM
 static DEFINE_MUTEX(syslog_lock);
 
 #ifdef CONFIG_PRINTK
+/* Set to enable sync mode. Once set, it is never cleared. */
+static bool sync_mode;
+
 DECLARE_WAIT_QUEUE_HEAD(log_wait);
 /* All 3 protected by @syslog_lock. */
 /* the next printk record to read by syslog(READ) or /proc/kmsg */
@ kernel/printk/printk.c:389 @ static struct latched_seq console_seq =
 	.val[1]		= 0,
 };
 
+static struct latched_seq console_sync_seq = {
+	.latch		= SEQCNT_LATCH_ZERO(console_sync_seq.latch),
+	.val[0]		= 0,
+	.val[1]		= 0,
+};
+
+#ifdef CONFIG_HAVE_NMI
+static struct latched_seq console_sync_nmi_seq = {
+	.latch		= SEQCNT_LATCH_ZERO(console_sync_nmi_seq.latch),
+	.val[0]		= 0,
+	.val[1]		= 0,
+};
+#endif
+
 /*
  * The next printk record to read after the last 'clear' command. There are
  * two copies (updated with seqcount_latch) so that reads can locklessly
@ kernel/printk/printk.c:420 @ static struct latched_seq clear_seq = {
 #define PREFIX_MAX		32
 #endif
 
-/* the maximum size of a formatted record (i.e. with prefix added per line) */
-#define CONSOLE_LOG_MAX		1024
-
 /* the maximum size allowed to be reserved for a record */
 #define LOG_LINE_MAX		(CONSOLE_LOG_MAX - PREFIX_MAX)
 
@ kernel/printk/printk.c:1791 @ static inline void printk_delay(int leve
 	}
 }
 
+static bool kernel_sync_mode(void)
+{
+	return (oops_in_progress || sync_mode);
+}
+
+static bool console_may_sync(struct console *con)
+{
+	if (!(con->flags & CON_ENABLED))
+		return false;
+	if (con->write_atomic && kernel_sync_mode())
+		return true;
+	return false;
+}
+
+static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len)
+{
+	if (!(con->flags & CON_ENABLED))
+		return false;
+	if (con->write_atomic && kernel_sync_mode())
+		con->write_atomic(con, text, text_len);
+	else
+		return false;
+
+	return true;
+}
+
+static bool have_atomic_console(void)
+{
+	struct console *con;
+
+	for_each_console(con) {
+		if (!(con->flags & CON_ENABLED))
+			continue;
+		if (con->write_atomic)
+			return true;
+	}
+	return false;
+}
+
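+/*
+ * Read the next valid record at or after @*seq, format it into
+ * @con->sync_buf and emit it via the console's write_atomic() callback.
+ * On success, @*seq is updated to the sequence number of the printed
+ * record.
+ */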
+static bool print_sync(struct console *con, u64 *seq)
+{
+	struct printk_info info;
+	struct printk_record r;
+	size_t text_len;
+
+	prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf));
+
+	if (!prb_read_valid(prb, *seq, &r))
+		return false;
+
+	text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
+
+	if (!call_sync_console_driver(con, &con->sync_buf[0], text_len))
+		return false;
+
+	*seq = r.info->seq;
+
+	touch_softlockup_watchdog_sync();
+	clocksource_touch_watchdog();
+	rcu_cpu_stall_reset();
+	touch_nmi_watchdog();
+
+	if (text_len)
+		printk_delay(r.info->level);
+
+	return true;
+}
+
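+/*
+ * Return the next record sequence number to print, taking both the regular
+ * console progress and any sync mode progress into account.
+ */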
+static u64 read_console_seq(void)
+{
+	u64 seq2;
+	u64 seq;
+
+	seq = latched_seq_read_nolock(&console_seq);
+	seq2 = latched_seq_read_nolock(&console_sync_seq);
+	if (seq2 > seq)
+		seq = seq2;
+#ifdef CONFIG_HAVE_NMI
+	seq2 = latched_seq_read_nolock(&console_sync_nmi_seq);
+	if (seq2 > seq)
+		seq = seq2;
+#endif
+	return seq;
+}
+
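+/*
+ * Print all records with a sequence number below @seq on @con. Progress is
+ * tracked in @console_sync_seq, or in @console_sync_nmi_seq when running in
+ * NMI context, so that NMI printing never updates the non-NMI sequence.
+ */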
+static void print_sync_until(struct console *con, u64 seq)
+{
+	u64 printk_seq;
+
+	while (!__printk_cpu_trylock())
+		cpu_relax();
+
+	for (;;) {
+		printk_seq = read_console_seq();
+		if (printk_seq >= seq)
+			break;
+		if (!print_sync(con, &printk_seq))
+			break;
+#ifdef CONFIG_HAVE_NMI
+		if (in_nmi()) {
+			latched_seq_write(&console_sync_nmi_seq, printk_seq + 1);
+			continue;
+		}
+#endif
+		latched_seq_write(&console_sync_seq, printk_seq + 1);
+	}
+
+	__printk_cpu_unlock();
+}
+
 /*
  * Special console_lock variants that help to reduce the risk of soft-lockups.
  * They allow to pass console_lock to another printk() call using a busy wait.
@ kernel/printk/printk.c:2075 @ static void call_console_drivers(const c
 		if (!cpu_online(smp_processor_id()) &&
 		    !(con->flags & CON_ANYTIME))
 			continue;
+		if (kernel_sync_mode())
+			continue;
 		if (con->flags & CON_EXTENDED)
 			con->write(con, ext_text, ext_len);
 		else {
@ kernel/printk/printk.c:2244 @ int vprintk_store(int facility, int leve
 	const u32 caller_id = printk_caller_id();
 	struct prb_reserved_entry e;
 	enum printk_info_flags flags = 0;
+	bool final_commit = false;
 	struct printk_record r;
 	unsigned long irqflags;
 	u16 trunc_msg_len = 0;
@ kernel/printk/printk.c:2255 @ int vprintk_store(int facility, int leve
 	u16 text_len;
 	int ret = 0;
 	u64 ts_nsec;
+	u64 seq;
 
 	/*
 	 * Since the duration of printk() can vary depending on the message
@ kernel/printk/printk.c:2294 @ int vprintk_store(int facility, int leve
 	if (flags & LOG_CONT) {
 		prb_rec_init_wr(&r, reserve_size);
 		if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
+			seq = r.info->seq;
 			text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
 						 facility, &flags, fmt, args);
 			r.info->text_len += text_len;
@ kernel/printk/printk.c:2302 @ int vprintk_store(int facility, int leve
 			if (flags & LOG_NEWLINE) {
 				r.info->flags |= LOG_NEWLINE;
 				prb_final_commit(&e);
+				final_commit = true;
 			} else {
 				prb_commit(&e);
 			}
@ kernel/printk/printk.c:2326 @ int vprintk_store(int facility, int leve
 		if (!prb_reserve(&e, prb, &r))
 			goto out;
 	}
+	seq = r.info->seq;
 
 	/* fill message */
 	text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
@ kernel/printk/printk.c:2342 @ int vprintk_store(int facility, int leve
 		memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
 
 	/* A message without a trailing newline can be continued. */
-	if (!(flags & LOG_NEWLINE))
+	if (!(flags & LOG_NEWLINE)) {
 		prb_commit(&e);
-	else
+	} else {
 		prb_final_commit(&e);
+		final_commit = true;
+	}
 
 	ret = text_len + trunc_msg_len;
 out:
+	/*
+	 * Only the kernel (facility == 0) may perform synchronous printing,
+	 * and only once the record has been finalized (complete line).
+	 */
+	if (facility == 0 && final_commit) {
+		struct console *con;
+
+		for_each_console(con) {
+			if (console_may_sync(con))
+				print_sync_until(con, seq + 1);
+		}
+	}
+
 	printk_exit_irqrestore(recursion_ptr, irqflags);
 	return ret;
 }
@ kernel/printk/printk.c:2429 @ EXPORT_SYMBOL(_printk);
 
 #else /* CONFIG_PRINTK */
 
-#define CONSOLE_LOG_MAX		0
 #define printk_time		false
 
 #define prb_read_valid(rb, seq, r)	false
 #define prb_first_valid_seq(rb)		0
-#define latched_seq_read_nolock(seq)	0
+#define read_console_seq()		0
 #define latched_seq_write(dst, src)
+#define kernel_sync_mode()		false
 
 static u64 exclusive_console_stop_seq;
 static unsigned long console_dropped;
@ kernel/printk/printk.c:2739 @ static int have_callable_console(void)
  */
 static inline int can_use_console(void)
 {
+	if (kernel_sync_mode())
+		return false;
 	return cpu_online(raw_smp_processor_id()) || have_callable_console();
 }
 
@ kernel/printk/printk.c:2810 @ void console_unlock(void)
 		size_t len;
 
 skip:
-		seq = latched_seq_read_nolock(&console_seq);
+		seq = read_console_seq();
 		if (!prb_read_valid(prb, seq, &r))
 			break;
 
@ kernel/printk/printk.c:2890 @ void console_unlock(void)
 	 * there's a new owner and the console_unlock() from them will do the
 	 * flush, no worries.
 	 */
-	retry = prb_read_valid(prb, latched_seq_read_nolock(&console_seq), NULL);
+	retry = prb_read_valid(prb, read_console_seq(), NULL);
 	if (retry && console_trylock())
 		goto again;
 }
@ kernel/printk/printk.c:3190 @ void register_console(struct console *ne
 		 * ignores console_lock.
 		 */
 		exclusive_console = newcon;
-		exclusive_console_stop_seq = latched_seq_read_nolock(&console_seq);
+		exclusive_console_stop_seq = read_console_seq();
 
 		/* Get a consistent copy of @syslog_seq. */
 		mutex_lock(&syslog_lock);
@ kernel/printk/printk.c:3560 @ void kmsg_dump(enum kmsg_dump_reason rea
 {
 	struct kmsg_dumper *dumper;
 
+	if (!oops_in_progress) {
+		/*
+		 * If atomic consoles are available, activate kernel sync mode
+		 * to make sure any final messages are visible. The trailing
+		 * pr_info() is important: in sync mode it also flushes any
+		 * messages that are still pending in the ringbuffer.
+		 */
+		if (have_atomic_console()) {
+			sync_mode = true;
+			pr_info("enabled sync mode\n");
+		}
+	}
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(dumper, &dump_list, list) {
 		enum kmsg_dump_reason max_reason = dumper->max_reason;