From fa693acffafd61a4cca3c5d08766926121492997 Mon Sep 17 00:00:00 2001
From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
Date: Sat, 16 Oct 2021 10:49:07 +0200
Subject: [PATCH 043/158] net: sched: Protect Qdisc::bstats with u64_stats

The not-per-CPU variant of qdisc tc (traffic control) statistics,
Qdisc::gnet_stats_basic_packed bstats, is protected with Qdisc::running
sequence counter.

This sequence counter is used for reliably protecting bstats reads from
parallel writes. Meanwhile, the seqcount's write section covers a much
wider area than bstats update: qdisc_run_begin() => qdisc_run_end().

That read/write section asymmetry can lead to needless retries of the
read section. To prepare for removing the Qdisc::running sequence
counter altogether, introduce a u64_stats sync point inside bstats
instead.

Modify _bstats_update() to start/end the bstats u64_stats write
section.

For bisectability, and finer commits granularity, the bstats read
section is still protected with a Qdisc::running read/retry loop and
qdisc_run_begin/end() still starts/ends that seqcount write section.
Once all call sites are modified to use _bstats_update(), the
Qdisc::running seqcount will be removed and bstats read/retry loop will
be modified to utilize the internal u64_stats sync point.

Note, using u64_stats implies no sequence counter protection for 64-bit
architectures. This can lead to the statistics "packets" vs. "bytes"
values getting out of sync on rare occasions. The individual values will
still be valid.

[bigeasy: Minor commit message edits, init all gnet_stats_basic_packed.]

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gen_stats.h    |  2 ++
 include/net/sch_generic.h  |  2 ++
 net/core/gen_estimator.c   |  2 +-
 net/core/gen_stats.c       | 14 ++++++++++++--
 net/netfilter/xt_RATEEST.c |  1 +
 net/sched/act_api.c        |  2 ++
 net/sched/sch_atm.c        |  1 +
 net/sched/sch_cbq.c        |  1 +
 net/sched/sch_drr.c        |  1 +
 net/sched/sch_ets.c        |  2 +-
 net/sched/sch_generic.c    |  1 +
 net/sched/sch_gred.c       |  4 +++-
 net/sched/sch_hfsc.c       |  1 +
 net/sched/sch_htb.c        |  7 +++++--
 net/sched/sch_mq.c         |  2 +-
 net/sched/sch_mqprio.c     |  5 +++--
 net/sched/sch_qfq.c        |  1 +
 17 files changed, 39 insertions(+), 10 deletions(-)

Index: linux-5.15.19-rt29/include/net/gen_stats.h
===================================================================
--- linux-5.15.19-rt29.orig/include/net/gen_stats.h
+++ linux-5.15.19-rt29/include/net/gen_stats.h
@@ -11,6 +11,7 @@
 struct gnet_stats_basic_packed {
 	__u64	bytes;
 	__u64	packets;
+	struct u64_stats_sync syncp;
 };
 
 struct gnet_stats_basic_cpu {
@@ -34,6 +35,7 @@ struct gnet_dump {
 	struct tc_stats   tc_stats;
 };
 
+void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b);
 int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
 			  struct gnet_dump *d, int padattr);
 
Index: linux-5.15.19-rt29/include/net/sch_generic.h
===================================================================
--- linux-5.15.19-rt29.orig/include/net/sch_generic.h
+++ linux-5.15.19-rt29/include/net/sch_generic.h
@@ -850,8 +850,10 @@ static inline int qdisc_enqueue(struct s
 static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
 				  __u64 bytes, __u32 packets)
 {
+	u64_stats_update_begin(&bstats->syncp);
 	bstats->bytes += bytes;
 	bstats->packets += packets;
+	u64_stats_update_end(&bstats->syncp);
 }
 
 static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
Index: linux-5.15.19-rt29/net/core/gen_estimator.c
===================================================================
--- linux-5.15.19-rt29.orig/net/core/gen_estimator.c
+++ linux-5.15.19-rt29/net/core/gen_estimator.c
@@ -62,7 +62,7 @@ struct net_rate_estimator {
 static void est_fetch_counters(struct net_rate_estimator *e,
 			       struct gnet_stats_basic_packed *b)
 {
-	memset(b, 0, sizeof(*b));
+	gnet_stats_basic_packed_init(b);
 	if (e->stats_lock)
 		spin_lock(e->stats_lock);
 
Index: linux-5.15.19-rt29/net/core/gen_stats.c
===================================================================
--- linux-5.15.19-rt29.orig/net/core/gen_stats.c
+++ linux-5.15.19-rt29/net/core/gen_stats.c
@@ -18,7 +18,7 @@
 #include <linux/gen_stats.h>
 #include <net/netlink.h>
 #include <net/gen_stats.h>
-
+#include <net/sch_generic.h>
 
 static inline int
 gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
@@ -114,6 +114,15 @@ gnet_stats_start_copy(struct sk_buff *sk
 }
 EXPORT_SYMBOL(gnet_stats_start_copy);
 
+/* Must not be inlined, due to u64_stats seqcount_t lockdep key */
+void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b)
+{
+	b->bytes = 0;
+	b->packets = 0;
+	u64_stats_init(&b->syncp);
+}
+EXPORT_SYMBOL(gnet_stats_basic_packed_init);
+
 static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats,
 				     struct gnet_stats_basic_cpu __percpu *cpu)
 {
@@ -167,8 +176,9 @@ ___gnet_stats_copy_basic(const seqcount_
 			 struct gnet_stats_basic_packed *b,
 			 int type)
 {
-	struct gnet_stats_basic_packed bstats = {0};
+	struct gnet_stats_basic_packed bstats;
 
+	gnet_stats_basic_packed_init(&bstats);
 	gnet_stats_add_basic(running, &bstats, cpu, b);
 
 	if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
Index: linux-5.15.19-rt29/net/netfilter/xt_RATEEST.c
===================================================================
--- linux-5.15.19-rt29.orig/net/netfilter/xt_RATEEST.c
+++ linux-5.15.19-rt29/net/netfilter/xt_RATEEST.c
@@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(cons
 	if (!est)
 		goto err1;
 
+	gnet_stats_basic_packed_init(&est->bstats);
 	strlcpy(est->name, info->name, sizeof(est->name));
 	spin_lock_init(&est->lock);
 	est->refcnt		= 1;
Index: linux-5.15.19-rt29/net/sched/act_api.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/act_api.c
+++ linux-5.15.19-rt29/net/sched/act_api.c
@@ -490,6 +490,8 @@ int tcf_idr_create(struct tc_action_net
 		if (!p->cpu_qstats)
 			goto err3;
 	}
+	gnet_stats_basic_packed_init(&p->tcfa_bstats);
+	gnet_stats_basic_packed_init(&p->tcfa_bstats_hw);
 	spin_lock_init(&p->tcfa_lock);
 	p->tcfa_index = index;
 	p->tcfa_tm.install = jiffies;
Index: linux-5.15.19-rt29/net/sched/sch_atm.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_atm.c
+++ linux-5.15.19-rt29/net/sched/sch_atm.c
@@ -548,6 +548,7 @@ static int atm_tc_init(struct Qdisc *sch
 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 	INIT_LIST_HEAD(&p->flows);
 	INIT_LIST_HEAD(&p->link.list);
+	gnet_stats_basic_packed_init(&p->link.bstats);
 	list_add(&p->link.list, &p->flows);
 	p->link.q = qdisc_create_dflt(sch->dev_queue,
 				      &pfifo_qdisc_ops, sch->handle, extack);
Index: linux-5.15.19-rt29/net/sched/sch_cbq.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_cbq.c
+++ linux-5.15.19-rt29/net/sched/sch_cbq.c
@@ -1611,6 +1611,7 @@ cbq_change_class(struct Qdisc *sch, u32
 	if (cl == NULL)
 		goto failure;
 
+	gnet_stats_basic_packed_init(&cl->bstats);
 	err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
 	if (err) {
 		kfree(cl);
Index: linux-5.15.19-rt29/net/sched/sch_drr.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_drr.c
+++ linux-5.15.19-rt29/net/sched/sch_drr.c
@@ -106,6 +106,7 @@ static int drr_change_class(struct Qdisc
 	if (cl == NULL)
 		return -ENOBUFS;
 
+	gnet_stats_basic_packed_init(&cl->bstats);
 	cl->common.classid = classid;
 	cl->quantum	   = quantum;
 	cl->qdisc	   = qdisc_create_dflt(sch->dev_queue,
Index: linux-5.15.19-rt29/net/sched/sch_ets.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_ets.c
+++ linux-5.15.19-rt29/net/sched/sch_ets.c
@@ -691,7 +691,7 @@ static int ets_qdisc_change(struct Qdisc
 		q->classes[i].qdisc = NULL;
 		q->classes[i].quantum = 0;
 		q->classes[i].deficit = 0;
-		memset(&q->classes[i].bstats, 0, sizeof(q->classes[i].bstats));
+		gnet_stats_basic_packed_init(&q->classes[i].bstats);
 		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
 	}
 	return 0;
Index: linux-5.15.19-rt29/net/sched/sch_generic.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_generic.c
+++ linux-5.15.19-rt29/net/sched/sch_generic.c
@@ -892,6 +892,7 @@ struct Qdisc *qdisc_alloc(struct netdev_
 	__skb_queue_head_init(&sch->gso_skb);
 	__skb_queue_head_init(&sch->skb_bad_txq);
 	qdisc_skb_head_init(&sch->q);
+	gnet_stats_basic_packed_init(&sch->bstats);
 	spin_lock_init(&sch->q.lock);
 
 	if (ops->static_flags & TCQ_F_CPUSTATS) {
Index: linux-5.15.19-rt29/net/sched/sch_gred.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_gred.c
+++ linux-5.15.19-rt29/net/sched/sch_gred.c
@@ -364,9 +364,11 @@ static int gred_offload_dump_stats(struc
 	hw_stats->handle = sch->handle;
 	hw_stats->parent = sch->parent;
 
-	for (i = 0; i < MAX_DPs; i++)
+	for (i = 0; i < MAX_DPs; i++) {
+		gnet_stats_basic_packed_init(&hw_stats->stats.bstats[i]);
 		if (table->tab[i])
 			hw_stats->stats.xstats[i] = &table->tab[i]->stats;
+	}
 
 	ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
 	/* Even if driver returns failure adjust the stats - in case offload
Index: linux-5.15.19-rt29/net/sched/sch_hfsc.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_hfsc.c
+++ linux-5.15.19-rt29/net/sched/sch_hfsc.c
@@ -1406,6 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struc
 	if (err)
 		return err;
 
+	gnet_stats_basic_packed_init(&q->root.bstats);
 	q->root.cl_common.classid = sch->handle;
 	q->root.sched   = q;
 	q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
Index: linux-5.15.19-rt29/net/sched/sch_htb.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_htb.c
+++ linux-5.15.19-rt29/net/sched/sch_htb.c
@@ -1311,7 +1311,7 @@ static void htb_offload_aggregate_stats(
 	struct htb_class *c;
 	unsigned int i;
 
-	memset(&cl->bstats, 0, sizeof(cl->bstats));
+	gnet_stats_basic_packed_init(&cl->bstats);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
@@ -1357,7 +1357,7 @@ htb_dump_class_stats(struct Qdisc *sch,
 			if (cl->leaf.q)
 				cl->bstats = cl->leaf.q->bstats;
 			else
-				memset(&cl->bstats, 0, sizeof(cl->bstats));
+				gnet_stats_basic_packed_init(&cl->bstats);
 			cl->bstats.bytes += cl->bstats_bias.bytes;
 			cl->bstats.packets += cl->bstats_bias.packets;
 		} else {
@@ -1869,6 +1869,9 @@ static int htb_change_class(struct Qdisc
 		if (!cl)
 			goto failure;
 
+		gnet_stats_basic_packed_init(&cl->bstats);
+		gnet_stats_basic_packed_init(&cl->bstats_bias);
+
 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
 		if (err) {
 			kfree(cl);
Index: linux-5.15.19-rt29/net/sched/sch_mq.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_mq.c
+++ linux-5.15.19-rt29/net/sched/sch_mq.c
@@ -155,7 +155,7 @@ static int mq_dump(struct Qdisc *sch, st
 	unsigned int ntx;
 
 	sch->q.qlen = 0;
-	memset(&sch->bstats, 0, sizeof(sch->bstats));
+	gnet_stats_basic_packed_init(&sch->bstats);
 	memset(&sch->qstats, 0, sizeof(sch->qstats));
 
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
Index: linux-5.15.19-rt29/net/sched/sch_mqprio.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_mqprio.c
+++ linux-5.15.19-rt29/net/sched/sch_mqprio.c
@@ -412,7 +412,7 @@ static int mqprio_dump(struct Qdisc *sch
 	unsigned int ntx, tc;
 
 	sch->q.qlen = 0;
-	memset(&sch->bstats, 0, sizeof(sch->bstats));
+	gnet_stats_basic_packed_init(&sch->bstats);
 	memset(&sch->qstats, 0, sizeof(sch->qstats));
 
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
@@ -522,10 +522,11 @@ static int mqprio_dump_class_stats(struc
 		int i;
 		__u32 qlen;
 		struct gnet_stats_queue qstats = {0};
-		struct gnet_stats_basic_packed bstats = {0};
+		struct gnet_stats_basic_packed bstats;
 		struct net_device *dev = qdisc_dev(sch);
 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
 
+		gnet_stats_basic_packed_init(&bstats);
 		/* Drop lock here it will be reclaimed before touching
 		 * statistics this is required because the d->lock we
 		 * hold here is the look on dev_queue->qdisc_sleeping
Index: linux-5.15.19-rt29/net/sched/sch_qfq.c
===================================================================
--- linux-5.15.19-rt29.orig/net/sched/sch_qfq.c
+++ linux-5.15.19-rt29/net/sched/sch_qfq.c
@@ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc
 	if (cl == NULL)
 		return -ENOBUFS;
 
+	gnet_stats_basic_packed_init(&cl->bstats);
 	cl->common.classid = classid;
 	cl->deficit = lmax;