diff options
Diffstat (limited to 'target/linux/generic-2.6/patches/200-sched_esfq.patch')
| -rw-r--r-- | target/linux/generic-2.6/patches/200-sched_esfq.patch | 215 | 
1 files changed, 139 insertions, 76 deletions
diff --git a/target/linux/generic-2.6/patches/200-sched_esfq.patch b/target/linux/generic-2.6/patches/200-sched_esfq.patch index 6830b833a..dba20cff2 100644 --- a/target/linux/generic-2.6/patches/200-sched_esfq.patch +++ b/target/linux/generic-2.6/patches/200-sched_esfq.patch @@ -1,7 +1,7 @@ -diff -urN linux-2.6.19.old/include/linux/pkt_sched.h linux-2.6.19.dev/include/linux/pkt_sched.h ---- linux-2.6.19.old/include/linux/pkt_sched.h	2006-11-29 22:57:37.000000000 +0100 -+++ linux-2.6.19.dev/include/linux/pkt_sched.h	2006-12-14 03:13:51.000000000 +0100 -@@ -146,8 +146,35 @@ +diff -Naur linux-2.6.20.orig/include/linux/pkt_sched.h linux-2.6.20/include/linux/pkt_sched.h +--- linux-2.6.20.orig/include/linux/pkt_sched.h	2007-02-04 10:44:54.000000000 -0800 ++++ linux-2.6.20/include/linux/pkt_sched.h	2007-02-14 23:58:41.000000000 -0800 +@@ -146,8 +146,40 @@    *    *	The only reason for this is efficiency, it is possible    *	to change these parameters in compile time. @@ -22,6 +22,11 @@ diff -urN linux-2.6.19.old/include/linux/pkt_sched.h linux-2.6.19.dev/include/li  +	TCA_SFQ_HASH_DSTDIR,  +	TCA_SFQ_HASH_SRCDIR,  +	TCA_SFQ_HASH_FWMARKDIR, ++	/* conntrack */ ++	TCA_SFQ_HASH_CTORIGDST, ++	TCA_SFQ_HASH_CTORIGSRC, ++	TCA_SFQ_HASH_CTREPLDST, ++	TCA_SFQ_HASH_CTREPLSRC,  +};  +  +struct tc_esfq_qopt @@ -37,31 +42,29 @@ diff -urN linux-2.6.19.old/include/linux/pkt_sched.h linux-2.6.19.dev/include/li   /* RED section */   enum -diff -urN linux-2.6.19.old/net/sched/Kconfig linux-2.6.19.dev/net/sched/Kconfig ---- linux-2.6.19.old/net/sched/Kconfig	2006-11-29 22:57:37.000000000 +0100 -+++ linux-2.6.19.dev/net/sched/Kconfig	2006-12-14 03:13:51.000000000 +0100 -@@ -185,6 +185,28 @@ +diff -Naur linux-2.6.20.orig/net/sched/Kconfig linux-2.6.20/net/sched/Kconfig +--- linux-2.6.20.orig/net/sched/Kconfig	2007-02-04 10:44:54.000000000 -0800 ++++ linux-2.6.20/net/sched/Kconfig	2007-02-14 23:58:41.000000000 -0800 +@@ -189,6 +189,26 @@   	  To compile this code as a module, choose M here: the  
 	  module will be called sch_sfq.  +config NET_SCH_ESFQ -+	tristate "ESFQ queue" -+	depends on NET_SCHED ++	tristate "Enhanced Stochastic Fairness Queueing (ESFQ)"  +	---help---  +	  Say Y here if you want to use the Enhanced Stochastic Fairness  +	  Queueing (ESFQ) packet scheduling algorithm for some of your network  +	  devices or as a leaf discipline for a classful qdisc such as HTB or  +	  CBQ (see the top of <file:net/sched/sch_esfq.c> for details and  +	  references to the SFQ algorithm). -+	   ++  +	  This is an enhanced SFQ version which allows you to control some -+	  hardcoded values in the SFQ scheduler: queue depth, hash table size, -+	  and queues limit. -+	   -+	  ESFQ also adds control to the hash function used to identify packet -+	  flows. The original SFQ hashes by individual flow (TCP session or UDP -+	  stream); ESFQ can hash by src or dst IP as well, which can be more -+	  fair to users in some networking situations. ++	  hardcoded values in the SFQ scheduler. ++ ++	  ESFQ also adds control of the hash function used to identify packet ++	  flows. The original SFQ discipline hashes by connection; ESFQ adds ++	  several other hashing methods, such as by src IP or by dst IP, which ++	  can be more fair to users in some networking situations.  +	    +	  To compile this code as a module, choose M here: the  +	  module will be called sch_esfq. 
@@ -69,10 +72,10 @@ diff -urN linux-2.6.19.old/net/sched/Kconfig linux-2.6.19.dev/net/sched/Kconfig   config NET_SCH_TEQL   	tristate "True Link Equalizer (TEQL)"   	---help--- -diff -urN linux-2.6.19.old/net/sched/Makefile linux-2.6.19.dev/net/sched/Makefile ---- linux-2.6.19.old/net/sched/Makefile	2006-11-29 22:57:37.000000000 +0100 -+++ linux-2.6.19.dev/net/sched/Makefile	2006-12-14 03:13:51.000000000 +0100 -@@ -23,6 +23,7 @@ +diff -Naur linux-2.6.20.orig/net/sched/Makefile linux-2.6.20/net/sched/Makefile +--- linux-2.6.20.orig/net/sched/Makefile	2007-02-04 10:44:54.000000000 -0800 ++++ linux-2.6.20/net/sched/Makefile	2007-02-14 23:58:41.000000000 -0800 +@@ -24,6 +24,7 @@   obj-$(CONFIG_NET_SCH_INGRESS)	+= sch_ingress.o    obj-$(CONFIG_NET_SCH_DSMARK)	+= sch_dsmark.o   obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o @@ -80,10 +83,10 @@ diff -urN linux-2.6.19.old/net/sched/Makefile linux-2.6.19.dev/net/sched/Makefil   obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o   obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o   obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o -diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_esfq.c ---- linux-2.6.19.old/net/sched/sch_esfq.c	1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.19.dev/net/sched/sch_esfq.c	2006-12-14 03:13:51.000000000 +0100 -@@ -0,0 +1,644 @@ +diff -Naur linux-2.6.20.orig/net/sched/sch_esfq.c linux-2.6.20/net/sched/sch_esfq.c +--- linux-2.6.20.orig/net/sched/sch_esfq.c	1969-12-31 16:00:00.000000000 -0800 ++++ linux-2.6.20/net/sched/sch_esfq.c	2007-02-15 00:19:56.000000000 -0800 +@@ -0,0 +1,704 @@  +/*  + * net/sched/sch_esfq.c	Extended Stochastic Fairness Queueing discipline.  + * @@ -103,12 +106,12 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  + *  + * 		Corey Hickey, <bugfood-c@fatooh.org>  + *		Maintenance of the Linux 2.6 port. -+ *		Added fwmark hash (thanks to Robert Kurjata) ++ *		Added fwmark hash (thanks to Robert Kurjata).  
+ *		Added direct hashing for src, dst, and fwmark. ++ *		Added usage of jhash.  + *		  + */  + -+#include <linux/autoconf.h>  +#include <linux/module.h>  +#include <asm/uaccess.h>  +#include <asm/system.h> @@ -135,12 +138,16 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +#include <linux/skbuff.h>  +#include <net/sock.h>  +#include <net/pkt_sched.h> ++#include <linux/jhash.h>  + ++#ifdef CONFIG_NF_CONNTRACK_ENABLED ++#include <net/netfilter/nf_conntrack.h> ++#endif  +  +/*	Stochastic Fairness Queuing algorithm.  +	For more comments look at sch_sfq.c.  +	The difference is that you can change limit, depth, -+	hash table size and choose 7 hash types. ++	hash table size and choose alternate hash types.  +	  +	classic:	same as in sch_sfq.c  +	dst:		destination IP address @@ -149,9 +156,11 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +	dst_direct:  +	src_direct:  +	fwmark_direct:  direct hashing of the above sources ++	ctorigdst:	original destination IP address ++	ctorigsrc:	original source IP address ++	ctrepldst:	reply destination IP address ++	ctreplsrc:	reply source IP   +	 -+	TODO:  -+		make sfq_change work.  +*/  +  + @@ -190,20 +199,24 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +	unsigned	dyn_range;	/*	        		   saved range */  +};  + -+static __inline__ unsigned esfq_hash_u32(struct esfq_sched_data *q,u32 h) ++/* This contains the info we will hash. 
*/ ++struct esfq_packet_info  +{ -+	int pert = q->perturbation; -+ -+	if (pert) -+		h = (h<<pert) ^ (h>>(0x1F - pert)); -+ -+	h = ntohl(h) * 2654435761UL; -+	return h & (q->hash_divisor-1); -+} ++	u32	proto;		/* protocol or port */ ++	u32	src;		/* source from packet header */ ++	u32	dst;		/* destination from packet header */ ++	u32	ctorigsrc;	/* original source from conntrack */ ++	u32	ctorigdst;	/* original destination from conntrack */ ++	u32	ctreplsrc;	/* reply source from conntrack */ ++	u32	ctrepldst;	/* reply destination from conntrack */ ++	u32	mark;		/* netfilter mark (fwmark) */ ++};  +  +/* Hash input values directly into the "nearest" slot, taking into account the  + * range of input values seen. This is most useful when the hash table is at -+ * least as large as the range of possible values. */ ++ * least as large as the range of possible values. ++ * Note: this functionality was added before the change to using jhash, and may ++ * no longer be useful. */  +static __inline__ unsigned esfq_hash_direct(struct esfq_sched_data *q, u32 h)  +{  +	/* adjust minimum and maximum */ @@ -224,83 +237,128 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +		return (h - q->dyn_min) * (q->hash_divisor - 1) / q->dyn_range;  +}  + -+static __inline__ unsigned esfq_fold_hash_classic(struct esfq_sched_data *q, u32 h, u32 h1) ++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a)  +{ -+	int pert = q->perturbation; ++	return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); ++}  + -+	/* Have we any rotation primitives? If not, WHY? 
*/ -+	h ^= (h1<<pert) ^ (h1>>(0x1F - pert)); -+	h ^= h>>10; -+	return h & (q->hash_divisor-1); ++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) ++{ ++	return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1);  +}  + -+static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) ++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c)  +{ -+	u32 h, h2; -+	u32 hs; -+	u32 nfm; ++	return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); ++}  + ++ ++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) ++{ ++	struct esfq_packet_info info; ++#ifdef CONFIG_NF_CONNTRACK_ENABLED ++	enum ip_conntrack_info ctinfo; ++	struct nf_conn *ct = nf_ct_get(skb, &ctinfo); ++#endif ++	  +	switch (skb->protocol) {  +	case __constant_htons(ETH_P_IP):  +	{  +		struct iphdr *iph = skb->nh.iph; -+		h = iph->daddr; -+		hs = iph->saddr; -+		nfm = skb->nfmark; -+		h2 = hs^iph->protocol; ++		info.dst = iph->daddr; ++		info.src = iph->saddr;  +		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&  +		    (iph->protocol == IPPROTO_TCP ||  +		     iph->protocol == IPPROTO_UDP ||  +		     iph->protocol == IPPROTO_SCTP ||  +		     iph->protocol == IPPROTO_DCCP ||  +		     iph->protocol == IPPROTO_ESP)) -+			h2 ^= *(((u32*)iph) + iph->ihl); ++			info.proto = *(((u32*)iph) + iph->ihl); ++		else ++			info.proto = iph->protocol;  +		break;  +	}  +	case __constant_htons(ETH_P_IPV6):  +	{  +		struct ipv6hdr *iph = skb->nh.ipv6h; -+		h = iph->daddr.s6_addr32[3]; -+		hs = iph->saddr.s6_addr32[3]; -+		nfm = skb->nfmark; -+		h2 = hs^iph->nexthdr; ++		/* Hash ipv6 addresses into a u32. This isn't ideal, ++		 * but the code is simple. 
*/ ++		info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation); ++		info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation);  +		if (iph->nexthdr == IPPROTO_TCP ||  +		    iph->nexthdr == IPPROTO_UDP ||  +		    iph->nexthdr == IPPROTO_SCTP ||  +		    iph->nexthdr == IPPROTO_DCCP ||  +		    iph->nexthdr == IPPROTO_ESP) -+			h2 ^= *(u32*)&iph[1]; ++			info.proto = *(u32*)&iph[1]; ++		else ++			info.proto = iph->nexthdr;  +		break;  +	}  +	default: -+		h = (u32)(unsigned long)skb->dst; -+		hs = (u32)(unsigned long)skb->sk; -+		nfm = skb->nfmark; -+		h2 = hs^skb->protocol; ++		info.dst   = (u32)(unsigned long)skb->dst; ++		info.src   = (u32)(unsigned long)skb->sk; ++		info.proto = skb->protocol; ++	} ++ ++	info.mark = skb->mark; ++ ++#ifdef CONFIG_NF_CONNTRACK_ENABLED ++	/* defaults if there is no conntrack info */ ++	info.ctorigsrc = info.src; ++	info.ctorigdst = info.dst; ++	info.ctreplsrc = info.dst; ++	info.ctrepldst = info.src; ++	/* collect conntrack info */ ++	if (ct && ct != &nf_conntrack_untracked) { ++		if (skb->protocol == __constant_htons(ETH_P_IP)) { ++			info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; ++			info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip; ++			info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; ++			info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip; ++		} ++		else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { ++			/* Again, hash ipv6 addresses into a single u32. 
*/ ++			info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation); ++			info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation); ++			info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation); ++			info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation); ++		} ++  +	} ++#endif ++  +	switch(q->hash_kind)  +	{  +	case TCA_SFQ_HASH_CLASSIC: -+		return esfq_fold_hash_classic(q, h, h2); ++		return esfq_jhash_3words(q, info.dst, info.src, info.proto);  +	case TCA_SFQ_HASH_DST: -+		return esfq_hash_u32(q,h); ++		return esfq_jhash_1word(q, info.dst);  +	case TCA_SFQ_HASH_DSTDIR: -+		return esfq_hash_direct(q, ntohl(h)); ++		return esfq_hash_direct(q, ntohl(info.dst));  +	case TCA_SFQ_HASH_SRC: -+		return esfq_hash_u32(q,hs); ++		return esfq_jhash_1word(q, info.src);  +	case TCA_SFQ_HASH_SRCDIR: -+		return esfq_hash_direct(q, ntohl(hs)); -+#ifdef CONFIG_NETFILTER ++		return esfq_hash_direct(q, ntohl(info.src));  +	case TCA_SFQ_HASH_FWMARK: -+		return esfq_hash_u32(q,nfm); ++		return esfq_jhash_1word(q, info.mark);  +	case TCA_SFQ_HASH_FWMARKDIR: -+		return esfq_hash_direct(q,nfm); ++		return esfq_hash_direct(q, info.mark); ++#ifdef CONFIG_NF_CONNTRACK_ENABLED ++	case TCA_SFQ_HASH_CTORIGDST: ++		return esfq_jhash_1word(q, info.ctorigdst); ++	case TCA_SFQ_HASH_CTORIGSRC: ++		return esfq_jhash_1word(q, info.ctorigsrc); ++	case TCA_SFQ_HASH_CTREPLDST: ++		return esfq_jhash_1word(q, info.ctrepldst); ++	case TCA_SFQ_HASH_CTREPLSRC: ++		return esfq_jhash_1word(q, info.ctreplsrc);  +#endif  +	default:  +		if (net_ratelimit())  +			printk(KERN_WARNING "ESFQ: Unknown hash method. 
Falling back to classic.\n");  +	} -+	return esfq_fold_hash_classic(q, h, h2); ++	return esfq_jhash_3words(q, info.dst, info.src, info.proto);  +}  +  +static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) @@ -365,6 +423,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +		esfq_dec(q, x);  +		sch->q.qlen--;  +		sch->qstats.drops++; ++		sch->qstats.backlog -= len;  +		return len;  +	}  + @@ -381,6 +440,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +		sch->q.qlen--;  +		q->ht[q->hash[d]] = q->depth;  +		sch->qstats.drops++; ++		sch->qstats.backlog -= len;  +		return len;  +	}  + @@ -400,6 +460,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +		q->ht[hash] = x = q->dep[depth].next;  +		q->hash[x] = hash;  +	} ++	sch->qstats.backlog += skb->len;  +	__skb_queue_tail(&q->qs[x], skb);  +	esfq_inc(q, x);  +	if (q->qs[x].qlen == 1) {		/* The flow is new */ @@ -436,6 +497,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +		q->ht[hash] = x = q->dep[depth].next;  +		q->hash[x] = hash;  +	} ++	sch->qstats.backlog += skb->len;  +	__skb_queue_head(&q->qs[x], skb);  +	esfq_inc(q, x);  +	if (q->qs[x].qlen == 1) {		/* The flow is new */ @@ -480,6 +542,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +	skb = __skb_dequeue(&q->qs[a]);  +	esfq_dec(q, a);  +	sch->q.qlen--; ++	sch->qstats.backlog -= skb->len;  +	  +	/* Is the slot empty? 
*/  +	if (q->qs[a].qlen == 0) { @@ -542,7 +605,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +	  +	if (ctl->hash_kind) {  +		q->hash_kind = ctl->hash_kind; -+		if (q->hash_kind !=  TCA_SFQ_HASH_CLASSIC) ++		if (q->hash_kind != TCA_SFQ_HASH_CLASSIC)  +			q->perturb_period = 0;  +	}  +	 @@ -566,7 +629,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e  +{  +	struct esfq_sched_data *q = qdisc_priv(sch);  +	struct tc_esfq_qopt *ctl; -+	esfq_index p = ~0UL/2; ++	esfq_index p = ~0U/2;  +	int i;  +	  +	if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))  | 
