Linux Audio

Check our new training course

Loading...
  1/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
  2 *
  3 *		This program is free software; you can redistribute it and/or
  4 *		modify it under the terms of the GNU General Public License
  5 *		as published by the Free Software Foundation; either version
  6 *		2 of the License, or (at your option) any later version.
  7 *
  8 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  9 */
 10
 11#include <linux/module.h>
 12#include <linux/types.h>
 13#include <linux/kernel.h>
 14#include <linux/slab.h>
 15#include <linux/string.h>
 16#include <linux/errno.h>
 17#include <linux/if_arp.h>
 18#include <linux/netdevice.h>
 19#include <linux/init.h>
 20#include <linux/skbuff.h>
 21#include <linux/moduleparam.h>
 22#include <net/dst.h>
 23#include <net/neighbour.h>
 24#include <net/pkt_sched.h>
 25
/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc on a device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
 55
 56struct teql_master {
 57	struct Qdisc_ops qops;
 58	struct net_device *dev;
 59	struct Qdisc *slaves;
 60	struct list_head master_list;
 61	unsigned long	tx_bytes;
 62	unsigned long	tx_packets;
 63	unsigned long	tx_errors;
 64	unsigned long	tx_dropped;
 65};
 66
 67struct teql_sched_data {
 68	struct Qdisc *next;
 69	struct teql_master *m;
 70	struct neighbour *ncache;
 71	struct sk_buff_head q;
 72};
 73
 74#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
 75
 76#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
 77
 78/* "teql*" qdisc routines */
 79
 80static int
 81teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 82{
 83	struct net_device *dev = qdisc_dev(sch);
 84	struct teql_sched_data *q = qdisc_priv(sch);
 85
 86	if (q->q.qlen < dev->tx_queue_len) {
 87		__skb_queue_tail(&q->q, skb);
 88		return NET_XMIT_SUCCESS;
 89	}
 90
 91	return qdisc_drop(skb, sch);
 92}
 93
 94static struct sk_buff *
 95teql_dequeue(struct Qdisc *sch)
 96{
 97	struct teql_sched_data *dat = qdisc_priv(sch);
 98	struct netdev_queue *dat_queue;
 99	struct sk_buff *skb;
100
101	skb = __skb_dequeue(&dat->q);
102	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
103	if (skb == NULL) {
104		struct net_device *m = qdisc_dev(dat_queue->qdisc);
105		if (m) {
106			dat->m->slaves = sch;
107			netif_wake_queue(m);
108		}
109	} else {
110		qdisc_bstats_update(sch, skb);
111	}
112	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
113	return skb;
114}
115
116static struct sk_buff *
117teql_peek(struct Qdisc *sch)
118{
119	/* teql is meant to be used as root qdisc */
120	return NULL;
121}
122
/* neigh_release() wrapper that tolerates a NULL neighbour. */
static inline void
teql_neigh_release(struct neighbour *n)
{
	if (!n)
		return;

	neigh_release(n);
}
129
130static void
131teql_reset(struct Qdisc *sch)
132{
133	struct teql_sched_data *dat = qdisc_priv(sch);
134
135	skb_queue_purge(&dat->q);
136	sch->q.qlen = 0;
137	teql_neigh_release(xchg(&dat->ncache, NULL));
138}
139
140static void
141teql_destroy(struct Qdisc *sch)
142{
143	struct Qdisc *q, *prev;
144	struct teql_sched_data *dat = qdisc_priv(sch);
145	struct teql_master *master = dat->m;
146
147	prev = master->slaves;
148	if (prev) {
149		do {
150			q = NEXT_SLAVE(prev);
151			if (q == sch) {
152				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
153				if (q == master->slaves) {
154					master->slaves = NEXT_SLAVE(q);
155					if (q == master->slaves) {
156						struct netdev_queue *txq;
157						spinlock_t *root_lock;
158
159						txq = netdev_get_tx_queue(master->dev, 0);
160						master->slaves = NULL;
161
162						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
163						spin_lock_bh(root_lock);
164						qdisc_reset(txq->qdisc);
165						spin_unlock_bh(root_lock);
166					}
167				}
168				skb_queue_purge(&dat->q);
169				teql_neigh_release(xchg(&dat->ncache, NULL));
170				break;
171			}
172
173		} while ((prev = q) != master->slaves);
174	}
175}
176
177static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
178{
179	struct net_device *dev = qdisc_dev(sch);
180	struct teql_master *m = (struct teql_master *)sch->ops;
181	struct teql_sched_data *q = qdisc_priv(sch);
182
183	if (dev->hard_header_len > m->dev->hard_header_len)
184		return -EINVAL;
185
186	if (m->dev == dev)
187		return -ELOOP;
188
189	q->m = m;
190
191	skb_queue_head_init(&q->q);
192
193	if (m->slaves) {
194		if (m->dev->flags & IFF_UP) {
195			if ((m->dev->flags & IFF_POINTOPOINT &&
196			     !(dev->flags & IFF_POINTOPOINT)) ||
197			    (m->dev->flags & IFF_BROADCAST &&
198			     !(dev->flags & IFF_BROADCAST)) ||
199			    (m->dev->flags & IFF_MULTICAST &&
200			     !(dev->flags & IFF_MULTICAST)) ||
201			    dev->mtu < m->dev->mtu)
202				return -EINVAL;
203		} else {
204			if (!(dev->flags&IFF_POINTOPOINT))
205				m->dev->flags &= ~IFF_POINTOPOINT;
206			if (!(dev->flags&IFF_BROADCAST))
207				m->dev->flags &= ~IFF_BROADCAST;
208			if (!(dev->flags&IFF_MULTICAST))
209				m->dev->flags &= ~IFF_MULTICAST;
210			if (dev->mtu < m->dev->mtu)
211				m->dev->mtu = dev->mtu;
212		}
213		q->next = NEXT_SLAVE(m->slaves);
214		NEXT_SLAVE(m->slaves) = sch;
215	} else {
216		q->next = sch;
217		m->slaves = sch;
218		m->dev->mtu = dev->mtu;
219		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
220	}
221	return 0;
222}
223
224
225static int
226__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
227	       struct net_device *dev, struct netdev_queue *txq,
228	       struct neighbour *mn)
229{
230	struct teql_sched_data *q = qdisc_priv(txq->qdisc);
231	struct neighbour *n = q->ncache;
232
233	if (mn->tbl == NULL)
234		return -EINVAL;
235	if (n && n->tbl == mn->tbl &&
236	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
237		atomic_inc(&n->refcnt);
238	} else {
239		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
240		if (IS_ERR(n))
241			return PTR_ERR(n);
242	}
243	if (neigh_event_send(n, skb_res) == 0) {
244		int err;
245		char haddr[MAX_ADDR_LEN];
246
247		neigh_ha_snapshot(haddr, n, dev);
248		err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
249				      NULL, skb->len);
250
251		if (err < 0) {
252			neigh_release(n);
253			return -EINVAL;
254		}
255		teql_neigh_release(xchg(&q->ncache, n));
256		return 0;
257	}
258	neigh_release(n);
259	return (skb_res == NULL) ? -EAGAIN : 1;
260}
261
262static inline int teql_resolve(struct sk_buff *skb,
263			       struct sk_buff *skb_res,
264			       struct net_device *dev,
265			       struct netdev_queue *txq)
266{
267	struct dst_entry *dst = skb_dst(skb);
268	struct neighbour *mn;
269	int res;
270
271	if (txq->qdisc == &noop_qdisc)
272		return -ENODEV;
273
274	if (!dev->header_ops || !dst)
275		return 0;
276
277	rcu_read_lock();
278	mn = dst_get_neighbour_noref(dst);
279	res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
280	rcu_read_unlock();
281
282	return res;
283}
284
285static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
286{
287	struct teql_master *master = netdev_priv(dev);
288	struct Qdisc *start, *q;
289	int busy;
290	int nores;
291	int subq = skb_get_queue_mapping(skb);
292	struct sk_buff *skb_res = NULL;
293
294	start = master->slaves;
295
296restart:
297	nores = 0;
298	busy = 0;
299
300	q = start;
301	if (!q)
302		goto drop;
303
304	do {
305		struct net_device *slave = qdisc_dev(q);
306		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
307		const struct net_device_ops *slave_ops = slave->netdev_ops;
308
309		if (slave_txq->qdisc_sleeping != q)
310			continue;
311		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
312		    !netif_running(slave)) {
313			busy = 1;
314			continue;
315		}
316
317		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
318		case 0:
319			if (__netif_tx_trylock(slave_txq)) {
320				unsigned int length = qdisc_pkt_len(skb);
321
322				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
323				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
324					txq_trans_update(slave_txq);
325					__netif_tx_unlock(slave_txq);
326					master->slaves = NEXT_SLAVE(q);
327					netif_wake_queue(dev);
328					master->tx_packets++;
329					master->tx_bytes += length;
330					return NETDEV_TX_OK;
331				}
332				__netif_tx_unlock(slave_txq);
333			}
334			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
335				busy = 1;
336			break;
337		case 1:
338			master->slaves = NEXT_SLAVE(q);
339			return NETDEV_TX_OK;
340		default:
341			nores = 1;
342			break;
343		}
344		__skb_pull(skb, skb_network_offset(skb));
345	} while ((q = NEXT_SLAVE(q)) != start);
346
347	if (nores && skb_res == NULL) {
348		skb_res = skb;
349		goto restart;
350	}
351
352	if (busy) {
353		netif_stop_queue(dev);
354		return NETDEV_TX_BUSY;
355	}
356	master->tx_errors++;
357
358drop:
359	master->tx_dropped++;
360	dev_kfree_skb(skb);
361	return NETDEV_TX_OK;
362}
363
364static int teql_master_open(struct net_device *dev)
365{
366	struct Qdisc *q;
367	struct teql_master *m = netdev_priv(dev);
368	int mtu = 0xFFFE;
369	unsigned int flags = IFF_NOARP | IFF_MULTICAST;
370
371	if (m->slaves == NULL)
372		return -EUNATCH;
373
374	flags = FMASK;
375
376	q = m->slaves;
377	do {
378		struct net_device *slave = qdisc_dev(q);
379
380		if (slave == NULL)
381			return -EUNATCH;
382
383		if (slave->mtu < mtu)
384			mtu = slave->mtu;
385		if (slave->hard_header_len > LL_MAX_HEADER)
386			return -EINVAL;
387
388		/* If all the slaves are BROADCAST, master is BROADCAST
389		   If all the slaves are PtP, master is PtP
390		   Otherwise, master is NBMA.
391		 */
392		if (!(slave->flags&IFF_POINTOPOINT))
393			flags &= ~IFF_POINTOPOINT;
394		if (!(slave->flags&IFF_BROADCAST))
395			flags &= ~IFF_BROADCAST;
396		if (!(slave->flags&IFF_MULTICAST))
397			flags &= ~IFF_MULTICAST;
398	} while ((q = NEXT_SLAVE(q)) != m->slaves);
399
400	m->dev->mtu = mtu;
401	m->dev->flags = (m->dev->flags&~FMASK) | flags;
402	netif_start_queue(m->dev);
403	return 0;
404}
405
/* Take the master device down: just stop its transmit queue. Always 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
411
412static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
413						     struct rtnl_link_stats64 *stats)
414{
415	struct teql_master *m = netdev_priv(dev);
416
417	stats->tx_packets	= m->tx_packets;
418	stats->tx_bytes		= m->tx_bytes;
419	stats->tx_errors	= m->tx_errors;
420	stats->tx_dropped	= m->tx_dropped;
421	return stats;
422}
423
424static int teql_master_mtu(struct net_device *dev, int new_mtu)
425{
426	struct teql_master *m = netdev_priv(dev);
427	struct Qdisc *q;
428
429	if (new_mtu < 68)
430		return -EINVAL;
431
432	q = m->slaves;
433	if (q) {
434		do {
435			if (new_mtu > qdisc_dev(q)->mtu)
436				return -EINVAL;
437		} while ((q = NEXT_SLAVE(q)) != m->slaves);
438	}
439
440	dev->mtu = new_mtu;
441	return 0;
442}
443
444static const struct net_device_ops teql_netdev_ops = {
445	.ndo_open	= teql_master_open,
446	.ndo_stop	= teql_master_close,
447	.ndo_start_xmit	= teql_master_xmit,
448	.ndo_get_stats64 = teql_master_stats64,
449	.ndo_change_mtu	= teql_master_mtu,
450};
451
452static __init void teql_master_setup(struct net_device *dev)
453{
454	struct teql_master *master = netdev_priv(dev);
455	struct Qdisc_ops *ops = &master->qops;
456
457	master->dev	= dev;
458	ops->priv_size  = sizeof(struct teql_sched_data);
459
460	ops->enqueue	=	teql_enqueue;
461	ops->dequeue	=	teql_dequeue;
462	ops->peek	=	teql_peek;
463	ops->init	=	teql_qdisc_init;
464	ops->reset	=	teql_reset;
465	ops->destroy	=	teql_destroy;
466	ops->owner	=	THIS_MODULE;
467
468	dev->netdev_ops =       &teql_netdev_ops;
469	dev->type		= ARPHRD_VOID;
470	dev->mtu		= 1500;
471	dev->tx_queue_len	= 100;
472	dev->flags		= IFF_NOARP;
473	dev->hard_header_len	= LL_MAX_HEADER;
474	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
475}
476
477static LIST_HEAD(master_dev_list);
478static int max_equalizers = 1;
479module_param(max_equalizers, int, 0);
480MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
481
482static int __init teql_init(void)
483{
484	int i;
485	int err = -ENODEV;
486
487	for (i = 0; i < max_equalizers; i++) {
488		struct net_device *dev;
489		struct teql_master *master;
490
491		dev = alloc_netdev(sizeof(struct teql_master),
492				  "teql%d", teql_master_setup);
493		if (!dev) {
494			err = -ENOMEM;
495			break;
496		}
497
498		if ((err = register_netdev(dev))) {
499			free_netdev(dev);
500			break;
501		}
502
503		master = netdev_priv(dev);
504
505		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
506		err = register_qdisc(&master->qops);
507
508		if (err) {
509			unregister_netdev(dev);
510			free_netdev(dev);
511			break;
512		}
513
514		list_add_tail(&master->master_list, &master_dev_list);
515	}
516	return i ? 0 : err;
517}
518
519static void __exit teql_exit(void)
520{
521	struct teql_master *master, *nxt;
522
523	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
524
525		list_del(&master->master_list);
526
527		unregister_qdisc(&master->qops);
528		unregister_netdev(master->dev);
529		free_netdev(master->dev);
530	}
531}
532
533module_init(teql_init);
534module_exit(teql_exit);
535
536MODULE_LICENSE("GPL");