Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.8.
   1/* File veth.c created by Kyle A. Lucke on Mon Aug  7 2000. */
   2/*
   3 * IBM eServer iSeries Virtual Ethernet Device Driver
   4 * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp.
   5 * Substantially cleaned up by:
   6 * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
   7 * Copyright (C) 2004-2005 Michael Ellerman, IBM Corporation.
   8 *
   9 * This program is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU General Public License as
  11 * published by the Free Software Foundation; either version 2 of the
  12 * License, or (at your option) any later version.
  13 *
  14 * This program is distributed in the hope that it will be useful, but
  15 * WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * General Public License for more details.
  18 *
  19 * You should have received a copy of the GNU General Public License
  20 * along with this program; if not, write to the Free Software
  21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  22 * USA
  23 *
  24 *
  25 * This module implements the virtual ethernet device for iSeries LPAR
  26 * Linux.  It uses hypervisor message passing to implement an
  27 * ethernet-like network device communicating between partitions on
  28 * the iSeries.
  29 *
  30 * The iSeries LPAR hypervisor currently allows for up to 16 different
  31 * virtual ethernets.  These are all dynamically configurable on
  32 * OS/400 partitions, but dynamic configuration is not supported under
  33 * Linux yet.  An ethXX network device will be created for each
  34 * virtual ethernet this partition is connected to.
  35 *
  36 * - This driver is responsible for routing packets to and from other
  37 *   partitions.  The MAC addresses used by the virtual ethernets
  38 *   carry meaning and must not be modified.
  39 *
  40 * - Having 2 virtual ethernets to the same remote partition DOES NOT
  41 *   double the available bandwidth.  The 2 devices will share the
  42 *   available hypervisor bandwidth.
  43 *
  44 * - If you send a packet to your own mac address, it will just be
  45 *   dropped, you won't get it on the receive side.
  46 *
  47 * - Multicast is implemented by sending the frame to every
  48 *   other partition.  It is the responsibility of the receiving
  49 *   partition to filter the addresses desired.
  50 *
  51 * Tunable parameters:
  52 *
  53 * VETH_NUMBUFFERS: This compile time option defaults to 120.  It
  54 * controls how much memory Linux will allocate per remote partition
  55 * it is communicating with.  It can be thought of as the maximum
  56 * number of packets outstanding to a remote partition at a time.
  57 */
  58
  59#include <linux/module.h>
  60#include <linux/types.h>
  61#include <linux/errno.h>
  62#include <linux/ioport.h>
  63#include <linux/kernel.h>
  64#include <linux/netdevice.h>
  65#include <linux/etherdevice.h>
  66#include <linux/skbuff.h>
  67#include <linux/init.h>
  68#include <linux/delay.h>
  69#include <linux/mm.h>
  70#include <linux/ethtool.h>
  71#include <linux/if_ether.h>
  72#include <linux/slab.h>
  73
  74#include <asm/abs_addr.h>
  75#include <asm/iseries/mf.h>
  76#include <asm/uaccess.h>
  77#include <asm/firmware.h>
  78#include <asm/iseries/hv_lp_config.h>
  79#include <asm/iseries/hv_types.h>
  80#include <asm/iseries/hv_lp_event.h>
  81#include <asm/iommu.h>
  82#include <asm/vio.h>
  83
  84#undef DEBUG
  85
  86MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");
  87MODULE_DESCRIPTION("iSeries Virtual ethernet driver");
  88MODULE_LICENSE("GPL");
  89
  90#define VETH_EVENT_CAP	(0)
  91#define VETH_EVENT_FRAMES	(1)
  92#define VETH_EVENT_MONITOR	(2)
  93#define VETH_EVENT_FRAMES_ACK	(3)
  94
  95#define VETH_MAX_ACKS_PER_MSG	(20)
  96#define VETH_MAX_FRAMES_PER_MSG	(6)
  97
  98struct veth_frames_data {
  99	u32 addr[VETH_MAX_FRAMES_PER_MSG];
 100	u16 len[VETH_MAX_FRAMES_PER_MSG];
 101	u32 eofmask;
 102};
 103#define VETH_EOF_SHIFT		(32-VETH_MAX_FRAMES_PER_MSG)
 104
 105struct veth_frames_ack_data {
 106	u16 token[VETH_MAX_ACKS_PER_MSG];
 107};
 108
 109struct veth_cap_data {
 110	u8 caps_version;
 111	u8 rsvd1;
 112	u16 num_buffers;
 113	u16 ack_threshold;
 114	u16 rsvd2;
 115	u32 ack_timeout;
 116	u32 rsvd3;
 117	u64 rsvd4[3];
 118};
 119
 120struct veth_lpevent {
 121	struct HvLpEvent base_event;
 122	union {
 123		struct veth_cap_data caps_data;
 124		struct veth_frames_data frames_data;
 125		struct veth_frames_ack_data frames_ack_data;
 126	} u;
 127
 128};
 129
 130#define DRV_NAME	"iseries_veth"
 131#define DRV_VERSION	"2.0"
 132
 133#define VETH_NUMBUFFERS		(120)
 134#define VETH_ACKTIMEOUT 	(1000000) /* microseconds */
 135#define VETH_MAX_MCAST		(12)
 136
 137#define VETH_MAX_MTU		(9000)
 138
 139#if VETH_NUMBUFFERS < 10
 140#define ACK_THRESHOLD 		(1)
 141#elif VETH_NUMBUFFERS < 20
 142#define ACK_THRESHOLD 		(4)
 143#elif VETH_NUMBUFFERS < 40
 144#define ACK_THRESHOLD 		(10)
 145#else
 146#define ACK_THRESHOLD 		(20)
 147#endif
 148
 149#define	VETH_STATE_SHUTDOWN	(0x0001)
 150#define VETH_STATE_OPEN		(0x0002)
 151#define VETH_STATE_RESET	(0x0004)
 152#define VETH_STATE_SENTMON	(0x0008)
 153#define VETH_STATE_SENTCAPS	(0x0010)
 154#define VETH_STATE_GOTCAPACK	(0x0020)
 155#define VETH_STATE_GOTCAPS	(0x0040)
 156#define VETH_STATE_SENTCAPACK	(0x0080)
 157#define VETH_STATE_READY	(0x0100)
 158
 159struct veth_msg {
 160	struct veth_msg *next;
 161	struct veth_frames_data data;
 162	int token;
 163	int in_use;
 164	struct sk_buff *skb;
 165	struct device *dev;
 166};
 167
 168struct veth_lpar_connection {
 169	HvLpIndex remote_lp;
 170	struct delayed_work statemachine_wq;
 171	struct veth_msg *msgs;
 172	int num_events;
 173	struct veth_cap_data local_caps;
 174
 175	struct kobject kobject;
 176	struct timer_list ack_timer;
 177
 178	struct timer_list reset_timer;
 179	unsigned int reset_timeout;
 180	unsigned long last_contact;
 181	int outstanding_tx;
 182
 183	spinlock_t lock;
 184	unsigned long state;
 185	HvLpInstanceId src_inst;
 186	HvLpInstanceId dst_inst;
 187	struct veth_lpevent cap_event, cap_ack_event;
 188	u16 pending_acks[VETH_MAX_ACKS_PER_MSG];
 189	u32 num_pending_acks;
 190
 191	int num_ack_events;
 192	struct veth_cap_data remote_caps;
 193	u32 ack_timeout;
 194
 195	struct veth_msg *msg_stack_head;
 196};
 197
 198struct veth_port {
 199	struct device *dev;
 200	u64 mac_addr;
 201	HvLpIndexMap lpar_map;
 202
 203	/* queue_lock protects the stopped_map and dev's queue. */
 204	spinlock_t queue_lock;
 205	HvLpIndexMap stopped_map;
 206
 207	/* mcast_gate protects promiscuous, num_mcast & mcast_addr. */
 208	rwlock_t mcast_gate;
 209	int promiscuous;
 210	int num_mcast;
 211	u64 mcast_addr[VETH_MAX_MCAST];
 212
 213	struct kobject kobject;
 214};
 215
 216static HvLpIndex this_lp;
 217static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */
 218static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */
 219
 220static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);
 221static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);
 222static void veth_wake_queues(struct veth_lpar_connection *cnx);
 223static void veth_stop_queues(struct veth_lpar_connection *cnx);
 224static void veth_receive(struct veth_lpar_connection *, struct veth_lpevent *);
 225static void veth_release_connection(struct kobject *kobject);
 226static void veth_timed_ack(unsigned long ptr);
 227static void veth_timed_reset(unsigned long ptr);
 228
 229/*
 230 * Utility functions
 231 */
 232
 233#define veth_info(fmt, args...) \
 234	printk(KERN_INFO DRV_NAME ": " fmt, ## args)
 235
 236#define veth_error(fmt, args...) \
 237	printk(KERN_ERR DRV_NAME ": Error: " fmt, ## args)
 238
 239#ifdef DEBUG
 240#define veth_debug(fmt, args...) \
 241	printk(KERN_DEBUG DRV_NAME ": " fmt, ## args)
 242#else
 243#define veth_debug(fmt, args...) do {} while (0)
 244#endif
 245
 246/* You must hold the connection's lock when you call this function. */
 247static inline void veth_stack_push(struct veth_lpar_connection *cnx,
 248				   struct veth_msg *msg)
 249{
 250	msg->next = cnx->msg_stack_head;
 251	cnx->msg_stack_head = msg;
 252}
 253
 254/* You must hold the connection's lock when you call this function. */
 255static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx)
 256{
 257	struct veth_msg *msg;
 258
 259	msg = cnx->msg_stack_head;
 260	if (msg)
 261		cnx->msg_stack_head = cnx->msg_stack_head->next;
 262
 263	return msg;
 264}
 265
 266/* You must hold the connection's lock when you call this function. */
 267static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx)
 268{
 269	return cnx->msg_stack_head == NULL;
 270}
 271
 272static inline HvLpEvent_Rc
 273veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype,
 274		 HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype,
 275		 u64 token,
 276		 u64 data1, u64 data2, u64 data3, u64 data4, u64 data5)
 277{
 278	return HvCallEvent_signalLpEventFast(cnx->remote_lp,
 279					     HvLpEvent_Type_VirtualLan,
 280					     subtype, ackind, acktype,
 281					     cnx->src_inst,
 282					     cnx->dst_inst,
 283					     token, data1, data2, data3,
 284					     data4, data5);
 285}
 286
 287static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx,
 288					   u16 subtype, u64 token, void *data)
 289{
 290	u64 *p = (u64 *) data;
 291
 292	return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck,
 293				HvLpEvent_AckType_ImmediateAck,
 294				token, p[0], p[1], p[2], p[3], p[4]);
 295}
 296
 297struct veth_allocation {
 298	struct completion c;
 299	int num;
 300};
 301
 302static void veth_complete_allocation(void *parm, int number)
 303{
 304	struct veth_allocation *vc = (struct veth_allocation *)parm;
 305
 306	vc->num = number;
 307	complete(&vc->c);
 308}
 309
 310static int veth_allocate_events(HvLpIndex rlp, int number)
 311{
 312	struct veth_allocation vc =
 313		{ COMPLETION_INITIALIZER_ONSTACK(vc.c), 0 };
 314
 315	mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan,
 316			    sizeof(struct veth_lpevent), number,
 317			    &veth_complete_allocation, &vc);
 318	wait_for_completion(&vc.c);
 319
 320	return vc.num;
 321}
 322
 323/*
 324 * sysfs support
 325 */
 326
 327struct veth_cnx_attribute {
 328	struct attribute attr;
 329	ssize_t (*show)(struct veth_lpar_connection *, char *buf);
 330	ssize_t (*store)(struct veth_lpar_connection *, const char *buf);
 331};
 332
 333static ssize_t veth_cnx_attribute_show(struct kobject *kobj,
 334		struct attribute *attr, char *buf)
 335{
 336	struct veth_cnx_attribute *cnx_attr;
 337	struct veth_lpar_connection *cnx;
 338
 339	cnx_attr = container_of(attr, struct veth_cnx_attribute, attr);
 340	cnx = container_of(kobj, struct veth_lpar_connection, kobject);
 341
 342	if (!cnx_attr->show)
 343		return -EIO;
 344
 345	return cnx_attr->show(cnx, buf);
 346}
 347
 348#define CUSTOM_CNX_ATTR(_name, _format, _expression)			\
 349static ssize_t _name##_show(struct veth_lpar_connection *cnx, char *buf)\
 350{									\
 351	return sprintf(buf, _format, _expression);			\
 352}									\
 353struct veth_cnx_attribute veth_cnx_attr_##_name = __ATTR_RO(_name)
 354
 355#define SIMPLE_CNX_ATTR(_name)	\
 356	CUSTOM_CNX_ATTR(_name, "%lu\n", (unsigned long)cnx->_name)
 357
 358SIMPLE_CNX_ATTR(outstanding_tx);
 359SIMPLE_CNX_ATTR(remote_lp);
 360SIMPLE_CNX_ATTR(num_events);
 361SIMPLE_CNX_ATTR(src_inst);
 362SIMPLE_CNX_ATTR(dst_inst);
 363SIMPLE_CNX_ATTR(num_pending_acks);
 364SIMPLE_CNX_ATTR(num_ack_events);
 365CUSTOM_CNX_ATTR(ack_timeout, "%d\n", jiffies_to_msecs(cnx->ack_timeout));
 366CUSTOM_CNX_ATTR(reset_timeout, "%d\n", jiffies_to_msecs(cnx->reset_timeout));
 367CUSTOM_CNX_ATTR(state, "0x%.4lX\n", cnx->state);
 368CUSTOM_CNX_ATTR(last_contact, "%d\n", cnx->last_contact ?
 369		jiffies_to_msecs(jiffies - cnx->last_contact) : 0);
 370
 371#define GET_CNX_ATTR(_name)	(&veth_cnx_attr_##_name.attr)
 372
 373static struct attribute *veth_cnx_default_attrs[] = {
 374	GET_CNX_ATTR(outstanding_tx),
 375	GET_CNX_ATTR(remote_lp),
 376	GET_CNX_ATTR(num_events),
 377	GET_CNX_ATTR(reset_timeout),
 378	GET_CNX_ATTR(last_contact),
 379	GET_CNX_ATTR(state),
 380	GET_CNX_ATTR(src_inst),
 381	GET_CNX_ATTR(dst_inst),
 382	GET_CNX_ATTR(num_pending_acks),
 383	GET_CNX_ATTR(num_ack_events),
 384	GET_CNX_ATTR(ack_timeout),
 385	NULL
 386};
 387
 388static const struct sysfs_ops veth_cnx_sysfs_ops = {
 389		.show = veth_cnx_attribute_show
 390};
 391
 392static struct kobj_type veth_lpar_connection_ktype = {
 393	.release	= veth_release_connection,
 394	.sysfs_ops	= &veth_cnx_sysfs_ops,
 395	.default_attrs	= veth_cnx_default_attrs
 396};
 397
 398struct veth_port_attribute {
 399	struct attribute attr;
 400	ssize_t (*show)(struct veth_port *, char *buf);
 401	ssize_t (*store)(struct veth_port *, const char *buf);
 402};
 403
 404static ssize_t veth_port_attribute_show(struct kobject *kobj,
 405		struct attribute *attr, char *buf)
 406{
 407	struct veth_port_attribute *port_attr;
 408	struct veth_port *port;
 409
 410	port_attr = container_of(attr, struct veth_port_attribute, attr);
 411	port = container_of(kobj, struct veth_port, kobject);
 412
 413	if (!port_attr->show)
 414		return -EIO;
 415
 416	return port_attr->show(port, buf);
 417}
 418
 419#define CUSTOM_PORT_ATTR(_name, _format, _expression)			\
 420static ssize_t _name##_show(struct veth_port *port, char *buf)		\
 421{									\
 422	return sprintf(buf, _format, _expression);			\
 423}									\
 424struct veth_port_attribute veth_port_attr_##_name = __ATTR_RO(_name)
 425
 426#define SIMPLE_PORT_ATTR(_name)	\
 427	CUSTOM_PORT_ATTR(_name, "%lu\n", (unsigned long)port->_name)
 428
 429SIMPLE_PORT_ATTR(promiscuous);
 430SIMPLE_PORT_ATTR(num_mcast);
 431CUSTOM_PORT_ATTR(lpar_map, "0x%X\n", port->lpar_map);
 432CUSTOM_PORT_ATTR(stopped_map, "0x%X\n", port->stopped_map);
 433CUSTOM_PORT_ATTR(mac_addr, "0x%llX\n", port->mac_addr);
 434
 435#define GET_PORT_ATTR(_name)	(&veth_port_attr_##_name.attr)
 436static struct attribute *veth_port_default_attrs[] = {
 437	GET_PORT_ATTR(mac_addr),
 438	GET_PORT_ATTR(lpar_map),
 439	GET_PORT_ATTR(stopped_map),
 440	GET_PORT_ATTR(promiscuous),
 441	GET_PORT_ATTR(num_mcast),
 442	NULL
 443};
 444
 445static const struct sysfs_ops veth_port_sysfs_ops = {
 446	.show = veth_port_attribute_show
 447};
 448
 449static struct kobj_type veth_port_ktype = {
 450	.sysfs_ops	= &veth_port_sysfs_ops,
 451	.default_attrs	= veth_port_default_attrs
 452};
 453
 454/*
 455 * LPAR connection code
 456 */
 457
 458static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx)
 459{
 460	schedule_delayed_work(&cnx->statemachine_wq, 0);
 461}
 462
 463static void veth_take_cap(struct veth_lpar_connection *cnx,
 464			  struct veth_lpevent *event)
 465{
 466	unsigned long flags;
 467
 468	spin_lock_irqsave(&cnx->lock, flags);
 469	/* Receiving caps may mean the other end has just come up, so
 470	 * we need to reload the instance ID of the far end */
 471	cnx->dst_inst =
 472		HvCallEvent_getTargetLpInstanceId(cnx->remote_lp,
 473						  HvLpEvent_Type_VirtualLan);
 474
 475	if (cnx->state & VETH_STATE_GOTCAPS) {
 476		veth_error("Received a second capabilities from LPAR %d.\n",
 477			   cnx->remote_lp);
 478		event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable;
 479		HvCallEvent_ackLpEvent((struct HvLpEvent *) event);
 480	} else {
 481		memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event));
 482		cnx->state |= VETH_STATE_GOTCAPS;
 483		veth_kick_statemachine(cnx);
 484	}
 485	spin_unlock_irqrestore(&cnx->lock, flags);
 486}
 487
 488static void veth_take_cap_ack(struct veth_lpar_connection *cnx,
 489			      struct veth_lpevent *event)
 490{
 491	unsigned long flags;
 492
 493	spin_lock_irqsave(&cnx->lock, flags);
 494	if (cnx->state & VETH_STATE_GOTCAPACK) {
 495		veth_error("Received a second capabilities ack from LPAR %d.\n",
 496			   cnx->remote_lp);
 497	} else {
 498		memcpy(&cnx->cap_ack_event, event,
 499		       sizeof(cnx->cap_ack_event));
 500		cnx->state |= VETH_STATE_GOTCAPACK;
 501		veth_kick_statemachine(cnx);
 502	}
 503	spin_unlock_irqrestore(&cnx->lock, flags);
 504}
 505
 506static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,
 507				  struct veth_lpevent *event)
 508{
 509	unsigned long flags;
 510
 511	spin_lock_irqsave(&cnx->lock, flags);
 512	veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);
 513
 514	/* Avoid kicking the statemachine once we're shutdown.
 515	 * It's unnecessary and it could break veth_stop_connection(). */
 516
 517	if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
 518		cnx->state |= VETH_STATE_RESET;
 519		veth_kick_statemachine(cnx);
 520	}
 521	spin_unlock_irqrestore(&cnx->lock, flags);
 522}
 523
 524static void veth_handle_ack(struct veth_lpevent *event)
 525{
 526	HvLpIndex rlp = event->base_event.xTargetLp;
 527	struct veth_lpar_connection *cnx = veth_cnx[rlp];
 528
 529	BUG_ON(! cnx);
 530
 531	switch (event->base_event.xSubtype) {
 532	case VETH_EVENT_CAP:
 533		veth_take_cap_ack(cnx, event);
 534		break;
 535	case VETH_EVENT_MONITOR:
 536		veth_take_monitor_ack(cnx, event);
 537		break;
 538	default:
 539		veth_error("Unknown ack type %d from LPAR %d.\n",
 540				event->base_event.xSubtype, rlp);
 541	}
 542}
 543
 544static void veth_handle_int(struct veth_lpevent *event)
 545{
 546	HvLpIndex rlp = event->base_event.xSourceLp;
 547	struct veth_lpar_connection *cnx = veth_cnx[rlp];
 548	unsigned long flags;
 549	int i, acked = 0;
 550
 551	BUG_ON(! cnx);
 552
 553	switch (event->base_event.xSubtype) {
 554	case VETH_EVENT_CAP:
 555		veth_take_cap(cnx, event);
 556		break;
 557	case VETH_EVENT_MONITOR:
 558		/* do nothing... this'll hang out here til we're dead,
 559		 * and the hypervisor will return it for us. */
 560		break;
 561	case VETH_EVENT_FRAMES_ACK:
 562		spin_lock_irqsave(&cnx->lock, flags);
 563
 564		for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) {
 565			u16 msgnum = event->u.frames_ack_data.token[i];
 566
 567			if (msgnum < VETH_NUMBUFFERS) {
 568				veth_recycle_msg(cnx, cnx->msgs + msgnum);
 569				cnx->outstanding_tx--;
 570				acked++;
 571			}
 572		}
 573
 574		if (acked > 0) {
 575			cnx->last_contact = jiffies;
 576			veth_wake_queues(cnx);
 577		}
 578
 579		spin_unlock_irqrestore(&cnx->lock, flags);
 580		break;
 581	case VETH_EVENT_FRAMES:
 582		veth_receive(cnx, event);
 583		break;
 584	default:
 585		veth_error("Unknown interrupt type %d from LPAR %d.\n",
 586				event->base_event.xSubtype, rlp);
 587	}
 588}
 589
 590static void veth_handle_event(struct HvLpEvent *event)
 591{
 592	struct veth_lpevent *veth_event = (struct veth_lpevent *)event;
 593
 594	if (hvlpevent_is_ack(event))
 595		veth_handle_ack(veth_event);
 596	else
 597		veth_handle_int(veth_event);
 598}
 599
 600static int veth_process_caps(struct veth_lpar_connection *cnx)
 601{
 602	struct veth_cap_data *remote_caps = &cnx->remote_caps;
 603	int num_acks_needed;
 604
 605	/* Convert timer to jiffies */
 606	cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000;
 607
 608	if ( (remote_caps->num_buffers == 0) ||
 609	     (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG) ||
 610	     (remote_caps->ack_threshold == 0) ||
 611	     (cnx->ack_timeout == 0) ) {
 612		veth_error("Received incompatible capabilities from LPAR %d.\n",
 613				cnx->remote_lp);
 614		return HvLpEvent_Rc_InvalidSubtypeData;
 615	}
 616
 617	num_acks_needed = (remote_caps->num_buffers
 618			   / remote_caps->ack_threshold) + 1;
 619
 620	/* FIXME: locking on num_ack_events? */
 621	if (cnx->num_ack_events < num_acks_needed) {
 622		int num;
 623
 624		num = veth_allocate_events(cnx->remote_lp,
 625					   num_acks_needed-cnx->num_ack_events);
 626		if (num > 0)
 627			cnx->num_ack_events += num;
 628
 629		if (cnx->num_ack_events < num_acks_needed) {
 630			veth_error("Couldn't allocate enough ack events "
 631					"for LPAR %d.\n", cnx->remote_lp);
 632
 633			return HvLpEvent_Rc_BufferNotAvailable;
 634		}
 635	}
 636
 637
 638	return HvLpEvent_Rc_Good;
 639}
 640
 641/* FIXME: The gotos here are a bit dubious */
 642static void veth_statemachine(struct work_struct *work)
 643{
 644	struct veth_lpar_connection *cnx =
 645		container_of(work, struct veth_lpar_connection,
 646			     statemachine_wq.work);
 647	int rlp = cnx->remote_lp;
 648	int rc;
 649
 650	spin_lock_irq(&cnx->lock);
 651
 652 restart:
 653	if (cnx->state & VETH_STATE_RESET) {
 654		if (cnx->state & VETH_STATE_OPEN)
 655			HvCallEvent_closeLpEventPath(cnx->remote_lp,
 656						     HvLpEvent_Type_VirtualLan);
 657
 658		/*
 659		 * Reset ack data. This prevents the ack_timer actually
 660		 * doing anything, even if it runs one more time when
 661		 * we drop the lock below.
 662		 */
 663		memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
 664		cnx->num_pending_acks = 0;
 665
 666		cnx->state &= ~(VETH_STATE_RESET | VETH_STATE_SENTMON
 667				| VETH_STATE_OPEN | VETH_STATE_SENTCAPS
 668				| VETH_STATE_GOTCAPACK | VETH_STATE_GOTCAPS
 669				| VETH_STATE_SENTCAPACK | VETH_STATE_READY);
 670
 671		/* Clean up any leftover messages */
 672		if (cnx->msgs) {
 673			int i;
 674			for (i = 0; i < VETH_NUMBUFFERS; ++i)
 675				veth_recycle_msg(cnx, cnx->msgs + i);
 676		}
 677
 678		cnx->outstanding_tx = 0;
 679		veth_wake_queues(cnx);
 680
 681		/* Drop the lock so we can do stuff that might sleep or
 682		 * take other locks. */
 683		spin_unlock_irq(&cnx->lock);
 684
 685		del_timer_sync(&cnx->ack_timer);
 686		del_timer_sync(&cnx->reset_timer);
 687
 688		spin_lock_irq(&cnx->lock);
 689
 690		if (cnx->state & VETH_STATE_RESET)
 691			goto restart;
 692
 693		/* Hack, wait for the other end to reset itself. */
 694		if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
 695			schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ);
 696			goto out;
 697		}
 698	}
 699
 700	if (cnx->state & VETH_STATE_SHUTDOWN)
 701		/* It's all over, do nothing */
 702		goto out;
 703
 704	if ( !(cnx->state & VETH_STATE_OPEN) ) {
 705		if (! cnx->msgs || (cnx->num_events < (2 + VETH_NUMBUFFERS)) )
 706			goto cant_cope;
 707
 708		HvCallEvent_openLpEventPath(rlp, HvLpEvent_Type_VirtualLan);
 709		cnx->src_inst =
 710			HvCallEvent_getSourceLpInstanceId(rlp,
 711							  HvLpEvent_Type_VirtualLan);
 712		cnx->dst_inst =
 713			HvCallEvent_getTargetLpInstanceId(rlp,
 714							  HvLpEvent_Type_VirtualLan);
 715		cnx->state |= VETH_STATE_OPEN;
 716	}
 717
 718	if ( (cnx->state & VETH_STATE_OPEN) &&
 719	     !(cnx->state & VETH_STATE_SENTMON) ) {
 720		rc = veth_signalevent(cnx, VETH_EVENT_MONITOR,
 721				      HvLpEvent_AckInd_DoAck,
 722				      HvLpEvent_AckType_DeferredAck,
 723				      0, 0, 0, 0, 0, 0);
 724
 725		if (rc == HvLpEvent_Rc_Good) {
 726			cnx->state |= VETH_STATE_SENTMON;
 727		} else {
 728			if ( (rc != HvLpEvent_Rc_PartitionDead) &&
 729			     (rc != HvLpEvent_Rc_PathClosed) )
 730				veth_error("Error sending monitor to LPAR %d, "
 731						"rc = %d\n", rlp, rc);
 732
 733			/* Oh well, hope we get a cap from the other
 734			 * end and do better when that kicks us */
 735			goto out;
 736		}
 737	}
 738
 739	if ( (cnx->state & VETH_STATE_OPEN) &&
 740	     !(cnx->state & VETH_STATE_SENTCAPS)) {
 741		u64 *rawcap = (u64 *)&cnx->local_caps;
 742
 743		rc = veth_signalevent(cnx, VETH_EVENT_CAP,
 744				      HvLpEvent_AckInd_DoAck,
 745				      HvLpEvent_AckType_ImmediateAck,
 746				      0, rawcap[0], rawcap[1], rawcap[2],
 747				      rawcap[3], rawcap[4]);
 748
 749		if (rc == HvLpEvent_Rc_Good) {
 750			cnx->state |= VETH_STATE_SENTCAPS;
 751		} else {
 752			if ( (rc != HvLpEvent_Rc_PartitionDead) &&
 753			     (rc != HvLpEvent_Rc_PathClosed) )
 754				veth_error("Error sending caps to LPAR %d, "
 755						"rc = %d\n", rlp, rc);
 756
 757			/* Oh well, hope we get a cap from the other
 758			 * end and do better when that kicks us */
 759			goto out;
 760		}
 761	}
 762
 763	if ((cnx->state & VETH_STATE_GOTCAPS) &&
 764	    !(cnx->state & VETH_STATE_SENTCAPACK)) {
 765		struct veth_cap_data *remote_caps = &cnx->remote_caps;
 766
 767		memcpy(remote_caps, &cnx->cap_event.u.caps_data,
 768		       sizeof(*remote_caps));
 769
 770		spin_unlock_irq(&cnx->lock);
 771		rc = veth_process_caps(cnx);
 772		spin_lock_irq(&cnx->lock);
 773
 774		/* We dropped the lock, so recheck for anything which
 775		 * might mess us up */
 776		if (cnx->state & (VETH_STATE_RESET|VETH_STATE_SHUTDOWN))
 777			goto restart;
 778
 779		cnx->cap_event.base_event.xRc = rc;
 780		HvCallEvent_ackLpEvent((struct HvLpEvent *)&cnx->cap_event);
 781		if (rc == HvLpEvent_Rc_Good)
 782			cnx->state |= VETH_STATE_SENTCAPACK;
 783		else
 784			goto cant_cope;
 785	}
 786
 787	if ((cnx->state & VETH_STATE_GOTCAPACK) &&
 788	    (cnx->state & VETH_STATE_GOTCAPS) &&
 789	    !(cnx->state & VETH_STATE_READY)) {
 790		if (cnx->cap_ack_event.base_event.xRc == HvLpEvent_Rc_Good) {
 791			/* Start the ACK timer */
 792			cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
 793			add_timer(&cnx->ack_timer);
 794			cnx->state |= VETH_STATE_READY;
 795		} else {
 796			veth_error("Caps rejected by LPAR %d, rc = %d\n",
 797					rlp, cnx->cap_ack_event.base_event.xRc);
 798			goto cant_cope;
 799		}
 800	}
 801
 802 out:
 803	spin_unlock_irq(&cnx->lock);
 804	return;
 805
 806 cant_cope:
 807	/* FIXME: we get here if something happens we really can't
 808	 * cope with.  The link will never work once we get here, and
 809	 * all we can do is not lock the rest of the system up */
 810	veth_error("Unrecoverable error on connection to LPAR %d, shutting down"
 811			" (state = 0x%04lx)\n", rlp, cnx->state);
 812	cnx->state |= VETH_STATE_SHUTDOWN;
 813	spin_unlock_irq(&cnx->lock);
 814}
 815
 816static int veth_init_connection(u8 rlp)
 817{
 818	struct veth_lpar_connection *cnx;
 819	struct veth_msg *msgs;
 820	int i;
 821
 822	if ( (rlp == this_lp) ||
 823	     ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) )
 824		return 0;
 825
 826	cnx = kzalloc(sizeof(*cnx), GFP_KERNEL);
 827	if (! cnx)
 828		return -ENOMEM;
 829
 830	cnx->remote_lp = rlp;
 831	spin_lock_init(&cnx->lock);
 832	INIT_DELAYED_WORK(&cnx->statemachine_wq, veth_statemachine);
 833
 834	init_timer(&cnx->ack_timer);
 835	cnx->ack_timer.function = veth_timed_ack;
 836	cnx->ack_timer.data = (unsigned long) cnx;
 837
 838	init_timer(&cnx->reset_timer);
 839	cnx->reset_timer.function = veth_timed_reset;
 840	cnx->reset_timer.data = (unsigned long) cnx;
 841	cnx->reset_timeout = 5 * HZ * (VETH_ACKTIMEOUT / 1000000);
 842
 843	memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
 844
 845	veth_cnx[rlp] = cnx;
 846
 847	/* This gets us 1 reference, which is held on behalf of the driver
 848	 * infrastructure. It's released at module unload. */
 849	kobject_init(&cnx->kobject, &veth_lpar_connection_ktype);
 850
 851	msgs = kcalloc(VETH_NUMBUFFERS, sizeof(struct veth_msg), GFP_KERNEL);
 852	if (! msgs) {
 853		veth_error("Can't allocate buffers for LPAR %d.\n", rlp);
 854		return -ENOMEM;
 855	}
 856
 857	cnx->msgs = msgs;
 858
 859	for (i = 0; i < VETH_NUMBUFFERS; i++) {
 860		msgs[i].token = i;
 861		veth_stack_push(cnx, msgs + i);
 862	}
 863
 864	cnx->num_events = veth_allocate_events(rlp, 2 + VETH_NUMBUFFERS);
 865
 866	if (cnx->num_events < (2 + VETH_NUMBUFFERS)) {
 867		veth_error("Can't allocate enough events for LPAR %d.\n", rlp);
 868		return -ENOMEM;
 869	}
 870
 871	cnx->local_caps.num_buffers = VETH_NUMBUFFERS;
 872	cnx->local_caps.ack_threshold = ACK_THRESHOLD;
 873	cnx->local_caps.ack_timeout = VETH_ACKTIMEOUT;
 874
 875	return 0;
 876}
 877
 878static void veth_stop_connection(struct veth_lpar_connection *cnx)
 879{
 880	if (!cnx)
 881		return;
 882
 883	spin_lock_irq(&cnx->lock);
 884	cnx->state |= VETH_STATE_RESET | VETH_STATE_SHUTDOWN;
 885	veth_kick_statemachine(cnx);
 886	spin_unlock_irq(&cnx->lock);
 887
 888	/* ensure the statemachine runs now and waits for its completion */
 889	flush_delayed_work_sync(&cnx->statemachine_wq);
 890}
 891
 892static void veth_destroy_connection(struct veth_lpar_connection *cnx)
 893{
 894	if (!cnx)
 895		return;
 896
 897	if (cnx->num_events > 0)
 898		mf_deallocate_lp_events(cnx->remote_lp,
 899				      HvLpEvent_Type_VirtualLan,
 900				      cnx->num_events,
 901				      NULL, NULL);
 902	if (cnx->num_ack_events > 0)
 903		mf_deallocate_lp_events(cnx->remote_lp,
 904				      HvLpEvent_Type_VirtualLan,
 905				      cnx->num_ack_events,
 906				      NULL, NULL);
 907
 908	kfree(cnx->msgs);
 909	veth_cnx[cnx->remote_lp] = NULL;
 910	kfree(cnx);
 911}
 912
 913static void veth_release_connection(struct kobject *kobj)
 914{
 915	struct veth_lpar_connection *cnx;
 916	cnx = container_of(kobj, struct veth_lpar_connection, kobject);
 917	veth_stop_connection(cnx);
 918	veth_destroy_connection(cnx);
 919}
 920
 921/*
 922 * net_device code
 923 */
 924
 925static int veth_open(struct net_device *dev)
 926{
 927	netif_start_queue(dev);
 928	return 0;
 929}
 930
 931static int veth_close(struct net_device *dev)
 932{
 933	netif_stop_queue(dev);
 934	return 0;
 935}
 936
 937static int veth_change_mtu(struct net_device *dev, int new_mtu)
 938{
 939	if ((new_mtu < 68) || (new_mtu > VETH_MAX_MTU))
 940		return -EINVAL;
 941	dev->mtu = new_mtu;
 942	return 0;
 943}
 944
 945static void veth_set_multicast_list(struct net_device *dev)
 946{
 947	struct veth_port *port = netdev_priv(dev);
 948	unsigned long flags;
 949
 950	write_lock_irqsave(&port->mcast_gate, flags);
 951
 952	if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) ||
 953			(netdev_mc_count(dev) > VETH_MAX_MCAST)) {
 954		port->promiscuous = 1;
 955	} else {
 956		struct netdev_hw_addr *ha;
 957
 958		port->promiscuous = 0;
 959
 960		/* Update table */
 961		port->num_mcast = 0;
 962
 963		netdev_for_each_mc_addr(ha, dev) {
 964			u8 *addr = ha->addr;
 965			u64 xaddr = 0;
 966
 967			memcpy(&xaddr, addr, ETH_ALEN);
 968			port->mcast_addr[port->num_mcast] = xaddr;
 969			port->num_mcast++;
 970		}
 971	}
 972
 973	write_unlock_irqrestore(&port->mcast_gate, flags);
 974}
 975
 976static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 977{
 978	strncpy(info->driver, DRV_NAME, sizeof(info->driver) - 1);
 979	info->driver[sizeof(info->driver) - 1] = '\0';
 980	strncpy(info->version, DRV_VERSION, sizeof(info->version) - 1);
 981	info->version[sizeof(info->version) - 1] = '\0';
 982}
 983
 984static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 985{
 986	ecmd->supported = (SUPPORTED_1000baseT_Full
 987			  | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
 988	ecmd->advertising = (SUPPORTED_1000baseT_Full
 989			    | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
 990	ecmd->port = PORT_FIBRE;
 991	ecmd->transceiver = XCVR_INTERNAL;
 992	ecmd->phy_address = 0;
 993	ecmd->speed = SPEED_1000;
 994	ecmd->duplex = DUPLEX_FULL;
 995	ecmd->autoneg = AUTONEG_ENABLE;
 996	ecmd->maxtxpkt = 120;
 997	ecmd->maxrxpkt = 120;
 998	return 0;
 999}
1000
1001static const struct ethtool_ops ops = {
1002	.get_drvinfo = veth_get_drvinfo,
1003	.get_settings = veth_get_settings,
1004	.get_link = ethtool_op_get_link,
1005};
1006
1007static const struct net_device_ops veth_netdev_ops = {
1008	.ndo_open		= veth_open,
1009	.ndo_stop		= veth_close,
1010	.ndo_start_xmit		= veth_start_xmit,
1011	.ndo_change_mtu		= veth_change_mtu,
1012	.ndo_set_multicast_list	= veth_set_multicast_list,
1013	.ndo_set_mac_address	= NULL,
1014	.ndo_validate_addr	= eth_validate_addr,
1015};
1016
1017static struct net_device *veth_probe_one(int vlan,
1018		struct vio_dev *vio_dev)
1019{
1020	struct net_device *dev;
1021	struct veth_port *port;
1022	struct device *vdev = &vio_dev->dev;
1023	int i, rc;
1024	const unsigned char *mac_addr;
1025
1026	mac_addr = vio_get_attribute(vio_dev, "local-mac-address", NULL);
1027	if (mac_addr == NULL)
1028		mac_addr = vio_get_attribute(vio_dev, "mac-address", NULL);
1029	if (mac_addr == NULL) {
1030		veth_error("Unable to fetch MAC address from device tree.\n");
1031		return NULL;
1032	}
1033
1034	dev = alloc_etherdev(sizeof (struct veth_port));
1035	if (! dev) {
1036		veth_error("Unable to allocate net_device structure!\n");
1037		return NULL;
1038	}
1039
1040	port = netdev_priv(dev);
1041
1042	spin_lock_init(&port->queue_lock);
1043	rwlock_init(&port->mcast_gate);
1044	port->stopped_map = 0;
1045
1046	for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1047		HvLpVirtualLanIndexMap map;
1048
1049		if (i == this_lp)
1050			continue;
1051		map = HvLpConfig_getVirtualLanIndexMapForLp(i);
1052		if (map & (0x8000 >> vlan))
1053			port->lpar_map |= (1 << i);
1054	}
1055	port->dev = vdev;
1056
1057	memcpy(dev->dev_addr, mac_addr, ETH_ALEN);
1058
1059	dev->mtu = VETH_MAX_MTU;
1060
1061	memcpy(&port->mac_addr, mac_addr, ETH_ALEN);
1062
1063	dev->netdev_ops = &veth_netdev_ops;
1064	SET_ETHTOOL_OPS(dev, &ops);
1065
1066	SET_NETDEV_DEV(dev, vdev);
1067
1068	rc = register_netdev(dev);
1069	if (rc != 0) {
1070		veth_error("Failed registering net device for vlan%d.\n", vlan);
1071		free_netdev(dev);
1072		return NULL;
1073	}
1074
1075	kobject_init(&port->kobject, &veth_port_ktype);
1076	if (0 != kobject_add(&port->kobject, &dev->dev.kobj, "veth_port"))
1077		veth_error("Failed adding port for %s to sysfs.\n", dev->name);
1078
1079	veth_info("%s attached to iSeries vlan %d (LPAR map = 0x%.4X)\n",
1080			dev->name, vlan, port->lpar_map);
1081
1082	return dev;
1083}
1084
1085/*
1086 * Tx path
1087 */
1088
1089static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp,
1090				struct net_device *dev)
1091{
1092	struct veth_lpar_connection *cnx = veth_cnx[rlp];
1093	struct veth_port *port = netdev_priv(dev);
1094	HvLpEvent_Rc rc;
1095	struct veth_msg *msg = NULL;
1096	unsigned long flags;
1097
1098	if (! cnx)
1099		return 0;
1100
1101	spin_lock_irqsave(&cnx->lock, flags);
1102
1103	if (! (cnx->state & VETH_STATE_READY))
1104		goto no_error;
1105
1106	if ((skb->len - ETH_HLEN) > VETH_MAX_MTU)
1107		goto drop;
1108
1109	msg = veth_stack_pop(cnx);
1110	if (! msg)
1111		goto drop;
1112
1113	msg->in_use = 1;
1114	msg->skb = skb_get(skb);
1115
1116	msg->data.addr[0] = dma_map_single(port->dev, skb->data,
1117				skb->len, DMA_TO_DEVICE);
1118
1119	if (dma_mapping_error(port->dev, msg->data.addr[0]))
1120		goto recycle_and_drop;
1121
1122	msg->dev = port->dev;
1123	msg->data.len[0] = skb->len;
1124	msg->data.eofmask = 1 << VETH_EOF_SHIFT;
1125
1126	rc = veth_signaldata(cnx, VETH_EVENT_FRAMES, msg->token, &msg->data);
1127
1128	if (rc != HvLpEvent_Rc_Good)
1129		goto recycle_and_drop;
1130
1131	/* If the timer's not already running, start it now. */
1132	if (0 == cnx->outstanding_tx)
1133		mod_timer(&cnx->reset_timer, jiffies + cnx->reset_timeout);
1134
1135	cnx->last_contact = jiffies;
1136	cnx->outstanding_tx++;
1137
1138	if (veth_stack_is_empty(cnx))
1139		veth_stop_queues(cnx);
1140
1141 no_error:
1142	spin_unlock_irqrestore(&cnx->lock, flags);
1143	return 0;
1144
1145 recycle_and_drop:
1146	veth_recycle_msg(cnx, msg);
1147 drop:
1148	spin_unlock_irqrestore(&cnx->lock, flags);
1149	return 1;
1150}
1151
1152static void veth_transmit_to_many(struct sk_buff *skb,
1153					  HvLpIndexMap lpmask,
1154					  struct net_device *dev)
1155{
1156	int i, success, error;
1157
1158	success = error = 0;
1159
1160	for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1161		if ((lpmask & (1 << i)) == 0)
1162			continue;
1163
1164		if (veth_transmit_to_one(skb, i, dev))
1165			error = 1;
1166		else
1167			success = 1;
1168	}
1169
1170	if (error)
1171		dev->stats.tx_errors++;
1172
1173	if (success) {
1174		dev->stats.tx_packets++;
1175		dev->stats.tx_bytes += skb->len;
1176	}
1177}
1178
1179static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev)
1180{
1181	unsigned char *frame = skb->data;
1182	struct veth_port *port = netdev_priv(dev);
1183	HvLpIndexMap lpmask;
1184
1185	if (is_unicast_ether_addr(frame)) {
1186		/* unicast packet */
1187		HvLpIndex rlp = frame[5];
1188
1189		if ( ! ((1 << rlp) & port->lpar_map) ) {
1190			dev_kfree_skb(skb);
1191			return NETDEV_TX_OK;
1192		}
1193
1194		lpmask = 1 << rlp;
1195	} else {
1196		lpmask = port->lpar_map;
1197	}
1198
1199	veth_transmit_to_many(skb, lpmask, dev);
1200
1201	dev_kfree_skb(skb);
1202
1203	return NETDEV_TX_OK;
1204}
1205
1206/* You must hold the connection's lock when you call this function. */
1207static void veth_recycle_msg(struct veth_lpar_connection *cnx,
1208			     struct veth_msg *msg)
1209{
1210	u32 dma_address, dma_length;
1211
1212	if (msg->in_use) {
1213		msg->in_use = 0;
1214		dma_address = msg->data.addr[0];
1215		dma_length = msg->data.len[0];
1216
1217		if (!dma_mapping_error(msg->dev, dma_address))
1218			dma_unmap_single(msg->dev, dma_address, dma_length,
1219					DMA_TO_DEVICE);
1220
1221		if (msg->skb) {
1222			dev_kfree_skb_any(msg->skb);
1223			msg->skb = NULL;
1224		}
1225
1226		memset(&msg->data, 0, sizeof(msg->data));
1227		veth_stack_push(cnx, msg);
1228	} else if (cnx->state & VETH_STATE_OPEN) {
1229		veth_error("Non-pending frame (# %d) acked by LPAR %d.\n",
1230				cnx->remote_lp, msg->token);
1231	}
1232}
1233
1234static void veth_wake_queues(struct veth_lpar_connection *cnx)
1235{
1236	int i;
1237
1238	for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
1239		struct net_device *dev = veth_dev[i];
1240		struct veth_port *port;
1241		unsigned long flags;
1242
1243		if (! dev)
1244			continue;
1245
1246		port = netdev_priv(dev);
1247
1248		if (! (port->lpar_map & (1<<cnx->remote_lp)))
1249			continue;
1250
1251		spin_lock_irqsave(&port->queue_lock, flags);
1252
1253		port->stopped_map &= ~(1 << cnx->remote_lp);
1254
1255		if (0 == port->stopped_map && netif_queue_stopped(dev)) {
1256			veth_debug("cnx %d: woke queue for %s.\n",
1257					cnx->remote_lp, dev->name);
1258			netif_wake_queue(dev);
1259		}
1260		spin_unlock_irqrestore(&port->queue_lock, flags);
1261	}
1262}
1263
1264static void veth_stop_queues(struct veth_lpar_connection *cnx)
1265{
1266	int i;
1267
1268	for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
1269		struct net_device *dev = veth_dev[i];
1270		struct veth_port *port;
1271
1272		if (! dev)
1273			continue;
1274
1275		port = netdev_priv(dev);
1276
1277		/* If this cnx is not on the vlan for this port, continue */
1278		if (! (port->lpar_map & (1 << cnx->remote_lp)))
1279			continue;
1280
1281		spin_lock(&port->queue_lock);
1282
1283		netif_stop_queue(dev);
1284		port->stopped_map |= (1 << cnx->remote_lp);
1285
1286		veth_debug("cnx %d: stopped queue for %s, map = 0x%x.\n",
1287				cnx->remote_lp, dev->name, port->stopped_map);
1288
1289		spin_unlock(&port->queue_lock);
1290	}
1291}
1292
1293static void veth_timed_reset(unsigned long ptr)
1294{
1295	struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)ptr;
1296	unsigned long trigger_time, flags;
1297
1298	/* FIXME is it possible this fires after veth_stop_connection()?
1299	 * That would reschedule the statemachine for 5 seconds and probably
1300	 * execute it after the module's been unloaded. Hmm. */
1301
1302	spin_lock_irqsave(&cnx->lock, flags);
1303
1304	if (cnx->outstanding_tx > 0) {
1305		trigger_time = cnx->last_contact + cnx->reset_timeout;
1306
1307		if (trigger_time < jiffies) {
1308			cnx->state |= VETH_STATE_RESET;
1309			veth_kick_statemachine(cnx);
1310			veth_error("%d packets not acked by LPAR %d within %d "
1311					"seconds, resetting.\n",
1312					cnx->outstanding_tx, cnx->remote_lp,
1313					cnx->reset_timeout / HZ);
1314		} else {
1315			/* Reschedule the timer */
1316			trigger_time = jiffies + cnx->reset_timeout;
1317			mod_timer(&cnx->reset_timer, trigger_time);
1318		}
1319	}
1320
1321	spin_unlock_irqrestore(&cnx->lock, flags);
1322}
1323
1324/*
1325 * Rx path
1326 */
1327
1328static inline int veth_frame_wanted(struct veth_port *port, u64 mac_addr)
1329{
1330	int wanted = 0;
1331	int i;
1332	unsigned long flags;
1333
1334	if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) )
1335		return 1;
1336
1337	read_lock_irqsave(&port->mcast_gate, flags);
1338
1339	if (port->promiscuous) {
1340		wanted = 1;
1341		goto out;
1342	}
1343
1344	for (i = 0; i < port->num_mcast; ++i) {
1345		if (port->mcast_addr[i] == mac_addr) {
1346			wanted = 1;
1347			break;
1348		}
1349	}
1350
1351 out:
1352	read_unlock_irqrestore(&port->mcast_gate, flags);
1353
1354	return wanted;
1355}
1356
1357struct dma_chunk {
1358	u64 addr;
1359	u64 size;
1360};
1361
1362#define VETH_MAX_PAGES_PER_FRAME ( (VETH_MAX_MTU+PAGE_SIZE-2)/PAGE_SIZE + 1 )
1363
1364static inline void veth_build_dma_list(struct dma_chunk *list,
1365				       unsigned char *p, unsigned long length)
1366{
1367	unsigned long done;
1368	int i = 1;
1369
1370	/* FIXME: skbs are contiguous in real addresses.  Do we
1371	 * really need to break it into PAGE_SIZE chunks, or can we do
1372	 * it just at the granularity of iSeries real->absolute
1373	 * mapping?  Indeed, given the way the allocator works, can we
1374	 * count on them being absolutely contiguous? */
1375	list[0].addr = iseries_hv_addr(p);
1376	list[0].size = min(length,
1377			   PAGE_SIZE - ((unsigned long)p & ~PAGE_MASK));
1378
1379	done = list[0].size;
1380	while (done < length) {
1381		list[i].addr = iseries_hv_addr(p + done);
1382		list[i].size = min(length-done, PAGE_SIZE);
1383		done += list[i].size;
1384		i++;
1385	}
1386}
1387
1388static void veth_flush_acks(struct veth_lpar_connection *cnx)
1389{
1390	HvLpEvent_Rc rc;
1391
1392	rc = veth_signaldata(cnx, VETH_EVENT_FRAMES_ACK,
1393			     0, &cnx->pending_acks);
1394
1395	if (rc != HvLpEvent_Rc_Good)
1396		veth_error("Failed acking frames from LPAR %d, rc = %d\n",
1397				cnx->remote_lp, (int)rc);
1398
1399	cnx->num_pending_acks = 0;
1400	memset(&cnx->pending_acks, 0xff, sizeof(cnx->pending_acks));
1401}
1402
1403static void veth_receive(struct veth_lpar_connection *cnx,
1404			 struct veth_lpevent *event)
1405{
1406	struct veth_frames_data *senddata = &event->u.frames_data;
1407	int startchunk = 0;
1408	int nchunks;
1409	unsigned long flags;
1410	HvLpDma_Rc rc;
1411
1412	do {
1413		u16 length = 0;
1414		struct sk_buff *skb;
1415		struct dma_chunk local_list[VETH_MAX_PAGES_PER_FRAME];
1416		struct dma_chunk remote_list[VETH_MAX_FRAMES_PER_MSG];
1417		u64 dest;
1418		HvLpVirtualLanIndex vlan;
1419		struct net_device *dev;
1420		struct veth_port *port;
1421
1422		/* FIXME: do we need this? */
1423		memset(local_list, 0, sizeof(local_list));
1424		memset(remote_list, 0, sizeof(VETH_MAX_FRAMES_PER_MSG));
1425
1426		/* a 0 address marks the end of the valid entries */
1427		if (senddata->addr[startchunk] == 0)
1428			break;
1429
1430		/* make sure that we have at least 1 EOF entry in the
1431		 * remaining entries */
1432		if (! (senddata->eofmask >> (startchunk + VETH_EOF_SHIFT))) {
1433			veth_error("Missing EOF fragment in event "
1434					"eofmask = 0x%x startchunk = %d\n",
1435					(unsigned)senddata->eofmask,
1436					startchunk);
1437			break;
1438		}
1439
1440		/* build list of chunks in this frame */
1441		nchunks = 0;
1442		do {
1443			remote_list[nchunks].addr =
1444				(u64) senddata->addr[startchunk+nchunks] << 32;
1445			remote_list[nchunks].size =
1446				senddata->len[startchunk+nchunks];
1447			length += remote_list[nchunks].size;
1448		} while (! (senddata->eofmask &
1449			    (1 << (VETH_EOF_SHIFT + startchunk + nchunks++))));
1450
1451		/* length == total length of all chunks */
1452		/* nchunks == # of chunks in this frame */
1453
1454		if ((length - ETH_HLEN) > VETH_MAX_MTU) {
1455			veth_error("Received oversize frame from LPAR %d "
1456					"(length = %d)\n",
1457					cnx->remote_lp, length);
1458			continue;
1459		}
1460
1461		skb = alloc_skb(length, GFP_ATOMIC);
1462		if (!skb)
1463			continue;
1464
1465		veth_build_dma_list(local_list, skb->data, length);
1466
1467		rc = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan,
1468					    event->base_event.xSourceLp,
1469					    HvLpDma_Direction_RemoteToLocal,
1470					    cnx->src_inst,
1471					    cnx->dst_inst,
1472					    HvLpDma_AddressType_RealAddress,
1473					    HvLpDma_AddressType_TceIndex,
1474					    iseries_hv_addr(&local_list),
1475					    iseries_hv_addr(&remote_list),
1476					    length);
1477		if (rc != HvLpDma_Rc_Good) {
1478			dev_kfree_skb_irq(skb);
1479			continue;
1480		}
1481
1482		vlan = skb->data[9];
1483		dev = veth_dev[vlan];
1484		if (! dev) {
1485			/*
1486			 * Some earlier versions of the driver sent
1487			 * broadcasts down all connections, even to lpars
1488			 * that weren't on the relevant vlan. So ignore
1489			 * packets belonging to a vlan we're not on.
1490			 * We can also be here if we receive packets while
1491			 * the driver is going down, because then dev is NULL.
1492			 */
1493			dev_kfree_skb_irq(skb);
1494			continue;
1495		}
1496
1497		port = netdev_priv(dev);
1498		dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000;
1499
1500		if ((vlan > HVMAXARCHITECTEDVIRTUALLANS) || !port) {
1501			dev_kfree_skb_irq(skb);
1502			continue;
1503		}
1504		if (! veth_frame_wanted(port, dest)) {
1505			dev_kfree_skb_irq(skb);
1506			continue;
1507		}
1508
1509		skb_put(skb, length);
1510		skb->protocol = eth_type_trans(skb, dev);
1511		skb_checksum_none_assert(skb);
1512		netif_rx(skb);	/* send it up */
1513		dev->stats.rx_packets++;
1514		dev->stats.rx_bytes += length;
1515	} while (startchunk += nchunks, startchunk < VETH_MAX_FRAMES_PER_MSG);
1516
1517	/* Ack it */
1518	spin_lock_irqsave(&cnx->lock, flags);
1519	BUG_ON(cnx->num_pending_acks > VETH_MAX_ACKS_PER_MSG);
1520
1521	cnx->pending_acks[cnx->num_pending_acks++] =
1522		event->base_event.xCorrelationToken;
1523
1524	if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold) ||
1525	     (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) )
1526		veth_flush_acks(cnx);
1527
1528	spin_unlock_irqrestore(&cnx->lock, flags);
1529}
1530
1531static void veth_timed_ack(unsigned long ptr)
1532{
1533	struct veth_lpar_connection *cnx = (struct veth_lpar_connection *) ptr;
1534	unsigned long flags;
1535
1536	/* Ack all the events */
1537	spin_lock_irqsave(&cnx->lock, flags);
1538	if (cnx->num_pending_acks > 0)
1539		veth_flush_acks(cnx);
1540
1541	/* Reschedule the timer */
1542	cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
1543	add_timer(&cnx->ack_timer);
1544	spin_unlock_irqrestore(&cnx->lock, flags);
1545}
1546
1547static int veth_remove(struct vio_dev *vdev)
1548{
1549	struct veth_lpar_connection *cnx;
1550	struct net_device *dev;
1551	struct veth_port *port;
1552	int i;
1553
1554	dev = veth_dev[vdev->unit_address];
1555
1556	if (! dev)
1557		return 0;
1558
1559	port = netdev_priv(dev);
1560
1561	for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1562		cnx = veth_cnx[i];
1563
1564		if (cnx && (port->lpar_map & (1 << i))) {
1565			/* Drop our reference to connections on our VLAN */
1566			kobject_put(&cnx->kobject);
1567		}
1568	}
1569
1570	veth_dev[vdev->unit_address] = NULL;
1571	kobject_del(&port->kobject);
1572	kobject_put(&port->kobject);
1573	unregister_netdev(dev);
1574	free_netdev(dev);
1575
1576	return 0;
1577}
1578
1579static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id)
1580{
1581	int i = vdev->unit_address;
1582	struct net_device *dev;
1583	struct veth_port *port;
1584
1585	dev = veth_probe_one(i, vdev);
1586	if (dev == NULL) {
1587		veth_remove(vdev);
1588		return 1;
1589	}
1590	veth_dev[i] = dev;
1591
1592	port = netdev_priv(dev);
1593
1594	/* Start the state machine on each connection on this vlan. If we're
1595	 * the first dev to do so this will commence link negotiation */
1596	for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1597		struct veth_lpar_connection *cnx;
1598
1599		if (! (port->lpar_map & (1 << i)))
1600			continue;
1601
1602		cnx = veth_cnx[i];
1603		if (!cnx)
1604			continue;
1605
1606		kobject_get(&cnx->kobject);
1607		veth_kick_statemachine(cnx);
1608	}
1609
1610	return 0;
1611}
1612
1613/**
1614 * veth_device_table: Used by vio.c to match devices that we
1615 * support.
1616 */
1617static struct vio_device_id veth_device_table[] __devinitdata = {
1618	{ "network", "IBM,iSeries-l-lan" },
1619	{ "", "" }
1620};
1621MODULE_DEVICE_TABLE(vio, veth_device_table);
1622
1623static struct vio_driver veth_driver = {
1624	.id_table = veth_device_table,
1625	.probe = veth_probe,
1626	.remove = veth_remove,
1627	.driver = {
1628		.name = DRV_NAME,
1629		.owner = THIS_MODULE,
1630	}
1631};
1632
1633/*
1634 * Module initialization/cleanup
1635 */
1636
1637static void __exit veth_module_cleanup(void)
1638{
1639	int i;
1640	struct veth_lpar_connection *cnx;
1641
1642	/* Disconnect our "irq" to stop events coming from the Hypervisor. */
1643	HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan);
1644
1645	for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1646		cnx = veth_cnx[i];
1647
1648		if (!cnx)
1649			continue;
1650
1651		/* Cancel work queued from Hypervisor callbacks */
1652		cancel_delayed_work_sync(&cnx->statemachine_wq);
1653		/* Remove the connection from sysfs */
1654		kobject_del(&cnx->kobject);
1655		/* Drop the driver's reference to the connection */
1656		kobject_put(&cnx->kobject);
1657	}
1658
1659	/* Unregister the driver, which will close all the netdevs and stop
1660	 * the connections when they're no longer referenced. */
1661	vio_unregister_driver(&veth_driver);
1662}
1663module_exit(veth_module_cleanup);
1664
1665static int __init veth_module_init(void)
1666{
1667	int i;
1668	int rc;
1669
1670	if (!firmware_has_feature(FW_FEATURE_ISERIES))
1671		return -ENODEV;
1672
1673	this_lp = HvLpConfig_getLpIndex_outline();
1674
1675	for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1676		rc = veth_init_connection(i);
1677		if (rc != 0)
1678			goto error;
1679	}
1680
1681	HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan,
1682				  &veth_handle_event);
1683
1684	rc = vio_register_driver(&veth_driver);
1685	if (rc != 0)
1686		goto error;
1687
1688	for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1689		struct kobject *kobj;
1690
1691		if (!veth_cnx[i])
1692			continue;
1693
1694		kobj = &veth_cnx[i]->kobject;
1695		/* If the add failes, complain but otherwise continue */
1696		if (0 != driver_add_kobj(&veth_driver.driver, kobj,
1697					"cnx%.2d", veth_cnx[i]->remote_lp))
1698			veth_error("cnx %d: Failed adding to sysfs.\n", i);
1699	}
1700
1701	return 0;
1702
1703error:
1704	for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1705		veth_destroy_connection(veth_cnx[i]);
1706	}
1707
1708	return rc;
1709}
1710module_init(veth_module_init);