v3.5.6
   1/* ldc.c: Logical Domain Channel link-layer protocol driver.
   2 *
   3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/export.h>
   8#include <linux/slab.h>
   9#include <linux/spinlock.h>
  10#include <linux/delay.h>
  11#include <linux/errno.h>
  12#include <linux/string.h>
  13#include <linux/scatterlist.h>
  14#include <linux/interrupt.h>
  15#include <linux/list.h>
  16#include <linux/init.h>
  17#include <linux/bitmap.h>
  18
  19#include <asm/hypervisor.h>
  20#include <asm/iommu.h>
  21#include <asm/page.h>
  22#include <asm/ldc.h>
  23#include <asm/mdesc.h>
  24
  25#define DRV_MODULE_NAME		"ldc"
  26#define PFX DRV_MODULE_NAME	": "
  27#define DRV_MODULE_VERSION	"1.1"
  28#define DRV_MODULE_RELDATE	"July 22, 2008"
  29
  30static char version[] __devinitdata =
  31	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
  32#define LDC_PACKET_SIZE		64
  33
  34/* Packet header layout for unreliable and reliable mode frames.
  35 * When in RAW mode, packets are simply straight 64-byte payloads
  36 * with no headers.
  37 */
  38struct ldc_packet {
  39	u8			type;
  40#define LDC_CTRL		0x01
  41#define LDC_DATA		0x02
  42#define LDC_ERR			0x10
  43
  44	u8			stype;
  45#define LDC_INFO		0x01
  46#define LDC_ACK			0x02
  47#define LDC_NACK		0x04
  48
  49	u8			ctrl;
  50#define LDC_VERS		0x01 /* Link Version		*/
  51#define LDC_RTS			0x02 /* Request To Send		*/
  52#define LDC_RTR			0x03 /* Ready To Receive	*/
  53#define LDC_RDX			0x04 /* Ready for Data eXchange	*/
  54#define LDC_CTRL_MSK		0x0f
  55
  56	u8			env;
  57#define LDC_LEN			0x3f
  58#define LDC_FRAG_MASK		0xc0
  59#define LDC_START		0x40
  60#define LDC_STOP		0x80
  61
  62	u32			seqid;
  63
  64	union {
  65		u8		u_data[LDC_PACKET_SIZE - 8];
  66		struct {
  67			u32	pad;
  68			u32	ackid;
  69			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
  70		} r;
  71	} u;
  72};
  73
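/* Layout note (illustrative, not part of the original file): the common
 * header above is 8 bytes (type, stype, ctrl, env, seqid), so an unreliable
 * mode frame carries 56 bytes of payload and a reliable/stream frame carries
 * 48, which is exactly where the mss values chosen in ldc_alloc() come from.
 * A compile-time check of the layout would look like:
 *
 *	BUILD_BUG_ON(sizeof(struct ldc_packet) != LDC_PACKET_SIZE);
 */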
  74struct ldc_version {
  75	u16 major;
  76	u16 minor;
  77};
  78
  79/* Ordered from largest major to lowest.  */
  80static struct ldc_version ver_arr[] = {
  81	{ .major = 1, .minor = 0 },
  82};
  83
  84#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
  85#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)
  86
  87struct ldc_channel;
  88
  89struct ldc_mode_ops {
  90	int (*write)(struct ldc_channel *, const void *, unsigned int);
  91	int (*read)(struct ldc_channel *, void *, unsigned int);
  92};
  93
  94static const struct ldc_mode_ops raw_ops;
  95static const struct ldc_mode_ops nonraw_ops;
  96static const struct ldc_mode_ops stream_ops;
  97
  98int ldom_domaining_enabled;
  99
 100struct ldc_iommu {
 101	/* Protects arena alloc/free.  */
 102	spinlock_t			lock;
 103	struct iommu_arena		arena;
 104	struct ldc_mtable_entry		*page_table;
 105};
 106
 107struct ldc_channel {
 108	/* Protects all operations that depend upon channel state.  */
 109	spinlock_t			lock;
 110
 111	unsigned long			id;
 112
 113	u8				*mssbuf;
 114	u32				mssbuf_len;
 115	u32				mssbuf_off;
 116
 117	struct ldc_packet		*tx_base;
 118	unsigned long			tx_head;
 119	unsigned long			tx_tail;
 120	unsigned long			tx_num_entries;
 121	unsigned long			tx_ra;
 122
 123	unsigned long			tx_acked;
 124
 125	struct ldc_packet		*rx_base;
 126	unsigned long			rx_head;
 127	unsigned long			rx_tail;
 128	unsigned long			rx_num_entries;
 129	unsigned long			rx_ra;
 130
 131	u32				rcv_nxt;
 132	u32				snd_nxt;
 133
 134	unsigned long			chan_state;
 135
 136	struct ldc_channel_config	cfg;
 137	void				*event_arg;
 138
 139	const struct ldc_mode_ops	*mops;
 140
 141	struct ldc_iommu		iommu;
 142
 143	struct ldc_version		ver;
 144
 145	u8				hs_state;
 146#define LDC_HS_CLOSED			0x00
 147#define LDC_HS_OPEN			0x01
 148#define LDC_HS_GOTVERS			0x02
 149#define LDC_HS_SENTRTR			0x03
 150#define LDC_HS_GOTRTR			0x04
 151#define LDC_HS_COMPLETE			0x10
 152
 153	u8				flags;
 154#define LDC_FLAG_ALLOCED_QUEUES		0x01
 155#define LDC_FLAG_REGISTERED_QUEUES	0x02
 156#define LDC_FLAG_REGISTERED_IRQS	0x04
 157#define LDC_FLAG_RESET			0x10
 158
 159	u8				mss;
 160	u8				state;
 161
 162#define LDC_IRQ_NAME_MAX		32
 163	char				rx_irq_name[LDC_IRQ_NAME_MAX];
 164	char				tx_irq_name[LDC_IRQ_NAME_MAX];
 165
 166	struct hlist_head		mh_list;
 167
 168	struct hlist_node		list;
 169};
 170
 171#define ldcdbg(TYPE, f, a...) \
 172do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
 173		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
 174} while (0)
 175
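/* The debug mask is supplied by the client through ldc_channel_config.  For
 * example (illustrative only), a driver that wants handshake and state
 * transition traces would set:
 *
 *	cfg.debug = LDC_DEBUG_HS | LDC_DEBUG_STATE;
 *
 * and every ldcdbg(HS, ...) / ldcdbg(STATE, ...) below then prints with the
 * channel ID as a prefix.
 */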
 176static const char *state_to_str(u8 state)
 177{
 178	switch (state) {
 179	case LDC_STATE_INVALID:
 180		return "INVALID";
 181	case LDC_STATE_INIT:
 182		return "INIT";
 183	case LDC_STATE_BOUND:
 184		return "BOUND";
 185	case LDC_STATE_READY:
 186		return "READY";
 187	case LDC_STATE_CONNECTED:
 188		return "CONNECTED";
 189	default:
 190		return "<UNKNOWN>";
 191	}
 192}
 193
 194static void ldc_set_state(struct ldc_channel *lp, u8 state)
 195{
 196	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
 197	       state_to_str(lp->state),
 198	       state_to_str(state));
 199
 200	lp->state = state;
 201}
 202
 203static unsigned long __advance(unsigned long off, unsigned long num_entries)
 204{
 205	off += LDC_PACKET_SIZE;
 206	if (off == (num_entries * LDC_PACKET_SIZE))
 207		off = 0;
 208
 209	return off;
 210}
 211
 212static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
 213{
 214	return __advance(off, lp->rx_num_entries);
 215}
 216
 217static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
 218{
 219	return __advance(off, lp->tx_num_entries);
 220}
 221
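/* Worked example (assuming the 8K PAGE_SIZE used on sparc64): a default
 * queue has LDC_DEFAULT_NUM_ENTRIES = 8192 / 64 = 128 entries, so
 *
 *	__advance(127 * LDC_PACKET_SIZE, 128)	returns 0
 *
 * i.e. the offset wraps from the last 64-byte slot back to the start of
 * the queue.
 */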
 222static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
 223						  unsigned long *new_tail)
 224{
 225	struct ldc_packet *p;
 226	unsigned long t;
 227
 228	t = tx_advance(lp, lp->tx_tail);
 229	if (t == lp->tx_head)
 230		return NULL;
 231
 232	*new_tail = t;
 233
 234	p = lp->tx_base;
 235	return p + (lp->tx_tail / LDC_PACKET_SIZE);
 236}
 237
 238/* When we are in reliable or stream mode, we have to track the next packet
 239 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
 240 * to be careful not to stomp over the queue past that point.  During
 241 * the handshake, we don't have TX data packets pending in the queue
 242 * and that's why handshake_get_tx_packet() need not be mindful of
 243 * lp->tx_acked.
 244 */
 245static unsigned long head_for_data(struct ldc_channel *lp)
 246{
 247	if (lp->cfg.mode == LDC_MODE_STREAM)
 248		return lp->tx_acked;
 249	return lp->tx_head;
 250}
 251
 252static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
 253{
 254	unsigned long limit, tail, new_tail, diff;
 255	unsigned int mss;
 256
 257	limit = head_for_data(lp);
 258	tail = lp->tx_tail;
 259	new_tail = tx_advance(lp, tail);
 260	if (new_tail == limit)
 261		return 0;
 262
 263	if (limit > new_tail)
 264		diff = limit - new_tail;
 265	else
 266		diff = (limit +
 267			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
 268	diff /= LDC_PACKET_SIZE;
 269	mss = lp->mss;
 270
 271	if (diff * mss < size)
 272		return 0;
 273
 274	return 1;
 275}
 276
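/* Worked example (assuming the 128-entry default queue and the unreliable
 * mode mss of 56 bytes): with an empty queue (head == tail == 0),
 * new_tail is 64, diff becomes (128 * 64 - 64) / 64 = 127, so at most
 * 127 * 56 = 7112 bytes can be accepted in one ldc_write() -- one slot is
 * always kept free so that the tail never catches up with the head.
 */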
 277static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
 278					     unsigned long *new_tail)
 279{
 280	struct ldc_packet *p;
 281	unsigned long h, t;
 282
 283	h = head_for_data(lp);
 284	t = tx_advance(lp, lp->tx_tail);
 285	if (t == h)
 286		return NULL;
 287
 288	*new_tail = t;
 289
 290	p = lp->tx_base;
 291	return p + (lp->tx_tail / LDC_PACKET_SIZE);
 292}
 293
 294static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
 295{
 296	unsigned long orig_tail = lp->tx_tail;
 297	int limit = 1000;
 298
 299	lp->tx_tail = tail;
 300	while (limit-- > 0) {
 301		unsigned long err;
 302
 303		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
 304		if (!err)
 305			return 0;
 306
 307		if (err != HV_EWOULDBLOCK) {
 308			lp->tx_tail = orig_tail;
 309			return -EINVAL;
 310		}
 311		udelay(1);
 312	}
 313
 314	lp->tx_tail = orig_tail;
 315	return -EBUSY;
 316}
 317
 318/* This just updates the head value in the hypervisor using
 319 * a polling loop with a timeout.  The caller takes care of
 320	 * updating software state representing the head change, if any.
 321 */
 322static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
 323{
 324	int limit = 1000;
 325
 326	while (limit-- > 0) {
 327		unsigned long err;
 328
 329		err = sun4v_ldc_rx_set_qhead(lp->id, head);
 330		if (!err)
 331			return 0;
 332
 333		if (err != HV_EWOULDBLOCK)
 334			return -EINVAL;
 335
 336		udelay(1);
 337	}
 338
 339	return -EBUSY;
 340}
 341
 342static int send_tx_packet(struct ldc_channel *lp,
 343			  struct ldc_packet *p,
 344			  unsigned long new_tail)
 345{
 346	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
 347
 348	return set_tx_tail(lp, new_tail);
 349}
 350
 351static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
 352						 u8 stype, u8 ctrl,
 353						 void *data, int dlen,
 354						 unsigned long *new_tail)
 355{
 356	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
 357
 358	if (p) {
 359		memset(p, 0, sizeof(*p));
 360		p->type = LDC_CTRL;
 361		p->stype = stype;
 362		p->ctrl = ctrl;
 363		if (data)
 364			memcpy(p->u.u_data, data, dlen);
 365	}
 366	return p;
 367}
 368
 369static int start_handshake(struct ldc_channel *lp)
 370{
 371	struct ldc_packet *p;
 372	struct ldc_version *ver;
 373	unsigned long new_tail;
 374
 375	ver = &ver_arr[0];
 376
 377	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
 378	       ver->major, ver->minor);
 379
 380	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 381				   ver, sizeof(*ver), &new_tail);
 382	if (p) {
 383		int err = send_tx_packet(lp, p, new_tail);
 384		if (!err)
 385			lp->flags &= ~LDC_FLAG_RESET;
 386		return err;
 387	}
 388	return -EBUSY;
 389}
 390
 391static int send_version_nack(struct ldc_channel *lp,
 392			     u16 major, u16 minor)
 393{
 394	struct ldc_packet *p;
 395	struct ldc_version ver;
 396	unsigned long new_tail;
 397
 398	ver.major = major;
 399	ver.minor = minor;
 400
 401	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
 402				   &ver, sizeof(ver), &new_tail);
 403	if (p) {
 404		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
 405		       ver.major, ver.minor);
 406
 407		return send_tx_packet(lp, p, new_tail);
 408	}
 409	return -EBUSY;
 410}
 411
 412static int send_version_ack(struct ldc_channel *lp,
 413			    struct ldc_version *vp)
 414{
 415	struct ldc_packet *p;
 416	unsigned long new_tail;
 417
 418	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
 419				   vp, sizeof(*vp), &new_tail);
 420	if (p) {
 421		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
 422		       vp->major, vp->minor);
 423
 424		return send_tx_packet(lp, p, new_tail);
 425	}
 426	return -EBUSY;
 427}
 428
 429static int send_rts(struct ldc_channel *lp)
 430{
 431	struct ldc_packet *p;
 432	unsigned long new_tail;
 433
 434	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
 435				   &new_tail);
 436	if (p) {
 437		p->env = lp->cfg.mode;
 438		p->seqid = 0;
 439		lp->rcv_nxt = 0;
 440
 441		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
 442		       p->env, p->seqid);
 443
 444		return send_tx_packet(lp, p, new_tail);
 445	}
 446	return -EBUSY;
 447}
 448
 449static int send_rtr(struct ldc_channel *lp)
 450{
 451	struct ldc_packet *p;
 452	unsigned long new_tail;
 453
 454	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
 455				   &new_tail);
 456	if (p) {
 457		p->env = lp->cfg.mode;
 458		p->seqid = 0;
 459
 460		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
 461		       p->env, p->seqid);
 462
 463		return send_tx_packet(lp, p, new_tail);
 464	}
 465	return -EBUSY;
 466}
 467
 468static int send_rdx(struct ldc_channel *lp)
 469{
 470	struct ldc_packet *p;
 471	unsigned long new_tail;
 472
 473	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
 474				   &new_tail);
 475	if (p) {
 476		p->env = 0;
 477		p->seqid = ++lp->snd_nxt;
 478		p->u.r.ackid = lp->rcv_nxt;
 479
 480		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
 481		       p->env, p->seqid, p->u.r.ackid);
 482
 483		return send_tx_packet(lp, p, new_tail);
 484	}
 485	return -EBUSY;
 486}
 487
 488static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
 489{
 490	struct ldc_packet *p;
 491	unsigned long new_tail;
 492	int err;
 493
 494	p = data_get_tx_packet(lp, &new_tail);
 495	if (!p)
 496		return -EBUSY;
 497	memset(p, 0, sizeof(*p));
 498	p->type = data_pkt->type;
 499	p->stype = LDC_NACK;
 500	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
 501	p->seqid = lp->snd_nxt + 1;
 502	p->u.r.ackid = lp->rcv_nxt;
 503
 504	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
 505	       p->type, p->ctrl, p->seqid, p->u.r.ackid);
 506
 507	err = send_tx_packet(lp, p, new_tail);
 508	if (!err)
 509		lp->snd_nxt++;
 510
 511	return err;
 512}
 513
 514static int ldc_abort(struct ldc_channel *lp)
 515{
 516	unsigned long hv_err;
 517
 518	ldcdbg(STATE, "ABORT\n");
 519
 520	/* We report but do not act upon the hypervisor errors because
 521	 * there really isn't much we can do if they fail at this point.
 522	 */
 523	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
 524	if (hv_err)
 525		printk(KERN_ERR PFX "ldc_abort: "
 526		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 527		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
 528
 529	hv_err = sun4v_ldc_tx_get_state(lp->id,
 530					&lp->tx_head,
 531					&lp->tx_tail,
 532					&lp->chan_state);
 533	if (hv_err)
 534		printk(KERN_ERR PFX "ldc_abort: "
 535		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
 536		       lp->id, hv_err);
 537
 538	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
 539	if (hv_err)
 540		printk(KERN_ERR PFX "ldc_abort: "
 541		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 542		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
 543
 544	/* Refetch the RX queue state as well, because we could be invoked
 545	 * here in the queue processing context.
 546	 */
 547	hv_err = sun4v_ldc_rx_get_state(lp->id,
 548					&lp->rx_head,
 549					&lp->rx_tail,
 550					&lp->chan_state);
 551	if (hv_err)
 552		printk(KERN_ERR PFX "ldc_abort: "
 553		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
 554		       lp->id, hv_err);
 555
 556	return -ECONNRESET;
 557}
 558
 559static struct ldc_version *find_by_major(u16 major)
 560{
 561	struct ldc_version *ret = NULL;
 562	int i;
 563
 564	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
 565		struct ldc_version *v = &ver_arr[i];
 566		if (v->major <= major) {
 567			ret = v;
 568			break;
 569		}
 570	}
 571	return ret;
 572}
 573
 574static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
 575{
 576	struct ldc_version *vap;
 577	int err;
 578
 579	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
 580	       vp->major, vp->minor);
 581
 582	if (lp->hs_state == LDC_HS_GOTVERS) {
 583		lp->hs_state = LDC_HS_OPEN;
 584		memset(&lp->ver, 0, sizeof(lp->ver));
 585	}
 586
 587	vap = find_by_major(vp->major);
 588	if (!vap) {
 589		err = send_version_nack(lp, 0, 0);
 590	} else if (vap->major != vp->major) {
 591		err = send_version_nack(lp, vap->major, vap->minor);
 592	} else {
 593		struct ldc_version ver = *vp;
 594		if (ver.minor > vap->minor)
 595			ver.minor = vap->minor;
 596		err = send_version_ack(lp, &ver);
 597		if (!err) {
 598			lp->ver = ver;
 599			lp->hs_state = LDC_HS_GOTVERS;
 600		}
 601	}
 602	if (err)
 603		return ldc_abort(lp);
 604
 605	return 0;
 606}
 607
 608static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
 609{
 610	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
 611	       vp->major, vp->minor);
 612
 613	if (lp->hs_state == LDC_HS_GOTVERS) {
 614		if (lp->ver.major != vp->major ||
 615		    lp->ver.minor != vp->minor)
 616			return ldc_abort(lp);
 617	} else {
 618		lp->ver = *vp;
 619		lp->hs_state = LDC_HS_GOTVERS;
 620	}
 621	if (send_rts(lp))
 622		return ldc_abort(lp);
 623	return 0;
 624}
 625
 626static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
 627{
 628	struct ldc_version *vap;
 629	struct ldc_packet *p;
 630	unsigned long new_tail;
 631
 632	if (vp->major == 0 && vp->minor == 0)
 633		return ldc_abort(lp);
 634
 635	vap = find_by_major(vp->major);
 636	if (!vap)
 637		return ldc_abort(lp);
 638
 639	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 640					   vap, sizeof(*vap),
 641					   &new_tail);
 642	if (!p)
 643		return ldc_abort(lp);
 644
 645	return send_tx_packet(lp, p, new_tail);
 646}
 647
 648static int process_version(struct ldc_channel *lp,
 649			   struct ldc_packet *p)
 650{
 651	struct ldc_version *vp;
 652
 653	vp = (struct ldc_version *) p->u.u_data;
 654
 655	switch (p->stype) {
 656	case LDC_INFO:
 657		return process_ver_info(lp, vp);
 658
 659	case LDC_ACK:
 660		return process_ver_ack(lp, vp);
 661
 662	case LDC_NACK:
 663		return process_ver_nack(lp, vp);
 664
 665	default:
 666		return ldc_abort(lp);
 667	}
 668}
 669
 670static int process_rts(struct ldc_channel *lp,
 671		       struct ldc_packet *p)
 672{
 673	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
 674	       p->stype, p->seqid, p->env);
 675
 676	if (p->stype     != LDC_INFO	   ||
 677	    lp->hs_state != LDC_HS_GOTVERS ||
 678	    p->env       != lp->cfg.mode)
 679		return ldc_abort(lp);
 680
 681	lp->snd_nxt = p->seqid;
 682	lp->rcv_nxt = p->seqid;
 683	lp->hs_state = LDC_HS_SENTRTR;
 684	if (send_rtr(lp))
 685		return ldc_abort(lp);
 686
 687	return 0;
 688}
 689
 690static int process_rtr(struct ldc_channel *lp,
 691		       struct ldc_packet *p)
 692{
 693	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
 694	       p->stype, p->seqid, p->env);
 695
 696	if (p->stype     != LDC_INFO ||
 697	    p->env       != lp->cfg.mode)
 698		return ldc_abort(lp);
 699
 700	lp->snd_nxt = p->seqid;
 701	lp->hs_state = LDC_HS_COMPLETE;
 702	ldc_set_state(lp, LDC_STATE_CONNECTED);
 703	send_rdx(lp);
 704
 705	return LDC_EVENT_UP;
 706}
 707
 708static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
 709{
 710	return lp->rcv_nxt + 1 == seqid;
 711}
 712
 713static int process_rdx(struct ldc_channel *lp,
 714		       struct ldc_packet *p)
 715{
 716	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
 717	       p->stype, p->seqid, p->env, p->u.r.ackid);
 718
 719	if (p->stype != LDC_INFO ||
 720	    !(rx_seq_ok(lp, p->seqid)))
 721		return ldc_abort(lp);
 722
 723	lp->rcv_nxt = p->seqid;
 724
 725	lp->hs_state = LDC_HS_COMPLETE;
 726	ldc_set_state(lp, LDC_STATE_CONNECTED);
 727
 728	return LDC_EVENT_UP;
 729}
 730
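/* Taken together, the handlers above implement this successful handshake
 * (a sketch of one possible exchange; both peers may also initiate):
 *
 *	initiator                               responder
 *	CTRL/INFO/VERS {1,0}   ------------->   process_ver_info()
 *	process_ver_ack()      <-------------   CTRL/ACK/VERS {1,0}
 *	CTRL/INFO/RTS          ------------->   process_rts()
 *	process_rtr()          <-------------   CTRL/INFO/RTR
 *	  -> CONNECTED, LDC_EVENT_UP
 *	CTRL/INFO/RDX          ------------->   process_rdx()
 *	                                          -> CONNECTED, LDC_EVENT_UP
 */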
 731static int process_control_frame(struct ldc_channel *lp,
 732				 struct ldc_packet *p)
 733{
 734	switch (p->ctrl) {
 735	case LDC_VERS:
 736		return process_version(lp, p);
 737
 738	case LDC_RTS:
 739		return process_rts(lp, p);
 740
 741	case LDC_RTR:
 742		return process_rtr(lp, p);
 743
 744	case LDC_RDX:
 745		return process_rdx(lp, p);
 746
 747	default:
 748		return ldc_abort(lp);
 749	}
 750}
 751
 752static int process_error_frame(struct ldc_channel *lp,
 753			       struct ldc_packet *p)
 754{
 755	return ldc_abort(lp);
 756}
 757
 758static int process_data_ack(struct ldc_channel *lp,
 759			    struct ldc_packet *ack)
 760{
 761	unsigned long head = lp->tx_acked;
 762	u32 ackid = ack->u.r.ackid;
 763
 764	while (1) {
 765		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
 766
 767		head = tx_advance(lp, head);
 768
 769		if (p->seqid == ackid) {
 770			lp->tx_acked = head;
 771			return 0;
 772		}
 773		if (head == lp->tx_tail)
 774			return ldc_abort(lp);
 775	}
 776
 777	return 0;
 778}
 779
 780static void send_events(struct ldc_channel *lp, unsigned int event_mask)
 781{
 782	if (event_mask & LDC_EVENT_RESET)
 783		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
 784	if (event_mask & LDC_EVENT_UP)
 785		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
 786	if (event_mask & LDC_EVENT_DATA_READY)
 787		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
 788}
 789
 790static irqreturn_t ldc_rx(int irq, void *dev_id)
 791{
 792	struct ldc_channel *lp = dev_id;
 793	unsigned long orig_state, flags;
 794	unsigned int event_mask;
 795
 796	spin_lock_irqsave(&lp->lock, flags);
 797
 798	orig_state = lp->chan_state;
 799
 800	/* We should probably check for hypervisor errors here and
 801	 * reset the LDC channel if we get one.
 802	 */
 803	sun4v_ldc_rx_get_state(lp->id,
 804			       &lp->rx_head,
 805			       &lp->rx_tail,
 806			       &lp->chan_state);
 807
 808	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 809	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
 810
 811	event_mask = 0;
 812
 813	if (lp->cfg.mode == LDC_MODE_RAW &&
 814	    lp->chan_state == LDC_CHANNEL_UP) {
 815		lp->hs_state = LDC_HS_COMPLETE;
 816		ldc_set_state(lp, LDC_STATE_CONNECTED);
 817
 818		event_mask |= LDC_EVENT_UP;
 819
 820		orig_state = lp->chan_state;
 821	}
 822
 823	/* If we are in reset state, flush the RX queue and ignore
 824	 * everything.
 825	 */
 826	if (lp->flags & LDC_FLAG_RESET) {
 827		(void) __set_rx_head(lp, lp->rx_tail);
 828		goto out;
 829	}
 830
 831	/* Once we finish the handshake, we let the ldc_read()
 832	 * paths do all of the control frame and state management.
 833	 * Just trigger the callback.
 834	 */
 835	if (lp->hs_state == LDC_HS_COMPLETE) {
 836handshake_complete:
 837		if (lp->chan_state != orig_state) {
 838			unsigned int event = LDC_EVENT_RESET;
 839
 840			if (lp->chan_state == LDC_CHANNEL_UP)
 841				event = LDC_EVENT_UP;
 842
 843			event_mask |= event;
 844		}
 845		if (lp->rx_head != lp->rx_tail)
 846			event_mask |= LDC_EVENT_DATA_READY;
 847
 848		goto out;
 849	}
 850
 851	if (lp->chan_state != orig_state)
 852		goto out;
 853
 854	while (lp->rx_head != lp->rx_tail) {
 855		struct ldc_packet *p;
 856		unsigned long new;
 857		int err;
 858
 859		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
 860
 861		switch (p->type) {
 862		case LDC_CTRL:
 863			err = process_control_frame(lp, p);
 864			if (err > 0)
 865				event_mask |= err;
 866			break;
 867
 868		case LDC_DATA:
 869			event_mask |= LDC_EVENT_DATA_READY;
 870			err = 0;
 871			break;
 872
 873		case LDC_ERR:
 874			err = process_error_frame(lp, p);
 875			break;
 876
 877		default:
 878			err = ldc_abort(lp);
 879			break;
 880		}
 881
 882		if (err < 0)
 883			break;
 884
 885		new = lp->rx_head;
 886		new += LDC_PACKET_SIZE;
 887		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
 888			new = 0;
 889		lp->rx_head = new;
 890
 891		err = __set_rx_head(lp, new);
 892		if (err < 0) {
 893			(void) ldc_abort(lp);
 894			break;
 895		}
 896		if (lp->hs_state == LDC_HS_COMPLETE)
 897			goto handshake_complete;
 898	}
 899
 900out:
 901	spin_unlock_irqrestore(&lp->lock, flags);
 902
 903	send_events(lp, event_mask);
 904
 905	return IRQ_HANDLED;
 906}
 907
 908static irqreturn_t ldc_tx(int irq, void *dev_id)
 909{
 910	struct ldc_channel *lp = dev_id;
 911	unsigned long flags, orig_state;
 912	unsigned int event_mask = 0;
 913
 914	spin_lock_irqsave(&lp->lock, flags);
 915
 916	orig_state = lp->chan_state;
 917
 918	/* We should probably check for hypervisor errors here and
 919	 * reset the LDC channel if we get one.
 920	 */
 921	sun4v_ldc_tx_get_state(lp->id,
 922			       &lp->tx_head,
 923			       &lp->tx_tail,
 924			       &lp->chan_state);
 925
 926	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 927	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
 928
 929	if (lp->cfg.mode == LDC_MODE_RAW &&
 930	    lp->chan_state == LDC_CHANNEL_UP) {
 931		lp->hs_state = LDC_HS_COMPLETE;
 932		ldc_set_state(lp, LDC_STATE_CONNECTED);
 933
 934		event_mask |= LDC_EVENT_UP;
 935	}
 936
 937	spin_unlock_irqrestore(&lp->lock, flags);
 938
 939	send_events(lp, event_mask);
 940
 941	return IRQ_HANDLED;
 942}
 943
 944/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so
 945 * XXX that addition and removal from the ldc_channel_list has
 946 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 947 * XXX totally pointless as another thread can slip into ldc_alloc()
 948 * XXX and add a channel with the same ID.  There also needs to be
 949 * XXX a spinlock for ldc_channel_list.
 950 */
 951static HLIST_HEAD(ldc_channel_list);
 952
 953static int __ldc_channel_exists(unsigned long id)
 954{
 955	struct ldc_channel *lp;
 956	struct hlist_node *n;
 957
 958	hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
 959		if (lp->id == id)
 960			return 1;
 961	}
 962	return 0;
 963}
 964
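/* A minimal sketch of the locking fix described in the XXX comment above
 * (not what this version of the driver does; it would also need
 * <linux/mutex.h>): serialize the exists-check and the list insertion.
 *
 *	static DEFINE_MUTEX(ldc_channel_mutex);
 *
 *	mutex_lock(&ldc_channel_mutex);
 *	if (__ldc_channel_exists(id)) {
 *		mutex_unlock(&ldc_channel_mutex);
 *		return ERR_PTR(-EEXIST);
 *	}
 *	... allocate and initialize lp ...
 *	hlist_add_head(&lp->list, &ldc_channel_list);
 *	mutex_unlock(&ldc_channel_mutex);
 */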
 965static int alloc_queue(const char *name, unsigned long num_entries,
 966		       struct ldc_packet **base, unsigned long *ra)
 967{
 968	unsigned long size, order;
 969	void *q;
 970
 971	size = num_entries * LDC_PACKET_SIZE;
 972	order = get_order(size);
 973
 974	q = (void *) __get_free_pages(GFP_KERNEL, order);
 975	if (!q) {
 976		printk(KERN_ERR PFX "Alloc of %s queue failed with "
 977		       "size=%lu order=%lu\n", name, size, order);
 978		return -ENOMEM;
 979	}
 980
 981	memset(q, 0, PAGE_SIZE << order);
 982
 983	*base = q;
 984	*ra = __pa(q);
 985
 986	return 0;
 987}
 988
 989static void free_queue(unsigned long num_entries, struct ldc_packet *q)
 990{
 991	unsigned long size, order;
 992
 993	if (!q)
 994		return;
 995
 996	size = num_entries * LDC_PACKET_SIZE;
 997	order = get_order(size);
 998
 999	free_pages((unsigned long)q, order);
1000}
1001
1002/* XXX Make this configurable... XXX */
1003#define LDC_IOTABLE_SIZE	(8 * 1024)
1004
1005static int ldc_iommu_init(struct ldc_channel *lp)
1006{
1007	unsigned long sz, num_tsb_entries, tsbsize, order;
1008	struct ldc_iommu *iommu = &lp->iommu;
1009	struct ldc_mtable_entry *table;
1010	unsigned long hv_err;
1011	int err;
1012
1013	num_tsb_entries = LDC_IOTABLE_SIZE;
1014	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1015
1016	spin_lock_init(&iommu->lock);
1017
1018	sz = num_tsb_entries / 8;
1019	sz = (sz + 7UL) & ~7UL;
1020	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1021	if (!iommu->arena.map) {
1022		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1023		return -ENOMEM;
1024	}
1025
1026	iommu->arena.limit = num_tsb_entries;
1027
1028	order = get_order(tsbsize);
1029
1030	table = (struct ldc_mtable_entry *)
1031		__get_free_pages(GFP_KERNEL, order);
1032	err = -ENOMEM;
1033	if (!table) {
1034		printk(KERN_ERR PFX "Alloc of MTE table failed, "
1035		       "size=%lu order=%lu\n", tsbsize, order);
1036		goto out_free_map;
1037	}
1038
1039	memset(table, 0, PAGE_SIZE << order);
1040
1041	iommu->page_table = table;
1042
1043	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1044					 num_tsb_entries);
1045	err = -EINVAL;
1046	if (hv_err)
1047		goto out_free_table;
1048
1049	return 0;
1050
1051out_free_table:
1052	free_pages((unsigned long) table, order);
1053	iommu->page_table = NULL;
1054
1055out_free_map:
1056	kfree(iommu->arena.map);
1057	iommu->arena.map = NULL;
1058
1059	return err;
1060}
1061
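/* Sizing example (assuming 8K pages and the two-word mte/cookie entry used
 * by this file): LDC_IOTABLE_SIZE = 8192 entries gives an allocation bitmap
 * of 8192 / 8 = 1024 bytes and a map table of 8192 * 16 = 128KB, i.e. an
 * order-4 page allocation, all of which stays pinned for the life of the
 * channel -- hence the "make this configurable" note above.
 */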
1062static void ldc_iommu_release(struct ldc_channel *lp)
1063{
1064	struct ldc_iommu *iommu = &lp->iommu;
 
1065	unsigned long num_tsb_entries, tsbsize, order;
1066
1067	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1068
1069	num_tsb_entries = iommu->arena.limit;
1070	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1071	order = get_order(tsbsize);
1072
1073	free_pages((unsigned long) iommu->page_table, order);
1074	iommu->page_table = NULL;
1075
1076	kfree(iommu->arena.map);
1077	iommu->arena.map = NULL;
1078}
1079
1080struct ldc_channel *ldc_alloc(unsigned long id,
1081			      const struct ldc_channel_config *cfgp,
1082			      void *event_arg)
1083{
1084	struct ldc_channel *lp;
1085	const struct ldc_mode_ops *mops;
1086	unsigned long dummy1, dummy2, hv_err;
1087	u8 mss, *mssbuf;
1088	int err;
1089
1090	err = -ENODEV;
1091	if (!ldom_domaining_enabled)
1092		goto out_err;
1093
1094	err = -EINVAL;
1095	if (!cfgp)
1096		goto out_err;
1097
1098	switch (cfgp->mode) {
1099	case LDC_MODE_RAW:
1100		mops = &raw_ops;
1101		mss = LDC_PACKET_SIZE;
1102		break;
1103
1104	case LDC_MODE_UNRELIABLE:
1105		mops = &nonraw_ops;
1106		mss = LDC_PACKET_SIZE - 8;
1107		break;
1108
1109	case LDC_MODE_STREAM:
1110		mops = &stream_ops;
1111		mss = LDC_PACKET_SIZE - 8 - 8;
1112		break;
1113
1114	default:
1115		goto out_err;
1116	}
1117
1118	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1119		goto out_err;
1120
1121	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1122	err = -ENODEV;
1123	if (hv_err == HV_ECHANNEL)
1124		goto out_err;
1125
1126	err = -EEXIST;
1127	if (__ldc_channel_exists(id))
1128		goto out_err;
1129
1130	mssbuf = NULL;
1131
1132	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1133	err = -ENOMEM;
1134	if (!lp)
1135		goto out_err;
1136
1137	spin_lock_init(&lp->lock);
1138
1139	lp->id = id;
1140
1141	err = ldc_iommu_init(lp);
1142	if (err)
1143		goto out_free_ldc;
1144
1145	lp->mops = mops;
1146	lp->mss = mss;
1147
1148	lp->cfg = *cfgp;
1149	if (!lp->cfg.mtu)
1150		lp->cfg.mtu = LDC_DEFAULT_MTU;
1151
1152	if (lp->cfg.mode == LDC_MODE_STREAM) {
1153		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1154		if (!mssbuf) {
1155			err = -ENOMEM;
1156			goto out_free_iommu;
1157		}
1158		lp->mssbuf = mssbuf;
1159	}
1160
1161	lp->event_arg = event_arg;
1162
1163	/* XXX allow setting via ldc_channel_config to override defaults
1164	 * XXX or use some formula based upon mtu
1165	 */
1166	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1167	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1168
1169	err = alloc_queue("TX", lp->tx_num_entries,
1170			  &lp->tx_base, &lp->tx_ra);
1171	if (err)
1172		goto out_free_mssbuf;
1173
1174	err = alloc_queue("RX", lp->rx_num_entries,
1175			  &lp->rx_base, &lp->rx_ra);
1176	if (err)
1177		goto out_free_txq;
1178
1179	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1180
1181	lp->hs_state = LDC_HS_CLOSED;
1182	ldc_set_state(lp, LDC_STATE_INIT);
1183
1184	INIT_HLIST_NODE(&lp->list);
1185	hlist_add_head(&lp->list, &ldc_channel_list);
1186
1187	INIT_HLIST_HEAD(&lp->mh_list);
1188
1189	return lp;
1190
1191out_free_txq:
1192	free_queue(lp->tx_num_entries, lp->tx_base);
1193
1194out_free_mssbuf:
1195	kfree(mssbuf);
1196
1197out_free_iommu:
1198	ldc_iommu_release(lp);
1199
1200out_free_ldc:
1201	kfree(lp);
1202
1203out_err:
1204	return ERR_PTR(err);
1205}
1206EXPORT_SYMBOL(ldc_alloc);
1207
1208void ldc_free(struct ldc_channel *lp)
1209{
1210	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1211		free_irq(lp->cfg.rx_irq, lp);
1212		free_irq(lp->cfg.tx_irq, lp);
1213	}
1214
1215	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1216		sun4v_ldc_tx_qconf(lp->id, 0, 0);
1217		sun4v_ldc_rx_qconf(lp->id, 0, 0);
1218		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1219	}
1220	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1221		free_queue(lp->tx_num_entries, lp->tx_base);
1222		free_queue(lp->rx_num_entries, lp->rx_base);
1223		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1224	}
1225
1226	hlist_del(&lp->list);
1227
1228	kfree(lp->mssbuf);
1229
1230	ldc_iommu_release(lp);
1231
1232	kfree(lp);
1233}
1234EXPORT_SYMBOL(ldc_free);
1235
1236/* Bind the channel.  This registers the LDC queues with
1237 * the hypervisor and puts the channel into a pseudo-listening
1238 * state.  This does not initiate a handshake, ldc_connect() does
1239 * that.
1240 */
1241int ldc_bind(struct ldc_channel *lp, const char *name)
1242{
1243	unsigned long hv_err, flags;
1244	int err = -EINVAL;
1245
1246	if (!name ||
1247	    (lp->state != LDC_STATE_INIT))
1248		return -EINVAL;
1249
1250	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1251	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1252
1253	err = request_irq(lp->cfg.rx_irq, ldc_rx,
1254			  IRQF_SAMPLE_RANDOM | IRQF_DISABLED,
1255			  lp->rx_irq_name, lp);
1256	if (err)
1257		return err;
1258
1259	err = request_irq(lp->cfg.tx_irq, ldc_tx,
1260			  IRQF_SAMPLE_RANDOM | IRQF_DISABLED,
1261			  lp->tx_irq_name, lp);
1262	if (err) {
1263		free_irq(lp->cfg.rx_irq, lp);
1264		return err;
1265	}
1266
1267
1268	spin_lock_irqsave(&lp->lock, flags);
1269
1270	enable_irq(lp->cfg.rx_irq);
1271	enable_irq(lp->cfg.tx_irq);
1272
1273	lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1274
1275	err = -ENODEV;
1276	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1277	if (hv_err)
1278		goto out_free_irqs;
1279
1280	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1281	if (hv_err)
1282		goto out_free_irqs;
1283
1284	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1285	if (hv_err)
1286		goto out_unmap_tx;
1287
1288	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1289	if (hv_err)
1290		goto out_unmap_tx;
1291
1292	lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1293
1294	hv_err = sun4v_ldc_tx_get_state(lp->id,
1295					&lp->tx_head,
1296					&lp->tx_tail,
1297					&lp->chan_state);
1298	err = -EBUSY;
1299	if (hv_err)
1300		goto out_unmap_rx;
1301
1302	lp->tx_acked = lp->tx_head;
1303
1304	lp->hs_state = LDC_HS_OPEN;
1305	ldc_set_state(lp, LDC_STATE_BOUND);
1306
1307	spin_unlock_irqrestore(&lp->lock, flags);
1308
1309	return 0;
1310
1311out_unmap_rx:
1312	lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1313	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1314
1315out_unmap_tx:
1316	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1317
1318out_free_irqs:
1319	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1320	free_irq(lp->cfg.tx_irq, lp);
1321	free_irq(lp->cfg.rx_irq, lp);
1322
1323	spin_unlock_irqrestore(&lp->lock, flags);
1324
1325	return err;
1326}
1327EXPORT_SYMBOL(ldc_bind);
1328
1329int ldc_connect(struct ldc_channel *lp)
1330{
1331	unsigned long flags;
1332	int err;
1333
1334	if (lp->cfg.mode == LDC_MODE_RAW)
1335		return -EINVAL;
1336
1337	spin_lock_irqsave(&lp->lock, flags);
1338
1339	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1340	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1341	    lp->hs_state != LDC_HS_OPEN)
1342		err = -EINVAL;
1343	else
1344		err = start_handshake(lp);
1345
1346	spin_unlock_irqrestore(&lp->lock, flags);
1347
1348	return err;
1349}
1350EXPORT_SYMBOL(ldc_connect);
1351
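/* Typical bring-up from a client driver (an illustrative sketch; the irq
 * numbers, channel id, "my-port" name and my_event/my_arg are placeholders):
 *
 *	static void my_event(void *arg, int event)
 *	{
 *		if (event == LDC_EVENT_UP)
 *			;	// handshake done, ldc_read()/ldc_write() usable
 *	}
 *
 *	struct ldc_channel_config cfg = {
 *		.event	= my_event,
 *		.mode	= LDC_MODE_UNRELIABLE,
 *		.mtu	= 0,		// 0 selects LDC_DEFAULT_MTU
 *		.rx_irq	= rx_irq,	// from the machine description
 *		.tx_irq	= tx_irq,
 *	};
 *	struct ldc_channel *lp = ldc_alloc(channel_id, &cfg, my_arg);
 *
 *	if (!IS_ERR(lp) && !ldc_bind(lp, "my-port"))
 *		ldc_connect(lp);	// kicks off the VERS/RTS/RTR/RDX handshake
 */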
1352int ldc_disconnect(struct ldc_channel *lp)
1353{
1354	unsigned long hv_err, flags;
1355	int err;
1356
1357	if (lp->cfg.mode == LDC_MODE_RAW)
1358		return -EINVAL;
1359
1360	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1361	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1362		return -EINVAL;
1363
1364	spin_lock_irqsave(&lp->lock, flags);
1365
1366	err = -ENODEV;
1367	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1368	if (hv_err)
1369		goto out_err;
1370
1371	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1372	if (hv_err)
1373		goto out_err;
1374
1375	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1376	if (hv_err)
1377		goto out_err;
1378
1379	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1380	if (hv_err)
1381		goto out_err;
1382
1383	ldc_set_state(lp, LDC_STATE_BOUND);
1384	lp->hs_state = LDC_HS_OPEN;
1385	lp->flags |= LDC_FLAG_RESET;
1386
1387	spin_unlock_irqrestore(&lp->lock, flags);
1388
1389	return 0;
1390
1391out_err:
1392	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1393	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1394	free_irq(lp->cfg.tx_irq, lp);
1395	free_irq(lp->cfg.rx_irq, lp);
1396	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1397		       LDC_FLAG_REGISTERED_QUEUES);
1398	ldc_set_state(lp, LDC_STATE_INIT);
1399
1400	spin_unlock_irqrestore(&lp->lock, flags);
1401
1402	return err;
1403}
1404EXPORT_SYMBOL(ldc_disconnect);
1405
1406int ldc_state(struct ldc_channel *lp)
1407{
1408	return lp->state;
1409}
1410EXPORT_SYMBOL(ldc_state);
1411
1412static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1413{
1414	struct ldc_packet *p;
1415	unsigned long new_tail;
1416	int err;
1417
1418	if (size > LDC_PACKET_SIZE)
1419		return -EMSGSIZE;
1420
1421	p = data_get_tx_packet(lp, &new_tail);
1422	if (!p)
1423		return -EAGAIN;
1424
1425	memcpy(p, buf, size);
1426
1427	err = send_tx_packet(lp, p, new_tail);
1428	if (!err)
1429		err = size;
1430
1431	return err;
1432}
1433
1434static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1435{
1436	struct ldc_packet *p;
1437	unsigned long hv_err, new;
1438	int err;
1439
1440	if (size < LDC_PACKET_SIZE)
1441		return -EINVAL;
1442
1443	hv_err = sun4v_ldc_rx_get_state(lp->id,
1444					&lp->rx_head,
1445					&lp->rx_tail,
1446					&lp->chan_state);
1447	if (hv_err)
1448		return ldc_abort(lp);
1449
1450	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1451	    lp->chan_state == LDC_CHANNEL_RESETTING)
1452		return -ECONNRESET;
1453
1454	if (lp->rx_head == lp->rx_tail)
1455		return 0;
1456
1457	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1458	memcpy(buf, p, LDC_PACKET_SIZE);
1459
1460	new = rx_advance(lp, lp->rx_head);
1461	lp->rx_head = new;
1462
1463	err = __set_rx_head(lp, new);
1464	if (err < 0)
1465		err = -ECONNRESET;
1466	else
1467		err = LDC_PACKET_SIZE;
1468
1469	return err;
1470}
1471
1472static const struct ldc_mode_ops raw_ops = {
1473	.write		=	write_raw,
1474	.read		=	read_raw,
1475};
1476
1477static int write_nonraw(struct ldc_channel *lp, const void *buf,
1478			unsigned int size)
1479{
1480	unsigned long hv_err, tail;
1481	unsigned int copied;
1482	u32 seq;
1483	int err;
1484
1485	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1486					&lp->chan_state);
1487	if (unlikely(hv_err))
1488		return -EBUSY;
1489
1490	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1491		return ldc_abort(lp);
1492
1493	if (!tx_has_space_for(lp, size))
1494		return -EAGAIN;
1495
1496	seq = lp->snd_nxt;
1497	copied = 0;
1498	tail = lp->tx_tail;
1499	while (copied < size) {
1500		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1501		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1502			    p->u.u_data :
1503			    p->u.r.r_data);
1504		int data_len;
1505
1506		p->type = LDC_DATA;
1507		p->stype = LDC_INFO;
1508		p->ctrl = 0;
1509
1510		data_len = size - copied;
1511		if (data_len > lp->mss)
1512			data_len = lp->mss;
1513
1514		BUG_ON(data_len > LDC_LEN);
1515
1516		p->env = (data_len |
1517			  (copied == 0 ? LDC_START : 0) |
1518			  (data_len == size - copied ? LDC_STOP : 0));
1519
1520		p->seqid = ++seq;
1521
1522		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1523		       p->type,
1524		       p->stype,
1525		       p->ctrl,
1526		       p->env,
1527		       p->seqid);
1528
1529		memcpy(data, buf, data_len);
1530		buf += data_len;
1531		copied += data_len;
1532
1533		tail = tx_advance(lp, tail);
1534	}
1535
1536	err = set_tx_tail(lp, tail);
1537	if (!err) {
1538		lp->snd_nxt = seq;
1539		err = size;
1540	}
1541
1542	return err;
1543}
1544
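/* Fragmentation example (a sketch, assuming stream mode where mss is 48):
 * a 100-byte write is split into three packets whose env bytes are
 *
 *	0x70  (LDC_START | 48)   first 48 bytes
 *	0x30  (48)               middle 48 bytes
 *	0x84  (LDC_STOP  | 4)    final 4 bytes
 *
 * and read_nonraw() below reassembles them by requiring START on the first
 * fragment and stopping at the STOP fragment.
 */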
1545static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1546		      struct ldc_packet *first_frag)
1547{
1548	int err;
1549
1550	if (first_frag)
1551		lp->rcv_nxt = first_frag->seqid - 1;
1552
1553	err = send_data_nack(lp, p);
1554	if (err)
1555		return err;
1556
1557	err = __set_rx_head(lp, lp->rx_tail);
1558	if (err < 0)
1559		return ldc_abort(lp);
1560
1561	return 0;
1562}
1563
1564static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1565{
1566	if (p->stype & LDC_ACK) {
1567		int err = process_data_ack(lp, p);
1568		if (err)
1569			return err;
1570	}
1571	if (p->stype & LDC_NACK)
1572		return ldc_abort(lp);
1573
1574	return 0;
1575}
1576
1577static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1578{
1579	unsigned long dummy;
1580	int limit = 1000;
1581
1582	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1583	       cur_head, lp->rx_head, lp->rx_tail);
1584	while (limit-- > 0) {
1585		unsigned long hv_err;
1586
1587		hv_err = sun4v_ldc_rx_get_state(lp->id,
1588						&dummy,
1589						&lp->rx_tail,
1590						&lp->chan_state);
1591		if (hv_err)
1592			return ldc_abort(lp);
1593
1594		if (lp->chan_state == LDC_CHANNEL_DOWN ||
1595		    lp->chan_state == LDC_CHANNEL_RESETTING)
1596			return -ECONNRESET;
1597
1598		if (cur_head != lp->rx_tail) {
1599			ldcdbg(DATA, "DATA WAIT DONE "
1600			       "head[%lx] tail[%lx] chan_state[%lx]\n",
1601			       dummy, lp->rx_tail, lp->chan_state);
1602			return 0;
1603		}
1604
1605		udelay(1);
1606	}
1607	return -EAGAIN;
1608}
1609
1610static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1611{
1612	int err = __set_rx_head(lp, head);
1613
1614	if (err < 0)
1615		return ldc_abort(lp);
1616
1617	lp->rx_head = head;
1618	return 0;
1619}
1620
1621static void send_data_ack(struct ldc_channel *lp)
1622{
1623	unsigned long new_tail;
1624	struct ldc_packet *p;
1625
1626	p = data_get_tx_packet(lp, &new_tail);
1627	if (likely(p)) {
1628		int err;
1629
1630		memset(p, 0, sizeof(*p));
1631		p->type = LDC_DATA;
1632		p->stype = LDC_ACK;
1633		p->ctrl = 0;
1634		p->seqid = lp->snd_nxt + 1;
1635		p->u.r.ackid = lp->rcv_nxt;
1636
1637		err = send_tx_packet(lp, p, new_tail);
1638		if (!err)
1639			lp->snd_nxt++;
1640	}
1641}
1642
1643static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1644{
1645	struct ldc_packet *first_frag;
1646	unsigned long hv_err, new;
1647	int err, copied;
1648
1649	hv_err = sun4v_ldc_rx_get_state(lp->id,
1650					&lp->rx_head,
1651					&lp->rx_tail,
1652					&lp->chan_state);
1653	if (hv_err)
1654		return ldc_abort(lp);
1655
1656	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1657	    lp->chan_state == LDC_CHANNEL_RESETTING)
1658		return -ECONNRESET;
1659
1660	if (lp->rx_head == lp->rx_tail)
1661		return 0;
1662
1663	first_frag = NULL;
1664	copied = err = 0;
1665	new = lp->rx_head;
1666	while (1) {
1667		struct ldc_packet *p;
1668		int pkt_len;
1669
1670		BUG_ON(new == lp->rx_tail);
1671		p = lp->rx_base + (new / LDC_PACKET_SIZE);
1672
1673		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1674		       "rcv_nxt[%08x]\n",
1675		       p->type,
1676		       p->stype,
1677		       p->ctrl,
1678		       p->env,
1679		       p->seqid,
1680		       p->u.r.ackid,
1681		       lp->rcv_nxt);
1682
1683		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1684			err = rx_bad_seq(lp, p, first_frag);
1685			copied = 0;
1686			break;
1687		}
1688
1689		if (p->type & LDC_CTRL) {
1690			err = process_control_frame(lp, p);
1691			if (err < 0)
1692				break;
1693			err = 0;
1694		}
1695
1696		lp->rcv_nxt = p->seqid;
1697
1698		if (!(p->type & LDC_DATA)) {
1699			new = rx_advance(lp, new);
1700			goto no_data;
1701		}
1702		if (p->stype & (LDC_ACK | LDC_NACK)) {
1703			err = data_ack_nack(lp, p);
1704			if (err)
1705				break;
1706		}
1707		if (!(p->stype & LDC_INFO)) {
1708			new = rx_advance(lp, new);
1709			err = rx_set_head(lp, new);
1710			if (err)
1711				break;
1712			goto no_data;
1713		}
1714
1715		pkt_len = p->env & LDC_LEN;
1716
1717		/* Every initial packet starts with the START bit set.
1718		 *
1719		 * Singleton packets will have both START+STOP set.
1720		 *
1721		 * Fragments will have START set in the first frame, STOP
1722		 * set in the last frame, and neither bit set in middle
1723		 * frames of the packet.
1724		 *
1725		 * Therefore if we are at the beginning of a packet and
1726		 * we don't see START, or we are in the middle of a fragmented
1727		 * packet and do see START, we are unsynchronized and should
1728		 * flush the RX queue.
1729		 */
1730		if ((first_frag == NULL && !(p->env & LDC_START)) ||
1731		    (first_frag != NULL &&  (p->env & LDC_START))) {
1732			if (!first_frag)
1733				new = rx_advance(lp, new);
1734
1735			err = rx_set_head(lp, new);
1736			if (err)
1737				break;
1738
1739			if (!first_frag)
1740				goto no_data;
1741		}
1742		if (!first_frag)
1743			first_frag = p;
1744
1745		if (pkt_len > size - copied) {
1746			/* User didn't give us a big enough buffer,
1747			 * what to do?  This is a pretty serious error.
1748			 *
1749			 * Since we haven't updated the RX ring head to
1750			 * consume any of the packets, signal the error
1751			 * to the user and just leave the RX ring alone.
1752			 *
1753			 * This seems the best behavior because this allows
1754			 * a user of the LDC layer to start with a small
1755			 * RX buffer for ldc_read() calls and use -EMSGSIZE
1756		 * as a cue to enlarge its read buffer.
1757			 */
1758			err = -EMSGSIZE;
1759			break;
1760		}
1761
1762		/* Ok, we are gonna eat this one.  */
1763		new = rx_advance(lp, new);
1764
1765		memcpy(buf,
1766		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1767			p->u.u_data : p->u.r.r_data), pkt_len);
1768		buf += pkt_len;
1769		copied += pkt_len;
1770
1771		if (p->env & LDC_STOP)
1772			break;
1773
1774no_data:
1775		if (new == lp->rx_tail) {
1776			err = rx_data_wait(lp, new);
1777			if (err)
1778				break;
1779		}
1780	}
1781
1782	if (!err)
1783		err = rx_set_head(lp, new);
1784
1785	if (err && first_frag)
1786		lp->rcv_nxt = first_frag->seqid - 1;
1787
1788	if (!err) {
1789		err = copied;
1790		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1791			send_data_ack(lp);
1792	}
1793
1794	return err;
1795}
1796
1797static const struct ldc_mode_ops nonraw_ops = {
1798	.write		=	write_nonraw,
1799	.read		=	read_nonraw,
1800};
1801
1802static int write_stream(struct ldc_channel *lp, const void *buf,
1803			unsigned int size)
1804{
1805	if (size > lp->cfg.mtu)
1806		size = lp->cfg.mtu;
1807	return write_nonraw(lp, buf, size);
1808}
1809
1810static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1811{
1812	if (!lp->mssbuf_len) {
1813		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1814		if (err < 0)
1815			return err;
1816
1817		lp->mssbuf_len = err;
1818		lp->mssbuf_off = 0;
1819	}
1820
1821	if (size > lp->mssbuf_len)
1822		size = lp->mssbuf_len;
1823	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1824
1825	lp->mssbuf_off += size;
1826	lp->mssbuf_len -= size;
1827
1828	return size;
1829}
1830
1831static const struct ldc_mode_ops stream_ops = {
1832	.write		=	write_stream,
1833	.read		=	read_stream,
1834};
1835
1836int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1837{
1838	unsigned long flags;
1839	int err;
1840
1841	if (!buf)
1842		return -EINVAL;
1843
1844	if (!size)
1845		return 0;
1846
1847	spin_lock_irqsave(&lp->lock, flags);
1848
1849	if (lp->hs_state != LDC_HS_COMPLETE)
1850		err = -ENOTCONN;
1851	else
1852		err = lp->mops->write(lp, buf, size);
1853
1854	spin_unlock_irqrestore(&lp->lock, flags);
1855
1856	return err;
1857}
1858EXPORT_SYMBOL(ldc_write);
1859
1860int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1861{
1862	unsigned long flags;
1863	int err;
1864
1865	if (!buf)
1866		return -EINVAL;
1867
1868	if (!size)
1869		return 0;
1870
1871	spin_lock_irqsave(&lp->lock, flags);
1872
1873	if (lp->hs_state != LDC_HS_COMPLETE)
1874		err = -ENOTCONN;
1875	else
1876		err = lp->mops->read(lp, buf, size);
1877
1878	spin_unlock_irqrestore(&lp->lock, flags);
1879
1880	return err;
1881}
1882EXPORT_SYMBOL(ldc_read);
1883
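/* Data-path example once LDC_EVENT_UP has been delivered (sketch; lp and
 * the buffer are placeholders):
 *
 *	char buf[64];
 *	int n;
 *
 *	n = ldc_write(lp, buf, sizeof(buf));	// < 0 on error, else bytes queued
 *	n = ldc_read(lp, buf, sizeof(buf));	// 0 means nothing pending
 *	if (n == -ECONNRESET)
 *		;	// channel reset, wait for another LDC_EVENT_UP
 */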
1884static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1885{
1886	struct iommu_arena *arena = &iommu->arena;
1887	unsigned long n, start, end, limit;
1888	int pass;
1889
1890	limit = arena->limit;
1891	start = arena->hint;
1892	pass = 0;
1893
1894again:
1895	n = bitmap_find_next_zero_area(arena->map, limit, start, npages, 0);
1896	end = n + npages;
1897	if (unlikely(end >= limit)) {
1898		if (likely(pass < 1)) {
1899			limit = start;
1900			start = 0;
1901			pass++;
1902			goto again;
1903		} else {
1904			/* Scanned the whole thing, give up. */
1905			return -1;
1906		}
1907	}
1908	bitmap_set(arena->map, n, npages);
1909
1910	arena->hint = end;
1911
1912	return n;
1913}
1914
1915#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
1916#define COOKIE_PGSZ_CODE_SHIFT	60ULL
1917
1918static u64 pagesize_code(void)
1919{
1920	switch (PAGE_SIZE) {
1921	default:
1922	case (8ULL * 1024ULL):
1923		return 0;
1924	case (64ULL * 1024ULL):
1925		return 1;
1926	case (512ULL * 1024ULL):
1927		return 2;
1928	case (4ULL * 1024ULL * 1024ULL):
1929		return 3;
1930	case (32ULL * 1024ULL * 1024ULL):
1931		return 4;
1932	case (256ULL * 1024ULL * 1024ULL):
1933		return 5;
1934	}
1935}
1936
1937static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1938{
1939	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1940		(index << PAGE_SHIFT) |
1941		page_offset);
1942}
1943
1944static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1945{
1946	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1947
1948	cookie &= ~COOKIE_PGSZ_CODE;
1949
1950	*shift = szcode * 3;
1951
1952	return (cookie >> (13ULL + (szcode * 3ULL)));
1953}
1954
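/* Cookie example (assuming 8K pages, so PAGE_SHIFT is 13 and
 * pagesize_code() is 0): mapping page-table entry 5 at byte offset 0x100
 * gives
 *
 *	make_cookie(5, 0, 0x100) == 0xa100
 *
 * and cookie_to_index(0xa100, &shift) recovers index 5 with shift 0.
 */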
1955static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1956					     unsigned long npages)
1957{
1958	long entry;
1959
1960	entry = arena_alloc(iommu, npages);
1961	if (unlikely(entry < 0))
1962		return NULL;
1963
1964	return iommu->page_table + entry;
1965}
1966
1967static u64 perm_to_mte(unsigned int map_perm)
1968{
1969	u64 mte_base;
1970
1971	mte_base = pagesize_code();
1972
1973	if (map_perm & LDC_MAP_SHADOW) {
1974		if (map_perm & LDC_MAP_R)
1975			mte_base |= LDC_MTE_COPY_R;
1976		if (map_perm & LDC_MAP_W)
1977			mte_base |= LDC_MTE_COPY_W;
1978	}
1979	if (map_perm & LDC_MAP_DIRECT) {
1980		if (map_perm & LDC_MAP_R)
1981			mte_base |= LDC_MTE_READ;
1982		if (map_perm & LDC_MAP_W)
1983			mte_base |= LDC_MTE_WRITE;
1984		if (map_perm & LDC_MAP_X)
1985			mte_base |= LDC_MTE_EXEC;
1986	}
1987	if (map_perm & LDC_MAP_IO) {
1988		if (map_perm & LDC_MAP_R)
1989			mte_base |= LDC_MTE_IOMMU_R;
1990		if (map_perm & LDC_MAP_W)
1991			mte_base |= LDC_MTE_IOMMU_W;
1992	}
1993
1994	return mte_base;
1995}
1996
1997static int pages_in_region(unsigned long base, long len)
1998{
1999	int count = 0;
2000
2001	do {
2002		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2003
2004		len -= (new - base);
2005		base = new;
2006		count++;
2007	} while (len > 0);
2008
2009	return count;
2010}
2011
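/* Example (assuming 8K pages): a 0x300-byte region that starts 0x100 bytes
 * before a page boundary straddles two pages, so pages_in_region() returns 2
 * and two map-table entries are consumed even though len is well under
 * PAGE_SIZE.
 */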
2012struct cookie_state {
2013	struct ldc_mtable_entry		*page_table;
2014	struct ldc_trans_cookie		*cookies;
2015	u64				mte_base;
2016	u64				prev_cookie;
2017	u32				pte_idx;
2018	u32				nc;
2019};
2020
2021static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2022			 unsigned long off, unsigned long len)
2023{
2024	do {
2025		unsigned long tlen, new = pa + PAGE_SIZE;
2026		u64 this_cookie;
2027
2028		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2029
2030		tlen = PAGE_SIZE;
2031		if (off)
2032			tlen = PAGE_SIZE - off;
2033		if (tlen > len)
2034			tlen = len;
2035
2036		this_cookie = make_cookie(sp->pte_idx,
2037					  pagesize_code(), off);
2038
2039		off = 0;
2040
2041		if (this_cookie == sp->prev_cookie) {
2042			sp->cookies[sp->nc - 1].cookie_size += tlen;
2043		} else {
2044			sp->cookies[sp->nc].cookie_addr = this_cookie;
2045			sp->cookies[sp->nc].cookie_size = tlen;
2046			sp->nc++;
2047		}
2048		sp->prev_cookie = this_cookie + tlen;
2049
2050		sp->pte_idx++;
2051
2052		len -= tlen;
2053		pa = new;
2054	} while (len > 0);
2055}
2056
2057static int sg_count_one(struct scatterlist *sg)
2058{
2059	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2060	long len = sg->length;
2061
2062	if ((sg->offset | len) & (8UL - 1))
2063		return -EFAULT;
2064
2065	return pages_in_region(base + sg->offset, len);
2066}
2067
2068static int sg_count_pages(struct scatterlist *sg, int num_sg)
2069{
2070	int count;
2071	int i;
2072
2073	count = 0;
2074	for (i = 0; i < num_sg; i++) {
2075		int err = sg_count_one(sg + i);
2076		if (err < 0)
2077			return err;
2078		count += err;
2079	}
2080
2081	return count;
2082}
2083
2084int ldc_map_sg(struct ldc_channel *lp,
2085	       struct scatterlist *sg, int num_sg,
2086	       struct ldc_trans_cookie *cookies, int ncookies,
2087	       unsigned int map_perm)
2088{
2089	unsigned long i, npages, flags;
2090	struct ldc_mtable_entry *base;
2091	struct cookie_state state;
2092	struct ldc_iommu *iommu;
2093	int err;
2094
2095	if (map_perm & ~LDC_MAP_ALL)
2096		return -EINVAL;
2097
2098	err = sg_count_pages(sg, num_sg);
2099	if (err < 0)
2100		return err;
2101
2102	npages = err;
2103	if (err > ncookies)
2104		return -EMSGSIZE;
2105
2106	iommu = &lp->iommu;
2107
2108	spin_lock_irqsave(&iommu->lock, flags);
2109	base = alloc_npages(iommu, npages);
2110	spin_unlock_irqrestore(&iommu->lock, flags);
2111
2112	if (!base)
2113		return -ENOMEM;
2114
2115	state.page_table = iommu->page_table;
2116	state.cookies = cookies;
2117	state.mte_base = perm_to_mte(map_perm);
2118	state.prev_cookie = ~(u64)0;
2119	state.pte_idx = (base - iommu->page_table);
2120	state.nc = 0;
2121
2122	for (i = 0; i < num_sg; i++)
2123		fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
2124			     sg[i].offset, sg[i].length);
2125
2126	return state.nc;
2127}
2128EXPORT_SYMBOL(ldc_map_sg);
2129
2130int ldc_map_single(struct ldc_channel *lp,
2131		   void *buf, unsigned int len,
2132		   struct ldc_trans_cookie *cookies, int ncookies,
2133		   unsigned int map_perm)
2134{
2135	unsigned long npages, pa, flags;
2136	struct ldc_mtable_entry *base;
2137	struct cookie_state state;
2138	struct ldc_iommu *iommu;
2139
2140	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2141		return -EINVAL;
2142
2143	pa = __pa(buf);
2144	if ((pa | len) & (8UL - 1))
2145		return -EFAULT;
2146
2147	npages = pages_in_region(pa, len);
2148
2149	iommu = &lp->iommu;
2150
2151	spin_lock_irqsave(&iommu->lock, flags);
2152	base = alloc_npages(iommu, npages);
2153	spin_unlock_irqrestore(&iommu->lock, flags);
2154
2155	if (!base)
2156		return -ENOMEM;
2157
2158	state.page_table = iommu->page_table;
2159	state.cookies = cookies;
2160	state.mte_base = perm_to_mte(map_perm);
2161	state.prev_cookie = ~(u64)0;
2162	state.pte_idx = (base - iommu->page_table);
2163	state.nc = 0;
2164	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2165	BUG_ON(state.nc != 1);
2166
2167	return state.nc;
2168}
2169EXPORT_SYMBOL(ldc_map_single);
2170
2171static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2172			u64 cookie, u64 size)
2173{
2174	struct iommu_arena *arena = &iommu->arena;
2175	unsigned long i, shift, index, npages;
2176	struct ldc_mtable_entry *base;
2177
2178	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2179	index = cookie_to_index(cookie, &shift);
2180	base = iommu->page_table + index;
2181
2182	BUG_ON(index > arena->limit ||
2183	       (index + npages) > arena->limit);
2184
2185	for (i = 0; i < npages; i++) {
2186		if (base->cookie)
2187			sun4v_ldc_revoke(id, cookie + (i << shift),
2188					 base->cookie);
2189		base->mte = 0;
2190		__clear_bit(index + i, arena->map);
2191	}
2192}
2193
2194void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2195	       int ncookies)
2196{
2197	struct ldc_iommu *iommu = &lp->iommu;
2198	unsigned long flags;
2199	int i;
2200
2201	spin_lock_irqsave(&iommu->lock, flags);
2202	for (i = 0; i < ncookies; i++) {
2203		u64 addr = cookies[i].cookie_addr;
2204		u64 size = cookies[i].cookie_size;
2205
2206		free_npages(lp->id, iommu, addr, size);
2207	}
2208	spin_unlock_irqrestore(&iommu->lock, flags);
2209}
2210EXPORT_SYMBOL(ldc_unmap);
2211
2212int ldc_copy(struct ldc_channel *lp, int copy_dir,
2213	     void *buf, unsigned int len, unsigned long offset,
2214	     struct ldc_trans_cookie *cookies, int ncookies)
2215{
2216	unsigned int orig_len;
2217	unsigned long ra;
2218	int i;
2219
2220	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2221		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2222		       lp->id, copy_dir);
2223		return -EINVAL;
2224	}
2225
2226	ra = __pa(buf);
2227	if ((ra | len | offset) & (8UL - 1)) {
2228		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2229		       "ra[%lx] len[%x] offset[%lx]\n",
2230		       lp->id, ra, len, offset);
2231		return -EFAULT;
2232	}
2233
2234	if (lp->hs_state != LDC_HS_COMPLETE ||
2235	    (lp->flags & LDC_FLAG_RESET)) {
2236		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2237		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2238		return -ECONNRESET;
2239	}
2240
2241	orig_len = len;
2242	for (i = 0; i < ncookies; i++) {
2243		unsigned long cookie_raddr = cookies[i].cookie_addr;
2244		unsigned long this_len = cookies[i].cookie_size;
2245		unsigned long actual_len;
2246
2247		if (unlikely(offset)) {
2248			unsigned long this_off = offset;
2249
2250			if (this_off > this_len)
2251				this_off = this_len;
2252
2253			offset -= this_off;
2254			this_len -= this_off;
2255			if (!this_len)
2256				continue;
2257			cookie_raddr += this_off;
2258		}
2259
2260		if (this_len > len)
2261			this_len = len;
2262
2263		while (1) {
2264			unsigned long hv_err;
2265
2266			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2267						cookie_raddr, ra,
2268						this_len, &actual_len);
2269			if (unlikely(hv_err)) {
2270				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2271				       "HV error %lu\n",
2272				       lp->id, hv_err);
2273				if (lp->hs_state != LDC_HS_COMPLETE ||
2274				    (lp->flags & LDC_FLAG_RESET))
2275					return -ECONNRESET;
2276				else
2277					return -EFAULT;
2278			}
2279
2280			cookie_raddr += actual_len;
2281			ra += actual_len;
2282			len -= actual_len;
2283			if (actual_len == this_len)
2284				break;
2285
2286			this_len -= actual_len;
2287		}
2288
2289		if (!len)
2290			break;
2291	}
2292
2293	/* It is caller policy what to do about short copies.
2294	 * For example, a networking driver can declare the
2295	 * packet a runt and drop it.
2296	 */
2297
2298	return orig_len - len;
2299}
2300EXPORT_SYMBOL(ldc_copy);
2301
2302void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2303			  struct ldc_trans_cookie *cookies, int *ncookies,
2304			  unsigned int map_perm)
2305{
2306	void *buf;
2307	int err;
2308
2309	if (len & (8UL - 1))
2310		return ERR_PTR(-EINVAL);
2311
2312	buf = kzalloc(len, GFP_KERNEL);
2313	if (!buf)
2314		return ERR_PTR(-ENOMEM);
2315
2316	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2317	if (err < 0) {
2318		kfree(buf);
2319		return ERR_PTR(err);
2320	}
2321	*ncookies = err;
2322
2323	return buf;
2324}
2325EXPORT_SYMBOL(ldc_alloc_exp_dring);
2326
2327void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
2328			struct ldc_trans_cookie *cookies, int ncookies)
2329{
2330	ldc_unmap(lp, cookies, ncookies);
2331	kfree(buf);
2332}
2333EXPORT_SYMBOL(ldc_free_exp_dring);
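/* Illustrative sketch (not part of this file): exporting a 4096-byte
 * descriptor ring to the peer and tearing it down again.  The
 * "example_*" helpers, the ring size and the cookie array capacity are
 * placeholders a calling driver would supply.
 */
static void *example_export_dring(struct ldc_channel *lp,
				  struct ldc_trans_cookie *cookies,
				  int *ncookies /* in: capacity, out: used */)
{
	void *dring = ldc_alloc_exp_dring(lp, 4096, cookies, ncookies,
					  LDC_MAP_SHADOW |
					  LDC_MAP_R | LDC_MAP_W);

	return IS_ERR(dring) ? NULL : dring;
}

static void example_free_dring(struct ldc_channel *lp, void *dring,
			       struct ldc_trans_cookie *cookies, int ncookies)
{
	ldc_free_exp_dring(lp, dring, 4096, cookies, ncookies);
}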
2334
2335static int __init ldc_init(void)
2336{
2337	unsigned long major, minor;
2338	struct mdesc_handle *hp;
2339	const u64 *v;
2340	int err;
2341	u64 mp;
2342
2343	hp = mdesc_grab();
2344	if (!hp)
2345		return -ENODEV;
2346
2347	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2348	err = -ENODEV;
2349	if (mp == MDESC_NODE_NULL)
2350		goto out;
2351
2352	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2353	if (!v)
2354		goto out;
2355
2356	major = 1;
2357	minor = 0;
2358	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2359		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2360		goto out;
2361	}
2362
2363	printk(KERN_INFO "%s", version);
2364
2365	if (!*v) {
2366		printk(KERN_INFO PFX "Domaining disabled.\n");
2367		goto out;
2368	}
2369	ldom_domaining_enabled = 1;
2370	err = 0;
2371
2372out:
2373	mdesc_release(hp);
2374	return err;
2375}
2376
2377core_initcall(ldc_init);
v4.6
   1/* ldc.c: Logical Domain Channel link-layer protocol driver.
   2 *
   3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/export.h>
   8#include <linux/slab.h>
   9#include <linux/spinlock.h>
  10#include <linux/delay.h>
  11#include <linux/errno.h>
  12#include <linux/string.h>
  13#include <linux/scatterlist.h>
  14#include <linux/interrupt.h>
  15#include <linux/list.h>
  16#include <linux/init.h>
  17#include <linux/bitmap.h>
  18#include <linux/iommu-common.h>
  19
  20#include <asm/hypervisor.h>
  21#include <asm/iommu.h>
  22#include <asm/page.h>
  23#include <asm/ldc.h>
  24#include <asm/mdesc.h>
  25
  26#define DRV_MODULE_NAME		"ldc"
  27#define PFX DRV_MODULE_NAME	": "
  28#define DRV_MODULE_VERSION	"1.1"
  29#define DRV_MODULE_RELDATE	"July 22, 2008"
  30
  31#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
  32#define COOKIE_PGSZ_CODE_SHIFT	60ULL
  33
  34
  35static char version[] =
  36	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
  37#define LDC_PACKET_SIZE		64
  38
  39/* Packet header layout for unreliable and reliable mode frames.
  40 * When in RAW mode, packets are simply straight 64-byte payloads
  41 * with no headers.
  42 */
  43struct ldc_packet {
  44	u8			type;
  45#define LDC_CTRL		0x01
  46#define LDC_DATA		0x02
  47#define LDC_ERR			0x10
  48
  49	u8			stype;
  50#define LDC_INFO		0x01
  51#define LDC_ACK			0x02
  52#define LDC_NACK		0x04
  53
  54	u8			ctrl;
  55#define LDC_VERS		0x01 /* Link Version		*/
  56#define LDC_RTS			0x02 /* Request To Send		*/
  57#define LDC_RTR			0x03 /* Ready To Receive	*/
  58#define LDC_RDX			0x04 /* Ready for Data eXchange	*/
  59#define LDC_CTRL_MSK		0x0f
  60
  61	u8			env;
  62#define LDC_LEN			0x3f
  63#define LDC_FRAG_MASK		0xc0
  64#define LDC_START		0x40
  65#define LDC_STOP		0x80
  66
  67	u32			seqid;
  68
  69	union {
  70		u8		u_data[LDC_PACKET_SIZE - 8];
  71		struct {
  72			u32	pad;
  73			u32	ackid;
  74			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
  75		} r;
  76	} u;
  77};
  78
  79struct ldc_version {
  80	u16 major;
  81	u16 minor;
  82};
  83
  84/* Ordered from largest major to lowest.  */
  85static struct ldc_version ver_arr[] = {
  86	{ .major = 1, .minor = 0 },
  87};
  88
  89#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
  90#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)
  91
  92struct ldc_channel;
  93
  94struct ldc_mode_ops {
  95	int (*write)(struct ldc_channel *, const void *, unsigned int);
  96	int (*read)(struct ldc_channel *, void *, unsigned int);
  97};
  98
  99static const struct ldc_mode_ops raw_ops;
 100static const struct ldc_mode_ops nonraw_ops;
 101static const struct ldc_mode_ops stream_ops;
 102
 103int ldom_domaining_enabled;
 104
 105struct ldc_iommu {
 106	/* Protects ldc_unmap.  */
 107	spinlock_t			lock;
 108	struct ldc_mtable_entry		*page_table;
 109	struct iommu_map_table		iommu_map_table;
 110};
 111
 112struct ldc_channel {
 113	/* Protects all operations that depend upon channel state.  */
 114	spinlock_t			lock;
 115
 116	unsigned long			id;
 117
 118	u8				*mssbuf;
 119	u32				mssbuf_len;
 120	u32				mssbuf_off;
 121
 122	struct ldc_packet		*tx_base;
 123	unsigned long			tx_head;
 124	unsigned long			tx_tail;
 125	unsigned long			tx_num_entries;
 126	unsigned long			tx_ra;
 127
 128	unsigned long			tx_acked;
 129
 130	struct ldc_packet		*rx_base;
 131	unsigned long			rx_head;
 132	unsigned long			rx_tail;
 133	unsigned long			rx_num_entries;
 134	unsigned long			rx_ra;
 135
 136	u32				rcv_nxt;
 137	u32				snd_nxt;
 138
 139	unsigned long			chan_state;
 140
 141	struct ldc_channel_config	cfg;
 142	void				*event_arg;
 143
 144	const struct ldc_mode_ops	*mops;
 145
 146	struct ldc_iommu		iommu;
 147
 148	struct ldc_version		ver;
 149
 150	u8				hs_state;
 151#define LDC_HS_CLOSED			0x00
 152#define LDC_HS_OPEN			0x01
 153#define LDC_HS_GOTVERS			0x02
 154#define LDC_HS_SENTRTR			0x03
 155#define LDC_HS_GOTRTR			0x04
 156#define LDC_HS_COMPLETE			0x10
 157
 158	u8				flags;
 159#define LDC_FLAG_ALLOCED_QUEUES		0x01
 160#define LDC_FLAG_REGISTERED_QUEUES	0x02
 161#define LDC_FLAG_REGISTERED_IRQS	0x04
 162#define LDC_FLAG_RESET			0x10
 163
 164	u8				mss;
 165	u8				state;
 166
 167#define LDC_IRQ_NAME_MAX		32
 168	char				rx_irq_name[LDC_IRQ_NAME_MAX];
 169	char				tx_irq_name[LDC_IRQ_NAME_MAX];
 170
 171	struct hlist_head		mh_list;
 172
 173	struct hlist_node		list;
 174};
 175
 176#define ldcdbg(TYPE, f, a...) \
 177do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
 178		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
 179} while (0)
 180
 181static const char *state_to_str(u8 state)
 182{
 183	switch (state) {
 184	case LDC_STATE_INVALID:
 185		return "INVALID";
 186	case LDC_STATE_INIT:
 187		return "INIT";
 188	case LDC_STATE_BOUND:
 189		return "BOUND";
 190	case LDC_STATE_READY:
 191		return "READY";
 192	case LDC_STATE_CONNECTED:
 193		return "CONNECTED";
 194	default:
 195		return "<UNKNOWN>";
 196	}
 197}
 198
 199static void ldc_set_state(struct ldc_channel *lp, u8 state)
 200{
 201	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
 202	       state_to_str(lp->state),
 203	       state_to_str(state));
 204
 205	lp->state = state;
 206}
 207
 208static unsigned long __advance(unsigned long off, unsigned long num_entries)
 209{
 210	off += LDC_PACKET_SIZE;
 211	if (off == (num_entries * LDC_PACKET_SIZE))
 212		off = 0;
 213
 214	return off;
 215}
 216
 217static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
 218{
 219	return __advance(off, lp->rx_num_entries);
 220}
 221
 222static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
 223{
 224	return __advance(off, lp->tx_num_entries);
 225}
 226
 227static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
 228						  unsigned long *new_tail)
 229{
 230	struct ldc_packet *p;
 231	unsigned long t;
 232
 233	t = tx_advance(lp, lp->tx_tail);
 234	if (t == lp->tx_head)
 235		return NULL;
 236
 237	*new_tail = t;
 238
 239	p = lp->tx_base;
 240	return p + (lp->tx_tail / LDC_PACKET_SIZE);
 241}
 242
 243/* When we are in reliable or stream mode, we have to track the next packet
 244 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
 245 * to be careful not to stomp over the queue past that point.  During
 246 * the handshake, we don't have TX data packets pending in the queue
 247 * and that's why handshake_get_tx_packet() need not be mindful of
 248 * lp->tx_acked.
 249 */
 250static unsigned long head_for_data(struct ldc_channel *lp)
 251{
 252	if (lp->cfg.mode == LDC_MODE_STREAM)
 253		return lp->tx_acked;
 254	return lp->tx_head;
 255}
 256
 257static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
 258{
 259	unsigned long limit, tail, new_tail, diff;
 260	unsigned int mss;
 261
 262	limit = head_for_data(lp);
 263	tail = lp->tx_tail;
 264	new_tail = tx_advance(lp, tail);
 265	if (new_tail == limit)
 266		return 0;
 267
 268	if (limit > new_tail)
 269		diff = limit - new_tail;
 270	else
 271		diff = (limit +
 272			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
 273	diff /= LDC_PACKET_SIZE;
 274	mss = lp->mss;
 275
 276	if (diff * mss < size)
 277		return 0;
 278
 279	return 1;
 280}
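/* Worked example (assuming 8K pages, so tx_num_entries == 128): with
 * head_for_data() == 0 and tx_tail at entry 64 (offset 0x1000),
 * new_tail is 0x1040 and
 *
 *	diff = (0 + 128 * 64 - 0x1040) / 64 == 63
 *
 * packets are free, so in UNRELIABLE mode (mss == 56) a write of up to
 * 63 * 56 == 3528 bytes is accepted by tx_has_space_for().
 */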
 281
 282static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
 283					     unsigned long *new_tail)
 284{
 285	struct ldc_packet *p;
 286	unsigned long h, t;
 287
 288	h = head_for_data(lp);
 289	t = tx_advance(lp, lp->tx_tail);
 290	if (t == h)
 291		return NULL;
 292
 293	*new_tail = t;
 294
 295	p = lp->tx_base;
 296	return p + (lp->tx_tail / LDC_PACKET_SIZE);
 297}
 298
 299static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
 300{
 301	unsigned long orig_tail = lp->tx_tail;
 302	int limit = 1000;
 303
 304	lp->tx_tail = tail;
 305	while (limit-- > 0) {
 306		unsigned long err;
 307
 308		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
 309		if (!err)
 310			return 0;
 311
 312		if (err != HV_EWOULDBLOCK) {
 313			lp->tx_tail = orig_tail;
 314			return -EINVAL;
 315		}
 316		udelay(1);
 317	}
 318
 319	lp->tx_tail = orig_tail;
 320	return -EBUSY;
 321}
 322
 323/* This just updates the head value in the hypervisor using
 324 * a polling loop with a timeout.  The caller takes care of
 325 * updating software state representing the head change, if any.
 326 */
 327static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
 328{
 329	int limit = 1000;
 330
 331	while (limit-- > 0) {
 332		unsigned long err;
 333
 334		err = sun4v_ldc_rx_set_qhead(lp->id, head);
 335		if (!err)
 336			return 0;
 337
 338		if (err != HV_EWOULDBLOCK)
 339			return -EINVAL;
 340
 341		udelay(1);
 342	}
 343
 344	return -EBUSY;
 345}
 346
 347static int send_tx_packet(struct ldc_channel *lp,
 348			  struct ldc_packet *p,
 349			  unsigned long new_tail)
 350{
 351	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
 352
 353	return set_tx_tail(lp, new_tail);
 354}
 355
 356static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
 357						 u8 stype, u8 ctrl,
 358						 void *data, int dlen,
 359						 unsigned long *new_tail)
 360{
 361	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
 362
 363	if (p) {
 364		memset(p, 0, sizeof(*p));
 365		p->type = LDC_CTRL;
 366		p->stype = stype;
 367		p->ctrl = ctrl;
 368		if (data)
 369			memcpy(p->u.u_data, data, dlen);
 370	}
 371	return p;
 372}
 373
 374static int start_handshake(struct ldc_channel *lp)
 375{
 376	struct ldc_packet *p;
 377	struct ldc_version *ver;
 378	unsigned long new_tail;
 379
 380	ver = &ver_arr[0];
 381
 382	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
 383	       ver->major, ver->minor);
 384
 385	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 386				   ver, sizeof(*ver), &new_tail);
 387	if (p) {
 388		int err = send_tx_packet(lp, p, new_tail);
 389		if (!err)
 390			lp->flags &= ~LDC_FLAG_RESET;
 391		return err;
 392	}
 393	return -EBUSY;
 394}
 395
 396static int send_version_nack(struct ldc_channel *lp,
 397			     u16 major, u16 minor)
 398{
 399	struct ldc_packet *p;
 400	struct ldc_version ver;
 401	unsigned long new_tail;
 402
 403	ver.major = major;
 404	ver.minor = minor;
 405
 406	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
 407				   &ver, sizeof(ver), &new_tail);
 408	if (p) {
 409		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
 410		       ver.major, ver.minor);
 411
 412		return send_tx_packet(lp, p, new_tail);
 413	}
 414	return -EBUSY;
 415}
 416
 417static int send_version_ack(struct ldc_channel *lp,
 418			    struct ldc_version *vp)
 419{
 420	struct ldc_packet *p;
 421	unsigned long new_tail;
 422
 423	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
 424				   vp, sizeof(*vp), &new_tail);
 425	if (p) {
 426		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
 427		       vp->major, vp->minor);
 428
 429		return send_tx_packet(lp, p, new_tail);
 430	}
 431	return -EBUSY;
 432}
 433
 434static int send_rts(struct ldc_channel *lp)
 435{
 436	struct ldc_packet *p;
 437	unsigned long new_tail;
 438
 439	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
 440				   &new_tail);
 441	if (p) {
 442		p->env = lp->cfg.mode;
 443		p->seqid = 0;
 444		lp->rcv_nxt = 0;
 445
 446		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
 447		       p->env, p->seqid);
 448
 449		return send_tx_packet(lp, p, new_tail);
 450	}
 451	return -EBUSY;
 452}
 453
 454static int send_rtr(struct ldc_channel *lp)
 455{
 456	struct ldc_packet *p;
 457	unsigned long new_tail;
 458
 459	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
 460				   &new_tail);
 461	if (p) {
 462		p->env = lp->cfg.mode;
 463		p->seqid = 0;
 464
 465		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
 466		       p->env, p->seqid);
 467
 468		return send_tx_packet(lp, p, new_tail);
 469	}
 470	return -EBUSY;
 471}
 472
 473static int send_rdx(struct ldc_channel *lp)
 474{
 475	struct ldc_packet *p;
 476	unsigned long new_tail;
 477
 478	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
 479				   &new_tail);
 480	if (p) {
 481		p->env = 0;
 482		p->seqid = ++lp->snd_nxt;
 483		p->u.r.ackid = lp->rcv_nxt;
 484
 485		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
 486		       p->env, p->seqid, p->u.r.ackid);
 487
 488		return send_tx_packet(lp, p, new_tail);
 489	}
 490	return -EBUSY;
 491}
 492
 493static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
 494{
 495	struct ldc_packet *p;
 496	unsigned long new_tail;
 497	int err;
 498
 499	p = data_get_tx_packet(lp, &new_tail);
 500	if (!p)
 501		return -EBUSY;
 502	memset(p, 0, sizeof(*p));
 503	p->type = data_pkt->type;
 504	p->stype = LDC_NACK;
 505	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
 506	p->seqid = lp->snd_nxt + 1;
 507	p->u.r.ackid = lp->rcv_nxt;
 508
 509	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
 510	       p->type, p->ctrl, p->seqid, p->u.r.ackid);
 511
 512	err = send_tx_packet(lp, p, new_tail);
 513	if (!err)
 514		lp->snd_nxt++;
 515
 516	return err;
 517}
 518
 519static int ldc_abort(struct ldc_channel *lp)
 520{
 521	unsigned long hv_err;
 522
 523	ldcdbg(STATE, "ABORT\n");
 524
 525	/* We report but do not act upon the hypervisor errors because
 526	 * there really isn't much we can do if they fail at this point.
 527	 */
 528	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
 529	if (hv_err)
 530		printk(KERN_ERR PFX "ldc_abort: "
 531		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 532		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
 533
 534	hv_err = sun4v_ldc_tx_get_state(lp->id,
 535					&lp->tx_head,
 536					&lp->tx_tail,
 537					&lp->chan_state);
 538	if (hv_err)
 539		printk(KERN_ERR PFX "ldc_abort: "
 540		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
 541		       lp->id, hv_err);
 542
 543	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
 544	if (hv_err)
 545		printk(KERN_ERR PFX "ldc_abort: "
 546		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 547		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
 548
 549	/* Refetch the RX queue state as well, because we could be invoked
 550	 * here in the queue processing context.
 551	 */
 552	hv_err = sun4v_ldc_rx_get_state(lp->id,
 553					&lp->rx_head,
 554					&lp->rx_tail,
 555					&lp->chan_state);
 556	if (hv_err)
 557		printk(KERN_ERR PFX "ldc_abort: "
 558		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
 559		       lp->id, hv_err);
 560
 561	return -ECONNRESET;
 562}
 563
 564static struct ldc_version *find_by_major(u16 major)
 565{
 566	struct ldc_version *ret = NULL;
 567	int i;
 568
 569	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
 570		struct ldc_version *v = &ver_arr[i];
 571		if (v->major <= major) {
 572			ret = v;
 573			break;
 574		}
 575	}
 576	return ret;
 577}
 578
 579static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
 580{
 581	struct ldc_version *vap;
 582	int err;
 583
 584	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
 585	       vp->major, vp->minor);
 586
 587	if (lp->hs_state == LDC_HS_GOTVERS) {
 588		lp->hs_state = LDC_HS_OPEN;
 589		memset(&lp->ver, 0, sizeof(lp->ver));
 590	}
 591
 592	vap = find_by_major(vp->major);
 593	if (!vap) {
 594		err = send_version_nack(lp, 0, 0);
 595	} else if (vap->major != vp->major) {
 596		err = send_version_nack(lp, vap->major, vap->minor);
 597	} else {
 598		struct ldc_version ver = *vp;
 599		if (ver.minor > vap->minor)
 600			ver.minor = vap->minor;
 601		err = send_version_ack(lp, &ver);
 602		if (!err) {
 603			lp->ver = ver;
 604			lp->hs_state = LDC_HS_GOTVERS;
 605		}
 606	}
 607	if (err)
 608		return ldc_abort(lp);
 609
 610	return 0;
 611}
 612
 613static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
 614{
 615	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
 616	       vp->major, vp->minor);
 617
 618	if (lp->hs_state == LDC_HS_GOTVERS) {
 619		if (lp->ver.major != vp->major ||
 620		    lp->ver.minor != vp->minor)
 621			return ldc_abort(lp);
 622	} else {
 623		lp->ver = *vp;
 624		lp->hs_state = LDC_HS_GOTVERS;
 625	}
 626	if (send_rts(lp))
 627		return ldc_abort(lp);
 628	return 0;
 629}
 630
 631static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
 632{
 633	struct ldc_version *vap;
 634	struct ldc_packet *p;
 635	unsigned long new_tail;
 636
 637	if (vp->major == 0 && vp->minor == 0)
 638		return ldc_abort(lp);
 639
 640	vap = find_by_major(vp->major);
 641	if (!vap)
 642		return ldc_abort(lp);
 643
 644	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 645					   vap, sizeof(*vap),
 646					   &new_tail);
 647	if (!p)
 648		return ldc_abort(lp);
 649
 650	return send_tx_packet(lp, p, new_tail);
 651}
 652
 653static int process_version(struct ldc_channel *lp,
 654			   struct ldc_packet *p)
 655{
 656	struct ldc_version *vp;
 657
 658	vp = (struct ldc_version *) p->u.u_data;
 659
 660	switch (p->stype) {
 661	case LDC_INFO:
 662		return process_ver_info(lp, vp);
 663
 664	case LDC_ACK:
 665		return process_ver_ack(lp, vp);
 666
 667	case LDC_NACK:
 668		return process_ver_nack(lp, vp);
 669
 670	default:
 671		return ldc_abort(lp);
 672	}
 673}
 674
 675static int process_rts(struct ldc_channel *lp,
 676		       struct ldc_packet *p)
 677{
 678	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
 679	       p->stype, p->seqid, p->env);
 680
 681	if (p->stype     != LDC_INFO	   ||
 682	    lp->hs_state != LDC_HS_GOTVERS ||
 683	    p->env       != lp->cfg.mode)
 684		return ldc_abort(lp);
 685
 686	lp->snd_nxt = p->seqid;
 687	lp->rcv_nxt = p->seqid;
 688	lp->hs_state = LDC_HS_SENTRTR;
 689	if (send_rtr(lp))
 690		return ldc_abort(lp);
 691
 692	return 0;
 693}
 694
 695static int process_rtr(struct ldc_channel *lp,
 696		       struct ldc_packet *p)
 697{
 698	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
 699	       p->stype, p->seqid, p->env);
 700
 701	if (p->stype     != LDC_INFO ||
 702	    p->env       != lp->cfg.mode)
 703		return ldc_abort(lp);
 704
 705	lp->snd_nxt = p->seqid;
 706	lp->hs_state = LDC_HS_COMPLETE;
 707	ldc_set_state(lp, LDC_STATE_CONNECTED);
 708	send_rdx(lp);
 709
 710	return LDC_EVENT_UP;
 711}
 712
 713static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
 714{
 715	return lp->rcv_nxt + 1 == seqid;
 716}
 717
 718static int process_rdx(struct ldc_channel *lp,
 719		       struct ldc_packet *p)
 720{
 721	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
 722	       p->stype, p->seqid, p->env, p->u.r.ackid);
 723
 724	if (p->stype != LDC_INFO ||
 725	    !(rx_seq_ok(lp, p->seqid)))
 726		return ldc_abort(lp);
 727
 728	lp->rcv_nxt = p->seqid;
 729
 730	lp->hs_state = LDC_HS_COMPLETE;
 731	ldc_set_state(lp, LDC_STATE_CONNECTED);
 732
 733	return LDC_EVENT_UP;
 734}
 735
 736static int process_control_frame(struct ldc_channel *lp,
 737				 struct ldc_packet *p)
 738{
 739	switch (p->ctrl) {
 740	case LDC_VERS:
 741		return process_version(lp, p);
 742
 743	case LDC_RTS:
 744		return process_rts(lp, p);
 745
 746	case LDC_RTR:
 747		return process_rtr(lp, p);
 748
 749	case LDC_RDX:
 750		return process_rdx(lp, p);
 751
 752	default:
 753		return ldc_abort(lp);
 754	}
 755}
 756
 757static int process_error_frame(struct ldc_channel *lp,
 758			       struct ldc_packet *p)
 759{
 760	return ldc_abort(lp);
 761}
 762
 763static int process_data_ack(struct ldc_channel *lp,
 764			    struct ldc_packet *ack)
 765{
 766	unsigned long head = lp->tx_acked;
 767	u32 ackid = ack->u.r.ackid;
 768
 769	while (1) {
 770		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
 771
 772		head = tx_advance(lp, head);
 773
 774		if (p->seqid == ackid) {
 775			lp->tx_acked = head;
 776			return 0;
 777		}
 778		if (head == lp->tx_tail)
 779			return ldc_abort(lp);
 780	}
 781
 782	return 0;
 783}
 784
 785static void send_events(struct ldc_channel *lp, unsigned int event_mask)
 786{
 787	if (event_mask & LDC_EVENT_RESET)
 788		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
 789	if (event_mask & LDC_EVENT_UP)
 790		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
 791	if (event_mask & LDC_EVENT_DATA_READY)
 792		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
 793}
 794
 795static irqreturn_t ldc_rx(int irq, void *dev_id)
 796{
 797	struct ldc_channel *lp = dev_id;
 798	unsigned long orig_state, flags;
 799	unsigned int event_mask;
 800
 801	spin_lock_irqsave(&lp->lock, flags);
 802
 803	orig_state = lp->chan_state;
 804
 805	/* We should probably check for hypervisor errors here and
 806	 * reset the LDC channel if we get one.
 807	 */
 808	sun4v_ldc_rx_get_state(lp->id,
 809			       &lp->rx_head,
 810			       &lp->rx_tail,
 811			       &lp->chan_state);
 812
 813	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 814	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
 815
 816	event_mask = 0;
 817
 818	if (lp->cfg.mode == LDC_MODE_RAW &&
 819	    lp->chan_state == LDC_CHANNEL_UP) {
 820		lp->hs_state = LDC_HS_COMPLETE;
 821		ldc_set_state(lp, LDC_STATE_CONNECTED);
 822
 823		event_mask |= LDC_EVENT_UP;
 824
 825		orig_state = lp->chan_state;
 826	}
 827
 828	/* If we are in reset state, flush the RX queue and ignore
 829	 * everything.
 830	 */
 831	if (lp->flags & LDC_FLAG_RESET) {
 832		(void) __set_rx_head(lp, lp->rx_tail);
 833		goto out;
 834	}
 835
 836	/* Once we finish the handshake, we let the ldc_read()
 837	 * paths do all of the control frame and state management.
 838	 * Just trigger the callback.
 839	 */
 840	if (lp->hs_state == LDC_HS_COMPLETE) {
 841handshake_complete:
 842		if (lp->chan_state != orig_state) {
 843			unsigned int event = LDC_EVENT_RESET;
 844
 845			if (lp->chan_state == LDC_CHANNEL_UP)
 846				event = LDC_EVENT_UP;
 847
 848			event_mask |= event;
 849		}
 850		if (lp->rx_head != lp->rx_tail)
 851			event_mask |= LDC_EVENT_DATA_READY;
 852
 853		goto out;
 854	}
 855
 856	if (lp->chan_state != orig_state)
 857		goto out;
 858
 859	while (lp->rx_head != lp->rx_tail) {
 860		struct ldc_packet *p;
 861		unsigned long new;
 862		int err;
 863
 864		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
 865
 866		switch (p->type) {
 867		case LDC_CTRL:
 868			err = process_control_frame(lp, p);
 869			if (err > 0)
 870				event_mask |= err;
 871			break;
 872
 873		case LDC_DATA:
 874			event_mask |= LDC_EVENT_DATA_READY;
 875			err = 0;
 876			break;
 877
 878		case LDC_ERR:
 879			err = process_error_frame(lp, p);
 880			break;
 881
 882		default:
 883			err = ldc_abort(lp);
 884			break;
 885		}
 886
 887		if (err < 0)
 888			break;
 889
 890		new = lp->rx_head;
 891		new += LDC_PACKET_SIZE;
 892		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
 893			new = 0;
 894		lp->rx_head = new;
 895
 896		err = __set_rx_head(lp, new);
 897		if (err < 0) {
 898			(void) ldc_abort(lp);
 899			break;
 900		}
 901		if (lp->hs_state == LDC_HS_COMPLETE)
 902			goto handshake_complete;
 903	}
 904
 905out:
 906	spin_unlock_irqrestore(&lp->lock, flags);
 907
 908	send_events(lp, event_mask);
 909
 910	return IRQ_HANDLED;
 911}
 912
 913static irqreturn_t ldc_tx(int irq, void *dev_id)
 914{
 915	struct ldc_channel *lp = dev_id;
 916	unsigned long flags, orig_state;
 917	unsigned int event_mask = 0;
 918
 919	spin_lock_irqsave(&lp->lock, flags);
 920
 921	orig_state = lp->chan_state;
 922
 923	/* We should probably check for hypervisor errors here and
 924	 * reset the LDC channel if we get one.
 925	 */
 926	sun4v_ldc_tx_get_state(lp->id,
 927			       &lp->tx_head,
 928			       &lp->tx_tail,
 929			       &lp->chan_state);
 930
 931	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 932	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
 933
 934	if (lp->cfg.mode == LDC_MODE_RAW &&
 935	    lp->chan_state == LDC_CHANNEL_UP) {
 936		lp->hs_state = LDC_HS_COMPLETE;
 937		ldc_set_state(lp, LDC_STATE_CONNECTED);
 938
 939		event_mask |= LDC_EVENT_UP;
 940	}
 941
 942	spin_unlock_irqrestore(&lp->lock, flags);
 943
 944	send_events(lp, event_mask);
 945
 946	return IRQ_HANDLED;
 947}
 948
 949/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
 950 * XXX that addition and removal from the ldc_channel_list has
 951 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 952 * XXX totally pointless as another thread can slip into ldc_alloc()
 953 * XXX and add a channel with the same ID.  There also needs to be
 954 * XXX a spinlock for ldc_channel_list.
 955 */
 956static HLIST_HEAD(ldc_channel_list);
 957
 958static int __ldc_channel_exists(unsigned long id)
 959{
 960	struct ldc_channel *lp;
 961
 962	hlist_for_each_entry(lp, &ldc_channel_list, list) {
 963		if (lp->id == id)
 964			return 1;
 965	}
 966	return 0;
 967}
 968
 969static int alloc_queue(const char *name, unsigned long num_entries,
 970		       struct ldc_packet **base, unsigned long *ra)
 971{
 972	unsigned long size, order;
 973	void *q;
 974
 975	size = num_entries * LDC_PACKET_SIZE;
 976	order = get_order(size);
 977
 978	q = (void *) __get_free_pages(GFP_KERNEL, order);
 979	if (!q) {
 980		printk(KERN_ERR PFX "Alloc of %s queue failed with "
 981		       "size=%lu order=%lu\n", name, size, order);
 982		return -ENOMEM;
 983	}
 984
 985	memset(q, 0, PAGE_SIZE << order);
 986
 987	*base = q;
 988	*ra = __pa(q);
 989
 990	return 0;
 991}
 992
 993static void free_queue(unsigned long num_entries, struct ldc_packet *q)
 994{
 995	unsigned long size, order;
 996
 997	if (!q)
 998		return;
 999
1000	size = num_entries * LDC_PACKET_SIZE;
1001	order = get_order(size);
1002
1003	free_pages((unsigned long)q, order);
1004}
1005
1006static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
1007{
1008	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1009	/* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
1010
1011	cookie &= ~COOKIE_PGSZ_CODE;
1012
1013	return (cookie >> (13ULL + (szcode * 3ULL)));
1014}
1015
1016static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
1017		      unsigned long entry, unsigned long npages)
1018{
1019	struct ldc_mtable_entry *base;
1020	unsigned long i, shift;
1021
1022	shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
1023	base = iommu->page_table + entry;
1024	for (i = 0; i < npages; i++) {
1025		if (base->cookie)
1026			sun4v_ldc_revoke(id, cookie + (i << shift),
1027					 base->cookie);
1028		base->mte = 0;
1029	}
1030}
1031
1032/* XXX Make this configurable... XXX */
1033#define LDC_IOTABLE_SIZE	(8 * 1024)
1034
1035static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
1036{
1037	unsigned long sz, num_tsb_entries, tsbsize, order;
1038	struct ldc_iommu *ldc_iommu = &lp->iommu;
1039	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1040	struct ldc_mtable_entry *table;
1041	unsigned long hv_err;
1042	int err;
1043
1044	num_tsb_entries = LDC_IOTABLE_SIZE;
1045	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1046	spin_lock_init(&ldc_iommu->lock);
1047
1048	sz = num_tsb_entries / 8;
1049	sz = (sz + 7UL) & ~7UL;
1050	iommu->map = kzalloc(sz, GFP_KERNEL);
1051	if (!iommu->map) {
1052		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1053		return -ENOMEM;
1054	}
1055	iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
1056			    NULL, false /* no large pool */,
1057			    1 /* npools */,
1058			    true /* skip span boundary check */);
1059
1060	order = get_order(tsbsize);
1061
1062	table = (struct ldc_mtable_entry *)
1063		__get_free_pages(GFP_KERNEL, order);
1064	err = -ENOMEM;
1065	if (!table) {
1066		printk(KERN_ERR PFX "Alloc of MTE table failed, "
1067		       "size=%lu order=%lu\n", tsbsize, order);
1068		goto out_free_map;
1069	}
1070
1071	memset(table, 0, PAGE_SIZE << order);
1072
1073	ldc_iommu->page_table = table;
1074
1075	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1076					 num_tsb_entries);
1077	err = -EINVAL;
1078	if (hv_err)
1079		goto out_free_table;
1080
1081	return 0;
1082
1083out_free_table:
1084	free_pages((unsigned long) table, order);
1085	ldc_iommu->page_table = NULL;
1086
1087out_free_map:
1088	kfree(iommu->map);
1089	iommu->map = NULL;
1090
1091	return err;
1092}
1093
1094static void ldc_iommu_release(struct ldc_channel *lp)
1095{
1096	struct ldc_iommu *ldc_iommu = &lp->iommu;
1097	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1098	unsigned long num_tsb_entries, tsbsize, order;
1099
1100	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1101
1102	num_tsb_entries = iommu->poolsize * iommu->nr_pools;
1103	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1104	order = get_order(tsbsize);
1105
1106	free_pages((unsigned long) ldc_iommu->page_table, order);
1107	ldc_iommu->page_table = NULL;
1108
1109	kfree(iommu->map);
1110	iommu->map = NULL;
1111}
1112
1113struct ldc_channel *ldc_alloc(unsigned long id,
1114			      const struct ldc_channel_config *cfgp,
1115			      void *event_arg,
1116			      const char *name)
1117{
1118	struct ldc_channel *lp;
1119	const struct ldc_mode_ops *mops;
1120	unsigned long dummy1, dummy2, hv_err;
1121	u8 mss, *mssbuf;
1122	int err;
1123
1124	err = -ENODEV;
1125	if (!ldom_domaining_enabled)
1126		goto out_err;
1127
1128	err = -EINVAL;
1129	if (!cfgp)
1130		goto out_err;
1131	if (!name)
1132		goto out_err;
1133
1134	switch (cfgp->mode) {
1135	case LDC_MODE_RAW:
1136		mops = &raw_ops;
1137		mss = LDC_PACKET_SIZE;
1138		break;
1139
1140	case LDC_MODE_UNRELIABLE:
1141		mops = &nonraw_ops;
1142		mss = LDC_PACKET_SIZE - 8;
1143		break;
1144
1145	case LDC_MODE_STREAM:
1146		mops = &stream_ops;
1147		mss = LDC_PACKET_SIZE - 8 - 8;
1148		break;
1149
1150	default:
1151		goto out_err;
1152	}
1153
1154	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1155		goto out_err;
1156
1157	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1158	err = -ENODEV;
1159	if (hv_err == HV_ECHANNEL)
1160		goto out_err;
1161
1162	err = -EEXIST;
1163	if (__ldc_channel_exists(id))
1164		goto out_err;
1165
1166	mssbuf = NULL;
1167
1168	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1169	err = -ENOMEM;
1170	if (!lp)
1171		goto out_err;
1172
1173	spin_lock_init(&lp->lock);
1174
1175	lp->id = id;
1176
1177	err = ldc_iommu_init(name, lp);
1178	if (err)
1179		goto out_free_ldc;
1180
1181	lp->mops = mops;
1182	lp->mss = mss;
1183
1184	lp->cfg = *cfgp;
1185	if (!lp->cfg.mtu)
1186		lp->cfg.mtu = LDC_DEFAULT_MTU;
1187
1188	if (lp->cfg.mode == LDC_MODE_STREAM) {
1189		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1190		if (!mssbuf) {
1191			err = -ENOMEM;
1192			goto out_free_iommu;
1193		}
1194		lp->mssbuf = mssbuf;
1195	}
1196
1197	lp->event_arg = event_arg;
1198
1199	/* XXX allow setting via ldc_channel_config to override defaults
1200	 * XXX or use some formula based upon mtu
1201	 */
1202	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1203	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1204
1205	err = alloc_queue("TX", lp->tx_num_entries,
1206			  &lp->tx_base, &lp->tx_ra);
1207	if (err)
1208		goto out_free_mssbuf;
1209
1210	err = alloc_queue("RX", lp->rx_num_entries,
1211			  &lp->rx_base, &lp->rx_ra);
1212	if (err)
1213		goto out_free_txq;
1214
1215	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1216
1217	lp->hs_state = LDC_HS_CLOSED;
1218	ldc_set_state(lp, LDC_STATE_INIT);
1219
1220	INIT_HLIST_NODE(&lp->list);
1221	hlist_add_head(&lp->list, &ldc_channel_list);
1222
1223	INIT_HLIST_HEAD(&lp->mh_list);
1224
1225	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1226	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1227
1228	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1229			  lp->rx_irq_name, lp);
1230	if (err)
1231		goto out_free_txq;
1232
1233	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1234			  lp->tx_irq_name, lp);
1235	if (err) {
1236		free_irq(lp->cfg.rx_irq, lp);
1237		goto out_free_txq;
1238	}
1239
1240	return lp;
1241
1242out_free_txq:
1243	free_queue(lp->tx_num_entries, lp->tx_base);
1244
1245out_free_mssbuf:
1246	kfree(mssbuf);
1247
1248out_free_iommu:
1249	ldc_iommu_release(lp);
1250
1251out_free_ldc:
1252	kfree(lp);
1253
1254out_err:
1255	return ERR_PTR(err);
1256}
1257EXPORT_SYMBOL(ldc_alloc);
1258
1259void ldc_unbind(struct ldc_channel *lp)
1260{
1261	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1262		free_irq(lp->cfg.rx_irq, lp);
1263		free_irq(lp->cfg.tx_irq, lp);
1264		lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1265	}
1266
1267	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1268		sun4v_ldc_tx_qconf(lp->id, 0, 0);
1269		sun4v_ldc_rx_qconf(lp->id, 0, 0);
1270		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1271	}
1272	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1273		free_queue(lp->tx_num_entries, lp->tx_base);
1274		free_queue(lp->rx_num_entries, lp->rx_base);
1275		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1276	}
1277
1278	ldc_set_state(lp, LDC_STATE_INIT);
1279}
1280EXPORT_SYMBOL(ldc_unbind);
1281
1282void ldc_free(struct ldc_channel *lp)
1283{
1284	ldc_unbind(lp);
1285	hlist_del(&lp->list);
1286	kfree(lp->mssbuf);
1287	ldc_iommu_release(lp);
1288
1289	kfree(lp);
1290}
1291EXPORT_SYMBOL(ldc_free);
1292
1293/* Bind the channel.  This registers the LDC queues with
1294 * the hypervisor and puts the channel into a pseudo-listening
1295 * state.  This does not initiate a handshake; ldc_connect() does
1296 * that.
1297 */
1298int ldc_bind(struct ldc_channel *lp)
1299{
1300	unsigned long hv_err, flags;
1301	int err = -EINVAL;
1302
1303	if (lp->state != LDC_STATE_INIT)
1304		return -EINVAL;
1305
1306	spin_lock_irqsave(&lp->lock, flags);
1307
1308	enable_irq(lp->cfg.rx_irq);
1309	enable_irq(lp->cfg.tx_irq);
1310
1311	lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1312
1313	err = -ENODEV;
1314	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1315	if (hv_err)
1316		goto out_free_irqs;
1317
1318	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1319	if (hv_err)
1320		goto out_free_irqs;
1321
1322	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1323	if (hv_err)
1324		goto out_unmap_tx;
1325
1326	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1327	if (hv_err)
1328		goto out_unmap_tx;
1329
1330	lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1331
1332	hv_err = sun4v_ldc_tx_get_state(lp->id,
1333					&lp->tx_head,
1334					&lp->tx_tail,
1335					&lp->chan_state);
1336	err = -EBUSY;
1337	if (hv_err)
1338		goto out_unmap_rx;
1339
1340	lp->tx_acked = lp->tx_head;
1341
1342	lp->hs_state = LDC_HS_OPEN;
1343	ldc_set_state(lp, LDC_STATE_BOUND);
1344
1345	spin_unlock_irqrestore(&lp->lock, flags);
1346
1347	return 0;
1348
1349out_unmap_rx:
1350	lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1351	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1352
1353out_unmap_tx:
1354	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1355
1356out_free_irqs:
1357	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1358	free_irq(lp->cfg.tx_irq, lp);
1359	free_irq(lp->cfg.rx_irq, lp);
1360
1361	spin_unlock_irqrestore(&lp->lock, flags);
1362
1363	return err;
1364}
1365EXPORT_SYMBOL(ldc_bind);
1366
1367int ldc_connect(struct ldc_channel *lp)
1368{
1369	unsigned long flags;
1370	int err;
1371
1372	if (lp->cfg.mode == LDC_MODE_RAW)
1373		return -EINVAL;
1374
1375	spin_lock_irqsave(&lp->lock, flags);
1376
1377	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1378	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1379	    lp->hs_state != LDC_HS_OPEN)
1380		err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1381	else
1382		err = start_handshake(lp);
1383
1384	spin_unlock_irqrestore(&lp->lock, flags);
1385
1386	return err;
1387}
1388EXPORT_SYMBOL(ldc_connect);
1389
1390int ldc_disconnect(struct ldc_channel *lp)
1391{
1392	unsigned long hv_err, flags;
1393	int err;
1394
1395	if (lp->cfg.mode == LDC_MODE_RAW)
1396		return -EINVAL;
1397
1398	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1399	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1400		return -EINVAL;
1401
1402	spin_lock_irqsave(&lp->lock, flags);
1403
1404	err = -ENODEV;
1405	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1406	if (hv_err)
1407		goto out_err;
1408
1409	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1410	if (hv_err)
1411		goto out_err;
1412
1413	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1414	if (hv_err)
1415		goto out_err;
1416
1417	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1418	if (hv_err)
1419		goto out_err;
1420
1421	ldc_set_state(lp, LDC_STATE_BOUND);
1422	lp->hs_state = LDC_HS_OPEN;
1423	lp->flags |= LDC_FLAG_RESET;
1424
1425	spin_unlock_irqrestore(&lp->lock, flags);
1426
1427	return 0;
1428
1429out_err:
1430	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1431	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1432	free_irq(lp->cfg.tx_irq, lp);
1433	free_irq(lp->cfg.rx_irq, lp);
1434	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1435		       LDC_FLAG_REGISTERED_QUEUES);
1436	ldc_set_state(lp, LDC_STATE_INIT);
1437
1438	spin_unlock_irqrestore(&lp->lock, flags);
1439
1440	return err;
1441}
1442EXPORT_SYMBOL(ldc_disconnect);
1443
1444int ldc_state(struct ldc_channel *lp)
1445{
1446	return lp->state;
1447}
1448EXPORT_SYMBOL(ldc_state);
1449
1450static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1451{
1452	struct ldc_packet *p;
1453	unsigned long new_tail;
1454	int err;
1455
1456	if (size > LDC_PACKET_SIZE)
1457		return -EMSGSIZE;
1458
1459	p = data_get_tx_packet(lp, &new_tail);
1460	if (!p)
1461		return -EAGAIN;
1462
1463	memcpy(p, buf, size);
1464
1465	err = send_tx_packet(lp, p, new_tail);
1466	if (!err)
1467		err = size;
1468
1469	return err;
1470}
1471
1472static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1473{
1474	struct ldc_packet *p;
1475	unsigned long hv_err, new;
1476	int err;
1477
1478	if (size < LDC_PACKET_SIZE)
1479		return -EINVAL;
1480
1481	hv_err = sun4v_ldc_rx_get_state(lp->id,
1482					&lp->rx_head,
1483					&lp->rx_tail,
1484					&lp->chan_state);
1485	if (hv_err)
1486		return ldc_abort(lp);
1487
1488	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1489	    lp->chan_state == LDC_CHANNEL_RESETTING)
1490		return -ECONNRESET;
1491
1492	if (lp->rx_head == lp->rx_tail)
1493		return 0;
1494
1495	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1496	memcpy(buf, p, LDC_PACKET_SIZE);
1497
1498	new = rx_advance(lp, lp->rx_head);
1499	lp->rx_head = new;
1500
1501	err = __set_rx_head(lp, new);
1502	if (err < 0)
1503		err = -ECONNRESET;
1504	else
1505		err = LDC_PACKET_SIZE;
1506
1507	return err;
1508}
1509
1510static const struct ldc_mode_ops raw_ops = {
1511	.write		=	write_raw,
1512	.read		=	read_raw,
1513};
1514
1515static int write_nonraw(struct ldc_channel *lp, const void *buf,
1516			unsigned int size)
1517{
1518	unsigned long hv_err, tail;
1519	unsigned int copied;
1520	u32 seq;
1521	int err;
1522
1523	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1524					&lp->chan_state);
1525	if (unlikely(hv_err))
1526		return -EBUSY;
1527
1528	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1529		return ldc_abort(lp);
1530
1531	if (!tx_has_space_for(lp, size))
1532		return -EAGAIN;
1533
1534	seq = lp->snd_nxt;
1535	copied = 0;
1536	tail = lp->tx_tail;
1537	while (copied < size) {
1538		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1539		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1540			    p->u.u_data :
1541			    p->u.r.r_data);
1542		int data_len;
1543
1544		p->type = LDC_DATA;
1545		p->stype = LDC_INFO;
1546		p->ctrl = 0;
1547
1548		data_len = size - copied;
1549		if (data_len > lp->mss)
1550			data_len = lp->mss;
1551
1552		BUG_ON(data_len > LDC_LEN);
1553
1554		p->env = (data_len |
1555			  (copied == 0 ? LDC_START : 0) |
1556			  (data_len == size - copied ? LDC_STOP : 0));
1557
1558		p->seqid = ++seq;
1559
1560		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1561		       p->type,
1562		       p->stype,
1563		       p->ctrl,
1564		       p->env,
1565		       p->seqid);
1566
1567		memcpy(data, buf, data_len);
1568		buf += data_len;
1569		copied += data_len;
1570
1571		tail = tx_advance(lp, tail);
1572	}
1573
1574	err = set_tx_tail(lp, tail);
1575	if (!err) {
1576		lp->snd_nxt = seq;
1577		err = size;
1578	}
1579
1580	return err;
1581}
1582
1583static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1584		      struct ldc_packet *first_frag)
1585{
1586	int err;
1587
1588	if (first_frag)
1589		lp->rcv_nxt = first_frag->seqid - 1;
1590
1591	err = send_data_nack(lp, p);
1592	if (err)
1593		return err;
1594
1595	err = __set_rx_head(lp, lp->rx_tail);
1596	if (err < 0)
1597		return ldc_abort(lp);
1598
1599	return 0;
1600}
1601
1602static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1603{
1604	if (p->stype & LDC_ACK) {
1605		int err = process_data_ack(lp, p);
1606		if (err)
1607			return err;
1608	}
1609	if (p->stype & LDC_NACK)
1610		return ldc_abort(lp);
1611
1612	return 0;
1613}
1614
1615static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1616{
1617	unsigned long dummy;
1618	int limit = 1000;
1619
1620	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1621	       cur_head, lp->rx_head, lp->rx_tail);
1622	while (limit-- > 0) {
1623		unsigned long hv_err;
1624
1625		hv_err = sun4v_ldc_rx_get_state(lp->id,
1626						&dummy,
1627						&lp->rx_tail,
1628						&lp->chan_state);
1629		if (hv_err)
1630			return ldc_abort(lp);
1631
1632		if (lp->chan_state == LDC_CHANNEL_DOWN ||
1633		    lp->chan_state == LDC_CHANNEL_RESETTING)
1634			return -ECONNRESET;
1635
1636		if (cur_head != lp->rx_tail) {
1637			ldcdbg(DATA, "DATA WAIT DONE "
1638			       "head[%lx] tail[%lx] chan_state[%lx]\n",
1639			       dummy, lp->rx_tail, lp->chan_state);
1640			return 0;
1641		}
1642
1643		udelay(1);
1644	}
1645	return -EAGAIN;
1646}
1647
1648static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1649{
1650	int err = __set_rx_head(lp, head);
1651
1652	if (err < 0)
1653		return ldc_abort(lp);
1654
1655	lp->rx_head = head;
1656	return 0;
1657}
1658
1659static void send_data_ack(struct ldc_channel *lp)
1660{
1661	unsigned long new_tail;
1662	struct ldc_packet *p;
1663
1664	p = data_get_tx_packet(lp, &new_tail);
1665	if (likely(p)) {
1666		int err;
1667
1668		memset(p, 0, sizeof(*p));
1669		p->type = LDC_DATA;
1670		p->stype = LDC_ACK;
1671		p->ctrl = 0;
1672		p->seqid = lp->snd_nxt + 1;
1673		p->u.r.ackid = lp->rcv_nxt;
1674
1675		err = send_tx_packet(lp, p, new_tail);
1676		if (!err)
1677			lp->snd_nxt++;
1678	}
1679}
1680
1681static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1682{
1683	struct ldc_packet *first_frag;
1684	unsigned long hv_err, new;
1685	int err, copied;
1686
1687	hv_err = sun4v_ldc_rx_get_state(lp->id,
1688					&lp->rx_head,
1689					&lp->rx_tail,
1690					&lp->chan_state);
1691	if (hv_err)
1692		return ldc_abort(lp);
1693
1694	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1695	    lp->chan_state == LDC_CHANNEL_RESETTING)
1696		return -ECONNRESET;
1697
1698	if (lp->rx_head == lp->rx_tail)
1699		return 0;
1700
1701	first_frag = NULL;
1702	copied = err = 0;
1703	new = lp->rx_head;
1704	while (1) {
1705		struct ldc_packet *p;
1706		int pkt_len;
1707
1708		BUG_ON(new == lp->rx_tail);
1709		p = lp->rx_base + (new / LDC_PACKET_SIZE);
1710
1711		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1712		       "rcv_nxt[%08x]\n",
1713		       p->type,
1714		       p->stype,
1715		       p->ctrl,
1716		       p->env,
1717		       p->seqid,
1718		       p->u.r.ackid,
1719		       lp->rcv_nxt);
1720
1721		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1722			err = rx_bad_seq(lp, p, first_frag);
1723			copied = 0;
1724			break;
1725		}
1726
1727		if (p->type & LDC_CTRL) {
1728			err = process_control_frame(lp, p);
1729			if (err < 0)
1730				break;
1731			err = 0;
1732		}
1733
1734		lp->rcv_nxt = p->seqid;
1735
1736		if (!(p->type & LDC_DATA)) {
1737			new = rx_advance(lp, new);
1738			goto no_data;
1739		}
1740		if (p->stype & (LDC_ACK | LDC_NACK)) {
1741			err = data_ack_nack(lp, p);
1742			if (err)
1743				break;
1744		}
1745		if (!(p->stype & LDC_INFO)) {
1746			new = rx_advance(lp, new);
1747			err = rx_set_head(lp, new);
1748			if (err)
1749				break;
1750			goto no_data;
1751		}
1752
1753		pkt_len = p->env & LDC_LEN;
1754
1755		/* Every initial packet starts with the START bit set.
1756		 *
1757		 * Singleton packets will have both START+STOP set.
1758		 *
1759		 * Fragments will have START set in the first frame, STOP
1760		 * set in the last frame, and neither bit set in middle
1761		 * frames of the packet.
1762		 *
1763		 * Therefore if we are at the beginning of a packet and
1764		 * we don't see START, or we are in the middle of a fragmented
1765		 * packet and do see START, we are unsynchronized and should
1766		 * flush the RX queue.
1767		 */
1768		if ((first_frag == NULL && !(p->env & LDC_START)) ||
1769		    (first_frag != NULL &&  (p->env & LDC_START))) {
1770			if (!first_frag)
1771				new = rx_advance(lp, new);
1772
1773			err = rx_set_head(lp, new);
1774			if (err)
1775				break;
1776
1777			if (!first_frag)
1778				goto no_data;
1779		}
1780		if (!first_frag)
1781			first_frag = p;
1782
1783		if (pkt_len > size - copied) {
1784			/* User didn't give us a big enough buffer,
1785			 * what to do?  This is a pretty serious error.
1786			 *
1787			 * Since we haven't updated the RX ring head to
1788			 * consume any of the packets, signal the error
1789			 * to the user and just leave the RX ring alone.
1790			 *
1791			 * This seems the best behavior because this allows
1792			 * a user of the LDC layer to start with a small
1793			 * RX buffer for ldc_read() calls and use -EMSGSIZE
1794		 * as a cue to enlarge its read buffer.
1795			 */
1796			err = -EMSGSIZE;
1797			break;
1798		}
1799
1800		/* Ok, we are gonna eat this one.  */
1801		new = rx_advance(lp, new);
1802
1803		memcpy(buf,
1804		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1805			p->u.u_data : p->u.r.r_data), pkt_len);
1806		buf += pkt_len;
1807		copied += pkt_len;
1808
1809		if (p->env & LDC_STOP)
1810			break;
1811
1812no_data:
1813		if (new == lp->rx_tail) {
1814			err = rx_data_wait(lp, new);
1815			if (err)
1816				break;
1817		}
1818	}
1819
1820	if (!err)
1821		err = rx_set_head(lp, new);
1822
1823	if (err && first_frag)
1824		lp->rcv_nxt = first_frag->seqid - 1;
1825
1826	if (!err) {
1827		err = copied;
1828		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1829			send_data_ack(lp);
1830	}
1831
1832	return err;
1833}
1834
1835static const struct ldc_mode_ops nonraw_ops = {
1836	.write		=	write_nonraw,
1837	.read		=	read_nonraw,
1838};
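/* Worked example of the framing done by write_nonraw()/read_nonraw()
 * (assuming LDC_MODE_UNRELIABLE, so mss == LDC_PACKET_SIZE - 8 == 56):
 * a 130-byte ldc_write() goes out as three LDC_DATA packets whose env
 * bytes are
 *
 *	56 | LDC_START		(0x78)
 *	56			(0x38)
 *	18 | LDC_STOP		(0x92)
 *
 * and read_nonraw() reassembles them into the caller's buffer before
 * returning 130.
 */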
1839
1840static int write_stream(struct ldc_channel *lp, const void *buf,
1841			unsigned int size)
1842{
1843	if (size > lp->cfg.mtu)
1844		size = lp->cfg.mtu;
1845	return write_nonraw(lp, buf, size);
1846}
1847
1848static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1849{
1850	if (!lp->mssbuf_len) {
1851		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1852		if (err < 0)
1853			return err;
1854
1855		lp->mssbuf_len = err;
1856		lp->mssbuf_off = 0;
1857	}
1858
1859	if (size > lp->mssbuf_len)
1860		size = lp->mssbuf_len;
1861	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1862
1863	lp->mssbuf_off += size;
1864	lp->mssbuf_len -= size;
1865
1866	return size;
1867}
1868
1869static const struct ldc_mode_ops stream_ops = {
1870	.write		=	write_stream,
1871	.read		=	read_stream,
1872};
1873
1874int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1875{
1876	unsigned long flags;
1877	int err;
1878
1879	if (!buf)
1880		return -EINVAL;
1881
1882	if (!size)
1883		return 0;
1884
1885	spin_lock_irqsave(&lp->lock, flags);
1886
1887	if (lp->hs_state != LDC_HS_COMPLETE)
1888		err = -ENOTCONN;
1889	else
1890		err = lp->mops->write(lp, buf, size);
1891
1892	spin_unlock_irqrestore(&lp->lock, flags);
1893
1894	return err;
1895}
1896EXPORT_SYMBOL(ldc_write);
1897
1898int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1899{
1900	unsigned long flags;
1901	int err;
1902
1903	if (!buf)
1904		return -EINVAL;
1905
1906	if (!size)
1907		return 0;
1908
1909	spin_lock_irqsave(&lp->lock, flags);
1910
1911	if (lp->hs_state != LDC_HS_COMPLETE)
1912		err = -ENOTCONN;
1913	else
1914		err = lp->mops->read(lp, buf, size);
1915
1916	spin_unlock_irqrestore(&lp->lock, flags);
1917
1918	return err;
1919}
1920EXPORT_SYMBOL(ldc_read);
1921
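/* Illustrative sketch (not part of this file): the usual client
 * sequence for a non-RAW channel.  "example_port", the channel id and
 * the IRQ numbers filled into *cfg are placeholders supplied by the
 * calling driver (normally from the machine description).
 */
struct example_port {
	struct ldc_channel *lp;
};

static void example_event(void *arg, int event)
{
	struct example_port *port = arg;
	u64 buf[8];			/* 64 bytes, 8-byte aligned */

	if (event == LDC_EVENT_DATA_READY) {
		while (ldc_read(port->lp, buf, sizeof(buf)) > 0)
			;		/* consume complete packets */
	}
	/* LDC_EVENT_UP / LDC_EVENT_RESET handling elided */
}

static int example_attach(struct example_port *port, unsigned long id,
			  struct ldc_channel_config *cfg)
{
	int err;

	cfg->event = example_event;
	cfg->mode = LDC_MODE_UNRELIABLE;

	port->lp = ldc_alloc(id, cfg, port, "example");
	if (IS_ERR(port->lp))
		return PTR_ERR(port->lp);

	err = ldc_bind(port->lp);		/* register queues with the HV */
	if (!err)
		err = ldc_connect(port->lp);	/* kick off the handshake */
	if (err)
		ldc_free(port->lp);
	return err;
}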
1922static u64 pagesize_code(void)
1923{
1924	switch (PAGE_SIZE) {
1925	default:
1926	case (8ULL * 1024ULL):
1927		return 0;
1928	case (64ULL * 1024ULL):
1929		return 1;
1930	case (512ULL * 1024ULL):
1931		return 2;
1932	case (4ULL * 1024ULL * 1024ULL):
1933		return 3;
1934	case (32ULL * 1024ULL * 1024ULL):
1935		return 4;
1936	case (256ULL * 1024ULL * 1024ULL):
1937		return 5;
1938	}
1939}
1940
1941static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1942{
1943	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1944		(index << PAGE_SHIFT) |
1945		page_offset);
1946}
1947
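/* Worked example (assuming 8K pages, i.e. PAGE_SHIFT == 13 and
 * pagesize_code() == 0): mapping-table entry 5 with a 0x40 byte offset
 * into its page encodes as
 *
 *	make_cookie(5, 0, 0x40) == (0ULL << 60) | (5 << 13) | 0x40
 *				== 0xa040
 *
 * and ldc_cookie_to_index() recovers entry 5 by masking off
 * COOKIE_PGSZ_CODE and shifting right by (13 + 3 * szcode).
 */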
1948
1949static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1950					     unsigned long npages)
1951{
1952	long entry;
1953
1954	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
1955				      npages, NULL, (unsigned long)-1, 0);
1956	if (unlikely(entry == IOMMU_ERROR_CODE))
1957		return NULL;
1958
1959	return iommu->page_table + entry;
1960}
1961
1962static u64 perm_to_mte(unsigned int map_perm)
1963{
1964	u64 mte_base;
1965
1966	mte_base = pagesize_code();
1967
1968	if (map_perm & LDC_MAP_SHADOW) {
1969		if (map_perm & LDC_MAP_R)
1970			mte_base |= LDC_MTE_COPY_R;
1971		if (map_perm & LDC_MAP_W)
1972			mte_base |= LDC_MTE_COPY_W;
1973	}
1974	if (map_perm & LDC_MAP_DIRECT) {
1975		if (map_perm & LDC_MAP_R)
1976			mte_base |= LDC_MTE_READ;
1977		if (map_perm & LDC_MAP_W)
1978			mte_base |= LDC_MTE_WRITE;
1979		if (map_perm & LDC_MAP_X)
1980			mte_base |= LDC_MTE_EXEC;
1981	}
1982	if (map_perm & LDC_MAP_IO) {
1983		if (map_perm & LDC_MAP_R)
1984			mte_base |= LDC_MTE_IOMMU_R;
1985		if (map_perm & LDC_MAP_W)
1986			mte_base |= LDC_MTE_IOMMU_W;
1987	}
1988
1989	return mte_base;
1990}
1991
1992static int pages_in_region(unsigned long base, long len)
1993{
1994	int count = 0;
1995
1996	do {
1997		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
1998
1999		len -= (new - base);
2000		base = new;
2001		count++;
2002	} while (len > 0);
2003
2004	return count;
2005}
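/* Worked example (assuming 8K pages): a 16-byte region starting at
 * physical address 0x1ff8 straddles a page boundary, so
 * pages_in_region(0x1ff8, 0x10) == 2, while the same 16 bytes starting
 * at 0x2000 need only a single mapping-table entry.
 */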
2006
2007struct cookie_state {
2008	struct ldc_mtable_entry		*page_table;
2009	struct ldc_trans_cookie		*cookies;
2010	u64				mte_base;
2011	u64				prev_cookie;
2012	u32				pte_idx;
2013	u32				nc;
2014};
2015
2016static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2017			 unsigned long off, unsigned long len)
2018{
2019	do {
2020		unsigned long tlen, new = pa + PAGE_SIZE;
2021		u64 this_cookie;
2022
2023		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2024
2025		tlen = PAGE_SIZE;
2026		if (off)
2027			tlen = PAGE_SIZE - off;
2028		if (tlen > len)
2029			tlen = len;
2030
2031		this_cookie = make_cookie(sp->pte_idx,
2032					  pagesize_code(), off);
2033
2034		off = 0;
2035
2036		if (this_cookie == sp->prev_cookie) {
2037			sp->cookies[sp->nc - 1].cookie_size += tlen;
2038		} else {
2039			sp->cookies[sp->nc].cookie_addr = this_cookie;
2040			sp->cookies[sp->nc].cookie_size = tlen;
2041			sp->nc++;
2042		}
2043		sp->prev_cookie = this_cookie + tlen;
2044
2045		sp->pte_idx++;
2046
2047		len -= tlen;
2048		pa = new;
2049	} while (len > 0);
2050}
2051
2052static int sg_count_one(struct scatterlist *sg)
2053{
2054	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2055	long len = sg->length;
2056
2057	if ((sg->offset | len) & (8UL - 1))
2058		return -EFAULT;
2059
2060	return pages_in_region(base + sg->offset, len);
2061}
2062
2063static int sg_count_pages(struct scatterlist *sg, int num_sg)
2064{
2065	int count;
2066	int i;
2067
2068	count = 0;
2069	for (i = 0; i < num_sg; i++) {
2070		int err = sg_count_one(sg + i);
2071		if (err < 0)
2072			return err;
2073		count += err;
2074	}
2075
2076	return count;
2077}
2078
2079int ldc_map_sg(struct ldc_channel *lp,
2080	       struct scatterlist *sg, int num_sg,
2081	       struct ldc_trans_cookie *cookies, int ncookies,
2082	       unsigned int map_perm)
2083{
2084	unsigned long i, npages;
2085	struct ldc_mtable_entry *base;
2086	struct cookie_state state;
2087	struct ldc_iommu *iommu;
2088	int err;
2089	struct scatterlist *s;
2090
2091	if (map_perm & ~LDC_MAP_ALL)
2092		return -EINVAL;
2093
2094	err = sg_count_pages(sg, num_sg);
2095	if (err < 0)
2096		return err;
2097
2098	npages = err;
2099	if (err > ncookies)
2100		return -EMSGSIZE;
2101
2102	iommu = &lp->iommu;
2103
2104	base = alloc_npages(iommu, npages);
2105
2106	if (!base)
2107		return -ENOMEM;
2108
2109	state.page_table = iommu->page_table;
2110	state.cookies = cookies;
2111	state.mte_base = perm_to_mte(map_perm);
2112	state.prev_cookie = ~(u64)0;
2113	state.pte_idx = (base - iommu->page_table);
2114	state.nc = 0;
2115
2116	for_each_sg(sg, s, num_sg, i) {
2117		fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
2118			     s->offset, s->length);
2119	}
2120
2121	return state.nc;
2122}
2123EXPORT_SYMBOL(ldc_map_sg);
2124
2125int ldc_map_single(struct ldc_channel *lp,
2126		   void *buf, unsigned int len,
2127		   struct ldc_trans_cookie *cookies, int ncookies,
2128		   unsigned int map_perm)
2129{
2130	unsigned long npages, pa;
2131	struct ldc_mtable_entry *base;
2132	struct cookie_state state;
2133	struct ldc_iommu *iommu;
2134
2135	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2136		return -EINVAL;
2137
2138	pa = __pa(buf);
2139	if ((pa | len) & (8UL - 1))
2140		return -EFAULT;
2141
2142	npages = pages_in_region(pa, len);
2143
2144	iommu = &lp->iommu;
2145
2146	base = alloc_npages(iommu, npages);
2147
2148	if (!base)
2149		return -ENOMEM;
2150
2151	state.page_table = iommu->page_table;
2152	state.cookies = cookies;
2153	state.mte_base = perm_to_mte(map_perm);
2154	state.prev_cookie = ~(u64)0;
2155	state.pte_idx = (base - iommu->page_table);
2156	state.nc = 0;
2157	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2158	BUG_ON(state.nc > ncookies);
2159
2160	return state.nc;
2161}
2162EXPORT_SYMBOL(ldc_map_single);
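
/* Usage sketch (illustrative only): the single-buffer variant.  The
 * buffer's physical address and length must both be 8-byte aligned,
 * and the caller must pass at least one cookie slot and enough slots
 * for every page the region can span, since a shortfall trips the
 * BUG_ON() above rather than returning an error.  tx_desc and
 * tx_desc_len are hypothetical and assumed small enough to span at
 * most two pages.
 *
 *	struct ldc_trans_cookie cookies[2];
 *	int nc;
 *
 *	nc = ldc_map_single(lp, tx_desc, tx_desc_len, cookies, 2,
 *			    LDC_MAP_RW);
 *	if (nc < 0)
 *		return nc;
 */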
2163
2164
2165static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2166			u64 cookie, u64 size)
2167{
2168	unsigned long npages, entry;
2169
2170	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2171
2172	entry = ldc_cookie_to_index(cookie, iommu);
2173	ldc_demap(iommu, id, cookie, entry, npages);
2174	iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
2175}
2176
2177void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2178	       int ncookies)
2179{
2180	struct ldc_iommu *iommu = &lp->iommu;
2181	int i;
2182	unsigned long flags;
2183
2184	spin_lock_irqsave(&iommu->lock, flags);
2185	for (i = 0; i < ncookies; i++) {
2186		u64 addr = cookies[i].cookie_addr;
2187		u64 size = cookies[i].cookie_size;
2188
2189		free_npages(lp->id, iommu, addr, size);
2190	}
2191	spin_unlock_irqrestore(&iommu->lock, flags);
2192}
2193EXPORT_SYMBOL(ldc_unmap);
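
/* The cookies returned by ldc_map_sg() and ldc_map_single() remain
 * live until they are handed back here; teardown for the sketches
 * above would simply be:
 *
 *	ldc_unmap(lp, cookies, nc);
 */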
2194
2195int ldc_copy(struct ldc_channel *lp, int copy_dir,
2196	     void *buf, unsigned int len, unsigned long offset,
2197	     struct ldc_trans_cookie *cookies, int ncookies)
2198{
2199	unsigned int orig_len;
2200	unsigned long ra;
2201	int i;
2202
2203	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2204		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2205		       lp->id, copy_dir);
2206		return -EINVAL;
2207	}
2208
2209	ra = __pa(buf);
2210	if ((ra | len | offset) & (8UL - 1)) {
2211		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2212		       "ra[%lx] len[%x] offset[%lx]\n",
2213		       lp->id, ra, len, offset);
2214		return -EFAULT;
2215	}
2216
2217	if (lp->hs_state != LDC_HS_COMPLETE ||
2218	    (lp->flags & LDC_FLAG_RESET)) {
2219		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2220		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2221		return -ECONNRESET;
2222	}
2223
2224	orig_len = len;
2225	for (i = 0; i < ncookies; i++) {
2226		unsigned long cookie_raddr = cookies[i].cookie_addr;
2227		unsigned long this_len = cookies[i].cookie_size;
2228		unsigned long actual_len;
2229
2230		if (unlikely(offset)) {
2231			unsigned long this_off = offset;
2232
2233			if (this_off > this_len)
2234				this_off = this_len;
2235
2236			offset -= this_off;
2237			this_len -= this_off;
2238			if (!this_len)
2239				continue;
2240			cookie_raddr += this_off;
2241		}
2242
2243		if (this_len > len)
2244			this_len = len;
2245
2246		while (1) {
2247			unsigned long hv_err;
2248
2249			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2250						cookie_raddr, ra,
2251						this_len, &actual_len);
2252			if (unlikely(hv_err)) {
2253				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2254				       "HV error %lu\n",
2255				       lp->id, hv_err);
2256				if (lp->hs_state != LDC_HS_COMPLETE ||
2257				    (lp->flags & LDC_FLAG_RESET))
2258					return -ECONNRESET;
2259				else
2260					return -EFAULT;
2261			}
2262
2263			cookie_raddr += actual_len;
2264			ra += actual_len;
2265			len -= actual_len;
2266			if (actual_len == this_len)
2267				break;
2268
2269			this_len -= actual_len;
2270		}
2271
2272		if (!len)
2273			break;
2274	}
2275
2276	/* It is caller policy what to do about short copies.
2277	 * For example, a networking driver can declare the
2278	 * packet a runt and drop it.
2279	 */
2280
2281	return orig_len - len;
2282}
2283EXPORT_SYMBOL(ldc_copy);
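
/* Usage sketch (illustrative only): pull a peer-described region
 * into a local, 8-byte aligned staging buffer and apply the "caller
 * policy" noted above by rejecting short copies.  The peer cookie
 * array, its length and want_len would normally arrive in a higher
 * level protocol message; all names are hypothetical and want_len is
 * assumed to be a multiple of 8.
 *
 *	int done;
 *
 *	done = ldc_copy(lp, LDC_COPY_IN, local_buf, want_len, 0,
 *			peer_cookies, peer_ncookies);
 *	if (done < 0)
 *		return done;
 *	if (done < want_len)
 *		return -EIO;
 *
 * Returning -EIO here is this caller's policy; a network driver
 * might instead count the short copy as a dropped runt, as the
 * comment above suggests.
 */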
2284
2285void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2286			  struct ldc_trans_cookie *cookies, int *ncookies,
2287			  unsigned int map_perm)
2288{
2289	void *buf;
2290	int err;
2291
2292	if (len & (8UL - 1))
2293		return ERR_PTR(-EINVAL);
2294
2295	buf = kzalloc(len, GFP_ATOMIC);
2296	if (!buf)
2297		return ERR_PTR(-ENOMEM);
2298
2299	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2300	if (err < 0) {
2301		kfree(buf);
2302		return ERR_PTR(err);
2303	}
2304	*ncookies = err;
2305
2306	return buf;
2307}
2308EXPORT_SYMBOL(ldc_alloc_exp_dring);
2309
2310void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
2311			struct ldc_trans_cookie *cookies, int ncookies)
2312{
2313	ldc_unmap(lp, cookies, ncookies);
2314	kfree(buf);
2315}
2316EXPORT_SYMBOL(ldc_free_exp_dring);
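
/* Usage sketch (illustrative only): exporting a descriptor ring to
 * the peer.  ring_len is hypothetical, must be a multiple of 8, and
 * the cookie array must cover the worst-case number of pages the
 * ring can span; on return, dring_ncookies holds the number of
 * cookies actually used.
 *
 *	struct ldc_trans_cookie dring_cookies[4];
 *	int dring_ncookies = 4;
 *	void *dring;
 *
 *	dring = ldc_alloc_exp_dring(lp, ring_len, dring_cookies,
 *				    &dring_ncookies, LDC_MAP_RW);
 *	if (IS_ERR(dring))
 *		return PTR_ERR(dring);
 *
 * and the matching teardown once the ring is retired:
 *
 *	ldc_free_exp_dring(lp, dring, ring_len, dring_cookies,
 *			   dring_ncookies);
 */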
2317
2318static int __init ldc_init(void)
2319{
2320	unsigned long major, minor;
2321	struct mdesc_handle *hp;
2322	const u64 *v;
2323	int err;
2324	u64 mp;
2325
2326	hp = mdesc_grab();
2327	if (!hp)
2328		return -ENODEV;
2329
2330	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2331	err = -ENODEV;
2332	if (mp == MDESC_NODE_NULL)
2333		goto out;
2334
2335	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2336	if (!v)
2337		goto out;
2338
2339	major = 1;
2340	minor = 0;
2341	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2342		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2343		goto out;
2344	}
2345
2346	printk(KERN_INFO "%s", version);
2347
2348	if (!*v) {
2349		printk(KERN_INFO PFX "Domaining disabled.\n");
2350		goto out;
2351	}
2352	ldom_domaining_enabled = 1;
2353	err = 0;
2354
2355out:
2356	mdesc_release(hp);
2357	return err;
2358}
2359
2360core_initcall(ldc_init);