v4.17
   1// SPDX-License-Identifier: GPL-2.0
   2/* ldc.c: Logical Domain Channel link-layer protocol driver.
   3 *
   4 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
   5 */
   6
   7#include <linux/kernel.h>
   8#include <linux/export.h>
   9#include <linux/slab.h>
  10#include <linux/spinlock.h>
  11#include <linux/delay.h>
  12#include <linux/errno.h>
  13#include <linux/string.h>
  14#include <linux/scatterlist.h>
  15#include <linux/interrupt.h>
  16#include <linux/list.h>
  17#include <linux/init.h>
  18#include <linux/bitmap.h>
  19#include <linux/iommu-common.h>
  20
  21#include <asm/hypervisor.h>
  22#include <asm/iommu.h>
  23#include <asm/page.h>
  24#include <asm/ldc.h>
  25#include <asm/mdesc.h>
  26
  27#define DRV_MODULE_NAME		"ldc"
  28#define PFX DRV_MODULE_NAME	": "
  29#define DRV_MODULE_VERSION	"1.1"
  30#define DRV_MODULE_RELDATE	"July 22, 2008"
  31
  32#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
  33#define COOKIE_PGSZ_CODE_SHIFT	60ULL
  34
  35
  36static char version[] =
  37	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
  38
  39/* Packet header layout for unreliable and reliable mode frames.
  40 * When in RAW mode, packets are simply straight 64-byte payloads
  41 * with no headers.
  42 */
  43struct ldc_packet {
  44	u8			type;
  45#define LDC_CTRL		0x01
  46#define LDC_DATA		0x02
  47#define LDC_ERR			0x10
  48
  49	u8			stype;
  50#define LDC_INFO		0x01
  51#define LDC_ACK			0x02
  52#define LDC_NACK		0x04
  53
  54	u8			ctrl;
  55#define LDC_VERS		0x01 /* Link Version		*/
  56#define LDC_RTS			0x02 /* Request To Send		*/
  57#define LDC_RTR			0x03 /* Ready To Receive	*/
  58#define LDC_RDX			0x04 /* Ready for Data eXchange	*/
  59#define LDC_CTRL_MSK		0x0f
  60
  61	u8			env;
  62#define LDC_LEN			0x3f
  63#define LDC_FRAG_MASK		0xc0
  64#define LDC_START		0x40
  65#define LDC_STOP		0x80
  66
  67	u32			seqid;
  68
  69	union {
  70		u8		u_data[LDC_PACKET_SIZE - 8];
  71		struct {
  72			u32	pad;
  73			u32	ackid;
  74			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
  75		} r;
  76	} u;
  77};
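
/* Editor's note -- illustrative sketch only, not part of ldc.c.  The env
 * byte above packs a 6-bit payload length together with the START/STOP
 * fragmentation flags.  A hypothetical helper building env for one
 * fragment of a larger message (mirroring what write_nonraw() does
 * inline further below) could look like this:
 */
#if 0	/* example only, never built */
static u8 example_build_env(unsigned int frag_len, bool first, bool last)
{
	u8 env = frag_len & LDC_LEN;	/* low 6 bits: payload length */

	if (first)
		env |= LDC_START;	/* first fragment of a message */
	if (last)
		env |= LDC_STOP;	/* last fragment of a message */
	return env;
}
#endif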
  78
  79struct ldc_version {
  80	u16 major;
  81	u16 minor;
  82};
  83
  84/* Ordered from largest major to lowest.  */
  85static struct ldc_version ver_arr[] = {
  86	{ .major = 1, .minor = 0 },
  87};
  88
  89#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
  90#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)
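
/* Editor's note, worked numbers (assuming the 64-byte LDC_PACKET_SIZE from
 * asm/ldc.h and the sparc64 8K PAGE_SIZE): LDC_DEFAULT_MTU is 4 * 64 = 256
 * bytes, and LDC_DEFAULT_NUM_ENTRIES is 8192 / 64 = 128 queue entries per
 * direction.
 */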
  91
  92struct ldc_channel;
  93
  94struct ldc_mode_ops {
  95	int (*write)(struct ldc_channel *, const void *, unsigned int);
  96	int (*read)(struct ldc_channel *, void *, unsigned int);
  97};
  98
  99static const struct ldc_mode_ops raw_ops;
 100static const struct ldc_mode_ops nonraw_ops;
 101static const struct ldc_mode_ops stream_ops;
 102
 103int ldom_domaining_enabled;
 104
 105struct ldc_iommu {
 106	/* Protects ldc_unmap.  */
 107	spinlock_t			lock;
 108	struct ldc_mtable_entry		*page_table;
 109	struct iommu_map_table		iommu_map_table;
 110};
 111
 112struct ldc_channel {
 113	/* Protects all operations that depend upon channel state.  */
 114	spinlock_t			lock;
 115
 116	unsigned long			id;
 117
 118	u8				*mssbuf;
 119	u32				mssbuf_len;
 120	u32				mssbuf_off;
 121
 122	struct ldc_packet		*tx_base;
 123	unsigned long			tx_head;
 124	unsigned long			tx_tail;
 125	unsigned long			tx_num_entries;
 126	unsigned long			tx_ra;
 127
 128	unsigned long			tx_acked;
 129
 130	struct ldc_packet		*rx_base;
 131	unsigned long			rx_head;
 132	unsigned long			rx_tail;
 133	unsigned long			rx_num_entries;
 134	unsigned long			rx_ra;
 135
 136	u32				rcv_nxt;
 137	u32				snd_nxt;
 138
 139	unsigned long			chan_state;
 140
 141	struct ldc_channel_config	cfg;
 142	void				*event_arg;
 143
 144	const struct ldc_mode_ops	*mops;
 145
 146	struct ldc_iommu		iommu;
 147
 148	struct ldc_version		ver;
 149
 150	u8				hs_state;
 151#define LDC_HS_CLOSED			0x00
 152#define LDC_HS_OPEN			0x01
 153#define LDC_HS_GOTVERS			0x02
 154#define LDC_HS_SENTRTR			0x03
 155#define LDC_HS_GOTRTR			0x04
 156#define LDC_HS_COMPLETE			0x10
 157
 158	u8				flags;
 159#define LDC_FLAG_ALLOCED_QUEUES		0x01
 160#define LDC_FLAG_REGISTERED_QUEUES	0x02
 161#define LDC_FLAG_REGISTERED_IRQS	0x04
 162#define LDC_FLAG_RESET			0x10
 163
 164	u8				mss;
 165	u8				state;
 166
 167#define LDC_IRQ_NAME_MAX		32
 168	char				rx_irq_name[LDC_IRQ_NAME_MAX];
 169	char				tx_irq_name[LDC_IRQ_NAME_MAX];
 170
 171	struct hlist_head		mh_list;
 172
 173	struct hlist_node		list;
 174};
 175
 176#define ldcdbg(TYPE, f, a...) \
 177do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
 178		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
 179} while (0)
 180
 181#define	LDC_ABORT(lp)	ldc_abort((lp), __func__)
 182
 183static const char *state_to_str(u8 state)
 184{
 185	switch (state) {
 186	case LDC_STATE_INVALID:
 187		return "INVALID";
 188	case LDC_STATE_INIT:
 189		return "INIT";
 190	case LDC_STATE_BOUND:
 191		return "BOUND";
 192	case LDC_STATE_READY:
 193		return "READY";
 194	case LDC_STATE_CONNECTED:
 195		return "CONNECTED";
 196	default:
 197		return "<UNKNOWN>";
 198	}
 199}
 200
 201static unsigned long __advance(unsigned long off, unsigned long num_entries)
 202{
 203	off += LDC_PACKET_SIZE;
 204	if (off == (num_entries * LDC_PACKET_SIZE))
 205		off = 0;
 206
 207	return off;
 208}
 209
 210static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
 211{
 212	return __advance(off, lp->rx_num_entries);
 213}
 214
 215static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
 216{
 217	return __advance(off, lp->tx_num_entries);
 218}
 219
 220static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
 221						  unsigned long *new_tail)
 222{
 223	struct ldc_packet *p;
 224	unsigned long t;
 225
 226	t = tx_advance(lp, lp->tx_tail);
 227	if (t == lp->tx_head)
 228		return NULL;
 229
 230	*new_tail = t;
 231
 232	p = lp->tx_base;
 233	return p + (lp->tx_tail / LDC_PACKET_SIZE);
 234}
 235
  236/* When we are in reliable or stream mode, we have to track the next packet
 237 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
 238 * to be careful not to stomp over the queue past that point.  During
 239 * the handshake, we don't have TX data packets pending in the queue
 240 * and that's why handshake_get_tx_packet() need not be mindful of
 241 * lp->tx_acked.
 242 */
 243static unsigned long head_for_data(struct ldc_channel *lp)
 244{
 245	if (lp->cfg.mode == LDC_MODE_STREAM)
 246		return lp->tx_acked;
 247	return lp->tx_head;
 248}
 249
 250static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
 251{
 252	unsigned long limit, tail, new_tail, diff;
 253	unsigned int mss;
 254
 255	limit = head_for_data(lp);
 256	tail = lp->tx_tail;
 257	new_tail = tx_advance(lp, tail);
 258	if (new_tail == limit)
 259		return 0;
 260
 261	if (limit > new_tail)
 262		diff = limit - new_tail;
 263	else
 264		diff = (limit +
 265			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
 266	diff /= LDC_PACKET_SIZE;
 267	mss = lp->mss;
 268
 269	if (diff * mss < size)
 270		return 0;
 271
 272	return 1;
 273}
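
/* Editor's note -- worked example, not part of ldc.c: with an empty queue
 * (tx_head == tx_tail) and the assumed default of 128 entries, new_tail is
 * one packet past the tail, so diff evaluates to 127 packets; one slot is
 * always kept free so a full queue can be distinguished from an empty one.
 * With the 56-byte UNRELIABLE mss, a write of up to 127 * 56 bytes would
 * pass this check.
 */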
 274
 275static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
 276					     unsigned long *new_tail)
 277{
 278	struct ldc_packet *p;
 279	unsigned long h, t;
 280
 281	h = head_for_data(lp);
 282	t = tx_advance(lp, lp->tx_tail);
 283	if (t == h)
 284		return NULL;
 285
 286	*new_tail = t;
 287
 288	p = lp->tx_base;
 289	return p + (lp->tx_tail / LDC_PACKET_SIZE);
 290}
 291
 292static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
 293{
 294	unsigned long orig_tail = lp->tx_tail;
 295	int limit = 1000;
 296
 297	lp->tx_tail = tail;
 298	while (limit-- > 0) {
 299		unsigned long err;
 300
 301		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
 302		if (!err)
 303			return 0;
 304
 305		if (err != HV_EWOULDBLOCK) {
 306			lp->tx_tail = orig_tail;
 307			return -EINVAL;
 308		}
 309		udelay(1);
 310	}
 311
 312	lp->tx_tail = orig_tail;
 313	return -EBUSY;
 314}
 315
 316/* This just updates the head value in the hypervisor using
 317 * a polling loop with a timeout.  The caller takes care of
  318 * updating software state representing the head change, if any.
 319 */
 320static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
 321{
 322	int limit = 1000;
 323
 324	while (limit-- > 0) {
 325		unsigned long err;
 326
 327		err = sun4v_ldc_rx_set_qhead(lp->id, head);
 328		if (!err)
 329			return 0;
 330
 331		if (err != HV_EWOULDBLOCK)
 332			return -EINVAL;
 333
 334		udelay(1);
 335	}
 336
 337	return -EBUSY;
 338}
 339
 340static int send_tx_packet(struct ldc_channel *lp,
 341			  struct ldc_packet *p,
 342			  unsigned long new_tail)
 343{
 344	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
 345
 346	return set_tx_tail(lp, new_tail);
 347}
 348
 349static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
 350						 u8 stype, u8 ctrl,
 351						 void *data, int dlen,
 352						 unsigned long *new_tail)
 353{
 354	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
 355
 356	if (p) {
 357		memset(p, 0, sizeof(*p));
 358		p->type = LDC_CTRL;
 359		p->stype = stype;
 360		p->ctrl = ctrl;
 361		if (data)
 362			memcpy(p->u.u_data, data, dlen);
 363	}
 364	return p;
 365}
 366
 367static int start_handshake(struct ldc_channel *lp)
 368{
 369	struct ldc_packet *p;
 370	struct ldc_version *ver;
 371	unsigned long new_tail;
 372
 373	ver = &ver_arr[0];
 374
 375	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
 376	       ver->major, ver->minor);
 377
 378	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 379				   ver, sizeof(*ver), &new_tail);
 380	if (p) {
 381		int err = send_tx_packet(lp, p, new_tail);
 382		if (!err)
 383			lp->flags &= ~LDC_FLAG_RESET;
 384		return err;
 385	}
 386	return -EBUSY;
 387}
 388
 389static int send_version_nack(struct ldc_channel *lp,
 390			     u16 major, u16 minor)
 391{
 392	struct ldc_packet *p;
 393	struct ldc_version ver;
 394	unsigned long new_tail;
 395
 396	ver.major = major;
 397	ver.minor = minor;
 398
 399	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
 400				   &ver, sizeof(ver), &new_tail);
 401	if (p) {
 402		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
 403		       ver.major, ver.minor);
 404
 405		return send_tx_packet(lp, p, new_tail);
 406	}
 407	return -EBUSY;
 408}
 409
 410static int send_version_ack(struct ldc_channel *lp,
 411			    struct ldc_version *vp)
 412{
 413	struct ldc_packet *p;
 414	unsigned long new_tail;
 415
 416	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
 417				   vp, sizeof(*vp), &new_tail);
 418	if (p) {
 419		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
 420		       vp->major, vp->minor);
 421
 422		return send_tx_packet(lp, p, new_tail);
 423	}
 424	return -EBUSY;
 425}
 426
 427static int send_rts(struct ldc_channel *lp)
 428{
 429	struct ldc_packet *p;
 430	unsigned long new_tail;
 431
 432	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
 433				   &new_tail);
 434	if (p) {
 435		p->env = lp->cfg.mode;
 436		p->seqid = 0;
 437		lp->rcv_nxt = 0;
 438
 439		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
 440		       p->env, p->seqid);
 441
 442		return send_tx_packet(lp, p, new_tail);
 443	}
 444	return -EBUSY;
 445}
 446
 447static int send_rtr(struct ldc_channel *lp)
 448{
 449	struct ldc_packet *p;
 450	unsigned long new_tail;
 451
 452	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
 453				   &new_tail);
 454	if (p) {
 455		p->env = lp->cfg.mode;
 456		p->seqid = 0;
 457
 458		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
 459		       p->env, p->seqid);
 460
 461		return send_tx_packet(lp, p, new_tail);
 462	}
 463	return -EBUSY;
 464}
 465
 466static int send_rdx(struct ldc_channel *lp)
 467{
 468	struct ldc_packet *p;
 469	unsigned long new_tail;
 470
 471	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
 472				   &new_tail);
 473	if (p) {
 474		p->env = 0;
 475		p->seqid = ++lp->snd_nxt;
 476		p->u.r.ackid = lp->rcv_nxt;
 477
 478		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
 479		       p->env, p->seqid, p->u.r.ackid);
 480
 481		return send_tx_packet(lp, p, new_tail);
 482	}
 483	return -EBUSY;
 484}
 485
 486static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
 487{
 488	struct ldc_packet *p;
 489	unsigned long new_tail;
 490	int err;
 491
 492	p = data_get_tx_packet(lp, &new_tail);
 493	if (!p)
 494		return -EBUSY;
 495	memset(p, 0, sizeof(*p));
 496	p->type = data_pkt->type;
 497	p->stype = LDC_NACK;
 498	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
 499	p->seqid = lp->snd_nxt + 1;
 500	p->u.r.ackid = lp->rcv_nxt;
 501
 502	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
 503	       p->type, p->ctrl, p->seqid, p->u.r.ackid);
 504
 505	err = send_tx_packet(lp, p, new_tail);
 506	if (!err)
 507		lp->snd_nxt++;
 508
 509	return err;
 510}
 511
 512static int ldc_abort(struct ldc_channel *lp, const char *msg)
 513{
 514	unsigned long hv_err;
 515
 516	ldcdbg(STATE, "ABORT[%s]\n", msg);
 517	ldc_print(lp);
 518
 519	/* We report but do not act upon the hypervisor errors because
 520	 * there really isn't much we can do if they fail at this point.
 521	 */
 522	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
 523	if (hv_err)
 524		printk(KERN_ERR PFX "ldc_abort: "
 525		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 526		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
 527
 528	hv_err = sun4v_ldc_tx_get_state(lp->id,
 529					&lp->tx_head,
 530					&lp->tx_tail,
 531					&lp->chan_state);
 532	if (hv_err)
 533		printk(KERN_ERR PFX "ldc_abort: "
 534		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
 535		       lp->id, hv_err);
 536
 537	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
 538	if (hv_err)
 539		printk(KERN_ERR PFX "ldc_abort: "
 540		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 541		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
 542
 543	/* Refetch the RX queue state as well, because we could be invoked
 544	 * here in the queue processing context.
 545	 */
 546	hv_err = sun4v_ldc_rx_get_state(lp->id,
 547					&lp->rx_head,
 548					&lp->rx_tail,
 549					&lp->chan_state);
 550	if (hv_err)
 551		printk(KERN_ERR PFX "ldc_abort: "
 552		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
 553		       lp->id, hv_err);
 554
 555	return -ECONNRESET;
 556}
 557
 558static struct ldc_version *find_by_major(u16 major)
 559{
 560	struct ldc_version *ret = NULL;
 561	int i;
 562
 563	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
 564		struct ldc_version *v = &ver_arr[i];
 565		if (v->major <= major) {
 566			ret = v;
 567			break;
 568		}
 569	}
 570	return ret;
 571}
 572
 573static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
 574{
 575	struct ldc_version *vap;
 576	int err;
 577
 578	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
 579	       vp->major, vp->minor);
 580
 581	if (lp->hs_state == LDC_HS_GOTVERS) {
 582		lp->hs_state = LDC_HS_OPEN;
 583		memset(&lp->ver, 0, sizeof(lp->ver));
 584	}
 585
 586	vap = find_by_major(vp->major);
 587	if (!vap) {
 588		err = send_version_nack(lp, 0, 0);
 589	} else if (vap->major != vp->major) {
 590		err = send_version_nack(lp, vap->major, vap->minor);
 591	} else {
 592		struct ldc_version ver = *vp;
 593		if (ver.minor > vap->minor)
 594			ver.minor = vap->minor;
 595		err = send_version_ack(lp, &ver);
 596		if (!err) {
 597			lp->ver = ver;
 598			lp->hs_state = LDC_HS_GOTVERS;
 599		}
 600	}
 601	if (err)
 602		return LDC_ABORT(lp);
 603
 604	return 0;
 605}
 606
 607static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
 608{
 609	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
 610	       vp->major, vp->minor);
 611
 612	if (lp->hs_state == LDC_HS_GOTVERS) {
 613		if (lp->ver.major != vp->major ||
 614		    lp->ver.minor != vp->minor)
 615			return LDC_ABORT(lp);
 616	} else {
 617		lp->ver = *vp;
 618		lp->hs_state = LDC_HS_GOTVERS;
 619	}
 620	if (send_rts(lp))
 621		return LDC_ABORT(lp);
 622	return 0;
 623}
 624
 625static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
 626{
 627	struct ldc_version *vap;
 628	struct ldc_packet *p;
 629	unsigned long new_tail;
 630
 631	if (vp->major == 0 && vp->minor == 0)
 632		return LDC_ABORT(lp);
 633
 634	vap = find_by_major(vp->major);
 635	if (!vap)
 636		return LDC_ABORT(lp);
 637
 638	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 639					   vap, sizeof(*vap),
 640					   &new_tail);
 641	if (!p)
 642		return LDC_ABORT(lp);
 643
 644	return send_tx_packet(lp, p, new_tail);
 645}
 646
 647static int process_version(struct ldc_channel *lp,
 648			   struct ldc_packet *p)
 649{
 650	struct ldc_version *vp;
 651
 652	vp = (struct ldc_version *) p->u.u_data;
 653
 654	switch (p->stype) {
 655	case LDC_INFO:
 656		return process_ver_info(lp, vp);
 657
 658	case LDC_ACK:
 659		return process_ver_ack(lp, vp);
 660
 661	case LDC_NACK:
 662		return process_ver_nack(lp, vp);
 663
 664	default:
 665		return LDC_ABORT(lp);
 666	}
 667}
 668
 669static int process_rts(struct ldc_channel *lp,
 670		       struct ldc_packet *p)
 671{
 672	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
 673	       p->stype, p->seqid, p->env);
 674
 675	if (p->stype     != LDC_INFO	   ||
 676	    lp->hs_state != LDC_HS_GOTVERS ||
 677	    p->env       != lp->cfg.mode)
 678		return LDC_ABORT(lp);
 679
 680	lp->snd_nxt = p->seqid;
 681	lp->rcv_nxt = p->seqid;
 682	lp->hs_state = LDC_HS_SENTRTR;
 683	if (send_rtr(lp))
 684		return LDC_ABORT(lp);
 685
 686	return 0;
 687}
 688
 689static int process_rtr(struct ldc_channel *lp,
 690		       struct ldc_packet *p)
 691{
 692	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
 693	       p->stype, p->seqid, p->env);
 694
 695	if (p->stype     != LDC_INFO ||
 696	    p->env       != lp->cfg.mode)
 697		return LDC_ABORT(lp);
 698
 699	lp->snd_nxt = p->seqid;
 700	lp->hs_state = LDC_HS_COMPLETE;
 701	ldc_set_state(lp, LDC_STATE_CONNECTED);
 702	send_rdx(lp);
 703
 704	return LDC_EVENT_UP;
 705}
 706
 707static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
 708{
 709	return lp->rcv_nxt + 1 == seqid;
 710}
 711
 712static int process_rdx(struct ldc_channel *lp,
 713		       struct ldc_packet *p)
 714{
 715	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
 716	       p->stype, p->seqid, p->env, p->u.r.ackid);
 717
 718	if (p->stype != LDC_INFO ||
 719	    !(rx_seq_ok(lp, p->seqid)))
 720		return LDC_ABORT(lp);
 721
 722	lp->rcv_nxt = p->seqid;
 723
 724	lp->hs_state = LDC_HS_COMPLETE;
 725	ldc_set_state(lp, LDC_STATE_CONNECTED);
 726
 727	return LDC_EVENT_UP;
 728}
 729
 730static int process_control_frame(struct ldc_channel *lp,
 731				 struct ldc_packet *p)
 732{
 733	switch (p->ctrl) {
 734	case LDC_VERS:
 735		return process_version(lp, p);
 736
 737	case LDC_RTS:
 738		return process_rts(lp, p);
 739
 740	case LDC_RTR:
 741		return process_rtr(lp, p);
 742
 743	case LDC_RDX:
 744		return process_rdx(lp, p);
 745
 746	default:
 747		return LDC_ABORT(lp);
 748	}
 749}
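
/* Editor's note -- illustrative summary, not part of ldc.c.  Between two
 * endpoints A and B, the control frames handled above implement the
 * handshake roughly as follows (A is whichever side's version proposal
 * gets ACKed):
 *
 *	A -> B : CTRL/INFO/VERS    propose a version      (start_handshake)
 *	B -> A : CTRL/ACK/VERS     accept, or NACK        (process_ver_info)
 *	A -> B : CTRL/INFO/RTS     request to send        (process_ver_ack)
 *	B -> A : CTRL/INFO/RTR     ready to receive       (process_rts)
 *	A -> B : CTRL/INFO/RDX     ready for data         (process_rtr)
 *
 * Each side reaches LDC_HS_COMPLETE and LDC_STATE_CONNECTED once it has
 * sent RDX (process_rtr) or received it (process_rdx).
 */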
 750
 751static int process_error_frame(struct ldc_channel *lp,
 752			       struct ldc_packet *p)
 753{
 754	return LDC_ABORT(lp);
 755}
 756
 757static int process_data_ack(struct ldc_channel *lp,
 758			    struct ldc_packet *ack)
 759{
 760	unsigned long head = lp->tx_acked;
 761	u32 ackid = ack->u.r.ackid;
 762
 763	while (1) {
 764		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
 765
 766		head = tx_advance(lp, head);
 767
 768		if (p->seqid == ackid) {
 769			lp->tx_acked = head;
 770			return 0;
 771		}
 772		if (head == lp->tx_tail)
 773			return LDC_ABORT(lp);
 774	}
 775
 776	return 0;
 777}
 778
 779static void send_events(struct ldc_channel *lp, unsigned int event_mask)
 780{
 781	if (event_mask & LDC_EVENT_RESET)
 782		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
 783	if (event_mask & LDC_EVENT_UP)
 784		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
 785	if (event_mask & LDC_EVENT_DATA_READY)
 786		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
 787}
 788
 789static irqreturn_t ldc_rx(int irq, void *dev_id)
 790{
 791	struct ldc_channel *lp = dev_id;
 792	unsigned long orig_state, flags;
 793	unsigned int event_mask;
 794
 795	spin_lock_irqsave(&lp->lock, flags);
 796
 797	orig_state = lp->chan_state;
 798
 799	/* We should probably check for hypervisor errors here and
 800	 * reset the LDC channel if we get one.
 801	 */
 802	sun4v_ldc_rx_get_state(lp->id,
 803			       &lp->rx_head,
 804			       &lp->rx_tail,
 805			       &lp->chan_state);
 806
 807	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 808	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
 809
 810	event_mask = 0;
 811
 812	if (lp->cfg.mode == LDC_MODE_RAW &&
 813	    lp->chan_state == LDC_CHANNEL_UP) {
 814		lp->hs_state = LDC_HS_COMPLETE;
 815		ldc_set_state(lp, LDC_STATE_CONNECTED);
 816
 817		/*
 818		 * Generate an LDC_EVENT_UP event if the channel
 819		 * was not already up.
 820		 */
 821		if (orig_state != LDC_CHANNEL_UP) {
 822			event_mask |= LDC_EVENT_UP;
 823			orig_state = lp->chan_state;
 824		}
 825	}
 826
 827	/* If we are in reset state, flush the RX queue and ignore
 828	 * everything.
 829	 */
 830	if (lp->flags & LDC_FLAG_RESET) {
 831		(void) ldc_rx_reset(lp);
 832		goto out;
 833	}
 834
 835	/* Once we finish the handshake, we let the ldc_read()
 836	 * paths do all of the control frame and state management.
 837	 * Just trigger the callback.
 838	 */
 839	if (lp->hs_state == LDC_HS_COMPLETE) {
 840handshake_complete:
 841		if (lp->chan_state != orig_state) {
 842			unsigned int event = LDC_EVENT_RESET;
 843
 844			if (lp->chan_state == LDC_CHANNEL_UP)
 845				event = LDC_EVENT_UP;
 846
 847			event_mask |= event;
 848		}
 849		if (lp->rx_head != lp->rx_tail)
 850			event_mask |= LDC_EVENT_DATA_READY;
 851
 852		goto out;
 853	}
 854
 855	if (lp->chan_state != orig_state)
 856		goto out;
 857
 858	while (lp->rx_head != lp->rx_tail) {
 859		struct ldc_packet *p;
 860		unsigned long new;
 861		int err;
 862
 863		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
 864
 865		switch (p->type) {
 866		case LDC_CTRL:
 867			err = process_control_frame(lp, p);
 868			if (err > 0)
 869				event_mask |= err;
 870			break;
 871
 872		case LDC_DATA:
 873			event_mask |= LDC_EVENT_DATA_READY;
 874			err = 0;
 875			break;
 876
 877		case LDC_ERR:
 878			err = process_error_frame(lp, p);
 879			break;
 880
 881		default:
 882			err = LDC_ABORT(lp);
 883			break;
 884		}
 885
 886		if (err < 0)
 887			break;
 888
 889		new = lp->rx_head;
 890		new += LDC_PACKET_SIZE;
 891		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
 892			new = 0;
 893		lp->rx_head = new;
 894
 895		err = __set_rx_head(lp, new);
 896		if (err < 0) {
 897			(void) LDC_ABORT(lp);
 898			break;
 899		}
 900		if (lp->hs_state == LDC_HS_COMPLETE)
 901			goto handshake_complete;
 902	}
 903
 904out:
 905	spin_unlock_irqrestore(&lp->lock, flags);
 906
 907	send_events(lp, event_mask);
 908
 909	return IRQ_HANDLED;
 910}
 911
 912static irqreturn_t ldc_tx(int irq, void *dev_id)
 913{
 914	struct ldc_channel *lp = dev_id;
 915	unsigned long flags, orig_state;
 916	unsigned int event_mask = 0;
 917
 918	spin_lock_irqsave(&lp->lock, flags);
 919
 920	orig_state = lp->chan_state;
 921
 922	/* We should probably check for hypervisor errors here and
 923	 * reset the LDC channel if we get one.
 924	 */
 925	sun4v_ldc_tx_get_state(lp->id,
 926			       &lp->tx_head,
 927			       &lp->tx_tail,
 928			       &lp->chan_state);
 929
 930	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 931	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
 932
 933	if (lp->cfg.mode == LDC_MODE_RAW &&
 934	    lp->chan_state == LDC_CHANNEL_UP) {
 935		lp->hs_state = LDC_HS_COMPLETE;
 936		ldc_set_state(lp, LDC_STATE_CONNECTED);
 937
 938		/*
 939		 * Generate an LDC_EVENT_UP event if the channel
 940		 * was not already up.
 941		 */
 942		if (orig_state != LDC_CHANNEL_UP) {
 943			event_mask |= LDC_EVENT_UP;
 944			orig_state = lp->chan_state;
 945		}
 946	}
 947
 948	spin_unlock_irqrestore(&lp->lock, flags);
 949
 950	send_events(lp, event_mask);
 951
 952	return IRQ_HANDLED;
 953}
 954
 955/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so
 956 * XXX that addition and removal from the ldc_channel_list has
 957 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 958 * XXX totally pointless as another thread can slip into ldc_alloc()
 959 * XXX and add a channel with the same ID.  There also needs to be
 960 * XXX a spinlock for ldc_channel_list.
 961 */
 962static HLIST_HEAD(ldc_channel_list);
 963
 964static int __ldc_channel_exists(unsigned long id)
 965{
 966	struct ldc_channel *lp;
 967
 968	hlist_for_each_entry(lp, &ldc_channel_list, list) {
 969		if (lp->id == id)
 970			return 1;
 971	}
 972	return 0;
 973}
 974
 975static int alloc_queue(const char *name, unsigned long num_entries,
 976		       struct ldc_packet **base, unsigned long *ra)
 977{
 978	unsigned long size, order;
 979	void *q;
 980
 981	size = num_entries * LDC_PACKET_SIZE;
 982	order = get_order(size);
 983
 984	q = (void *) __get_free_pages(GFP_KERNEL, order);
 985	if (!q) {
 986		printk(KERN_ERR PFX "Alloc of %s queue failed with "
 987		       "size=%lu order=%lu\n", name, size, order);
 988		return -ENOMEM;
 989	}
 990
 991	memset(q, 0, PAGE_SIZE << order);
 992
 993	*base = q;
 994	*ra = __pa(q);
 995
 996	return 0;
 997}
 998
 999static void free_queue(unsigned long num_entries, struct ldc_packet *q)
1000{
1001	unsigned long size, order;
1002
1003	if (!q)
1004		return;
1005
1006	size = num_entries * LDC_PACKET_SIZE;
1007	order = get_order(size);
1008
1009	free_pages((unsigned long)q, order);
1010}
1011
1012static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
1013{
1014	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1015	/* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
1016
1017	cookie &= ~COOKIE_PGSZ_CODE;
1018
1019	return (cookie >> (13ULL + (szcode * 3ULL)));
1020}
1021
1022static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
1023		      unsigned long entry, unsigned long npages)
1024{
1025	struct ldc_mtable_entry *base;
1026	unsigned long i, shift;
1027
1028	shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
1029	base = iommu->page_table + entry;
1030	for (i = 0; i < npages; i++) {
1031		if (base->cookie)
1032			sun4v_ldc_revoke(id, cookie + (i << shift),
1033					 base->cookie);
1034		base->mte = 0;
1035	}
1036}
1037
1038/* XXX Make this configurable... XXX */
1039#define LDC_IOTABLE_SIZE	(8 * 1024)
1040
1041static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
1042{
1043	unsigned long sz, num_tsb_entries, tsbsize, order;
1044	struct ldc_iommu *ldc_iommu = &lp->iommu;
1045	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1046	struct ldc_mtable_entry *table;
1047	unsigned long hv_err;
1048	int err;
1049
1050	num_tsb_entries = LDC_IOTABLE_SIZE;
1051	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1052	spin_lock_init(&ldc_iommu->lock);
1053
1054	sz = num_tsb_entries / 8;
1055	sz = (sz + 7UL) & ~7UL;
1056	iommu->map = kzalloc(sz, GFP_KERNEL);
1057	if (!iommu->map) {
1058		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1059		return -ENOMEM;
1060	}
1061	iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
1062			    NULL, false /* no large pool */,
1063			    1 /* npools */,
1064			    true /* skip span boundary check */);
1065
1066	order = get_order(tsbsize);
1067
1068	table = (struct ldc_mtable_entry *)
1069		__get_free_pages(GFP_KERNEL, order);
1070	err = -ENOMEM;
1071	if (!table) {
1072		printk(KERN_ERR PFX "Alloc of MTE table failed, "
1073		       "size=%lu order=%lu\n", tsbsize, order);
1074		goto out_free_map;
1075	}
1076
1077	memset(table, 0, PAGE_SIZE << order);
1078
1079	ldc_iommu->page_table = table;
1080
1081	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1082					 num_tsb_entries);
1083	err = -EINVAL;
1084	if (hv_err)
1085		goto out_free_table;
1086
1087	return 0;
1088
1089out_free_table:
1090	free_pages((unsigned long) table, order);
1091	ldc_iommu->page_table = NULL;
1092
1093out_free_map:
1094	kfree(iommu->map);
1095	iommu->map = NULL;
1096
1097	return err;
1098}
1099
1100static void ldc_iommu_release(struct ldc_channel *lp)
1101{
1102	struct ldc_iommu *ldc_iommu = &lp->iommu;
1103	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1104	unsigned long num_tsb_entries, tsbsize, order;
1105
1106	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1107
1108	num_tsb_entries = iommu->poolsize * iommu->nr_pools;
1109	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1110	order = get_order(tsbsize);
1111
1112	free_pages((unsigned long) ldc_iommu->page_table, order);
1113	ldc_iommu->page_table = NULL;
1114
1115	kfree(iommu->map);
1116	iommu->map = NULL;
1117}
1118
1119struct ldc_channel *ldc_alloc(unsigned long id,
1120			      const struct ldc_channel_config *cfgp,
1121			      void *event_arg,
1122			      const char *name)
1123{
1124	struct ldc_channel *lp;
1125	const struct ldc_mode_ops *mops;
1126	unsigned long dummy1, dummy2, hv_err;
1127	u8 mss, *mssbuf;
1128	int err;
1129
1130	err = -ENODEV;
1131	if (!ldom_domaining_enabled)
1132		goto out_err;
1133
1134	err = -EINVAL;
1135	if (!cfgp)
1136		goto out_err;
1137	if (!name)
1138		goto out_err;
1139
1140	switch (cfgp->mode) {
1141	case LDC_MODE_RAW:
1142		mops = &raw_ops;
1143		mss = LDC_PACKET_SIZE;
1144		break;
1145
1146	case LDC_MODE_UNRELIABLE:
1147		mops = &nonraw_ops;
1148		mss = LDC_PACKET_SIZE - 8;
1149		break;
1150
1151	case LDC_MODE_STREAM:
1152		mops = &stream_ops;
1153		mss = LDC_PACKET_SIZE - 8 - 8;
1154		break;
1155
1156	default:
1157		goto out_err;
1158	}
1159
1160	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1161		goto out_err;
1162
1163	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1164	err = -ENODEV;
1165	if (hv_err == HV_ECHANNEL)
1166		goto out_err;
1167
1168	err = -EEXIST;
1169	if (__ldc_channel_exists(id))
1170		goto out_err;
1171
1172	mssbuf = NULL;
1173
1174	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1175	err = -ENOMEM;
1176	if (!lp)
1177		goto out_err;
1178
1179	spin_lock_init(&lp->lock);
1180
1181	lp->id = id;
1182
1183	err = ldc_iommu_init(name, lp);
1184	if (err)
1185		goto out_free_ldc;
1186
1187	lp->mops = mops;
1188	lp->mss = mss;
1189
1190	lp->cfg = *cfgp;
1191	if (!lp->cfg.mtu)
1192		lp->cfg.mtu = LDC_DEFAULT_MTU;
1193
1194	if (lp->cfg.mode == LDC_MODE_STREAM) {
1195		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1196		if (!mssbuf) {
1197			err = -ENOMEM;
1198			goto out_free_iommu;
1199		}
1200		lp->mssbuf = mssbuf;
1201	}
1202
1203	lp->event_arg = event_arg;
1204
1205	/* XXX allow setting via ldc_channel_config to override defaults
1206	 * XXX or use some formula based upon mtu
1207	 */
1208	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1209	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1210
1211	err = alloc_queue("TX", lp->tx_num_entries,
1212			  &lp->tx_base, &lp->tx_ra);
1213	if (err)
1214		goto out_free_mssbuf;
1215
1216	err = alloc_queue("RX", lp->rx_num_entries,
1217			  &lp->rx_base, &lp->rx_ra);
1218	if (err)
1219		goto out_free_txq;
1220
1221	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1222
1223	lp->hs_state = LDC_HS_CLOSED;
1224	ldc_set_state(lp, LDC_STATE_INIT);
1225
1226	INIT_HLIST_NODE(&lp->list);
1227	hlist_add_head(&lp->list, &ldc_channel_list);
1228
1229	INIT_HLIST_HEAD(&lp->mh_list);
1230
1231	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1232	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1233
1234	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1235			  lp->rx_irq_name, lp);
1236	if (err)
1237		goto out_free_txq;
1238
1239	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1240			  lp->tx_irq_name, lp);
1241	if (err) {
1242		free_irq(lp->cfg.rx_irq, lp);
1243		goto out_free_txq;
1244	}
1245
1246	return lp;
1247
1248out_free_txq:
1249	free_queue(lp->tx_num_entries, lp->tx_base);
1250
1251out_free_mssbuf:
1252	kfree(mssbuf);
1253
1254out_free_iommu:
1255	ldc_iommu_release(lp);
1256
1257out_free_ldc:
1258	kfree(lp);
1259
1260out_err:
1261	return ERR_PTR(err);
1262}
1263EXPORT_SYMBOL(ldc_alloc);
1264
1265void ldc_unbind(struct ldc_channel *lp)
1266{
1267	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1268		free_irq(lp->cfg.rx_irq, lp);
1269		free_irq(lp->cfg.tx_irq, lp);
1270		lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1271	}
1272
1273	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1274		sun4v_ldc_tx_qconf(lp->id, 0, 0);
1275		sun4v_ldc_rx_qconf(lp->id, 0, 0);
1276		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1277	}
1278	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1279		free_queue(lp->tx_num_entries, lp->tx_base);
1280		free_queue(lp->rx_num_entries, lp->rx_base);
1281		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1282	}
1283
1284	ldc_set_state(lp, LDC_STATE_INIT);
1285}
1286EXPORT_SYMBOL(ldc_unbind);
1287
1288void ldc_free(struct ldc_channel *lp)
1289{
1290	ldc_unbind(lp);
1291	hlist_del(&lp->list);
1292	kfree(lp->mssbuf);
1293	ldc_iommu_release(lp);
1294
1295	kfree(lp);
1296}
1297EXPORT_SYMBOL(ldc_free);
1298
1299/* Bind the channel.  This registers the LDC queues with
1300 * the hypervisor and puts the channel into a pseudo-listening
1301 * state.  This does not initiate a handshake, ldc_connect() does
1302 * that.
1303 */
1304int ldc_bind(struct ldc_channel *lp)
1305{
1306	unsigned long hv_err, flags;
1307	int err = -EINVAL;
1308
1309	if (lp->state != LDC_STATE_INIT)
1310		return -EINVAL;
1311
1312	spin_lock_irqsave(&lp->lock, flags);
1313
1314	enable_irq(lp->cfg.rx_irq);
1315	enable_irq(lp->cfg.tx_irq);
1316
1317	lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1318
1319	err = -ENODEV;
1320	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1321	if (hv_err)
1322		goto out_free_irqs;
1323
1324	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1325	if (hv_err)
1326		goto out_free_irqs;
1327
1328	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1329	if (hv_err)
1330		goto out_unmap_tx;
1331
1332	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1333	if (hv_err)
1334		goto out_unmap_tx;
1335
1336	lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1337
1338	hv_err = sun4v_ldc_tx_get_state(lp->id,
1339					&lp->tx_head,
1340					&lp->tx_tail,
1341					&lp->chan_state);
1342	err = -EBUSY;
1343	if (hv_err)
1344		goto out_unmap_rx;
1345
1346	lp->tx_acked = lp->tx_head;
1347
1348	lp->hs_state = LDC_HS_OPEN;
1349	ldc_set_state(lp, LDC_STATE_BOUND);
1350
1351	if (lp->cfg.mode == LDC_MODE_RAW) {
1352		/*
1353		 * There is no handshake in RAW mode, so handshake
1354		 * is completed.
1355		 */
1356		lp->hs_state = LDC_HS_COMPLETE;
1357	}
1358
1359	spin_unlock_irqrestore(&lp->lock, flags);
1360
1361	return 0;
1362
1363out_unmap_rx:
1364	lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1365	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1366
1367out_unmap_tx:
1368	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1369
1370out_free_irqs:
1371	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1372	free_irq(lp->cfg.tx_irq, lp);
1373	free_irq(lp->cfg.rx_irq, lp);
1374
1375	spin_unlock_irqrestore(&lp->lock, flags);
1376
1377	return err;
1378}
1379EXPORT_SYMBOL(ldc_bind);
1380
1381int ldc_connect(struct ldc_channel *lp)
1382{
1383	unsigned long flags;
1384	int err;
1385
1386	if (lp->cfg.mode == LDC_MODE_RAW)
1387		return -EINVAL;
1388
1389	spin_lock_irqsave(&lp->lock, flags);
1390
1391	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1392	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1393	    lp->hs_state != LDC_HS_OPEN)
1394		err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1395	else
1396		err = start_handshake(lp);
1397
1398	spin_unlock_irqrestore(&lp->lock, flags);
1399
1400	return err;
1401}
1402EXPORT_SYMBOL(ldc_connect);
1403
1404int ldc_disconnect(struct ldc_channel *lp)
1405{
1406	unsigned long hv_err, flags;
1407	int err;
1408
1409	if (lp->cfg.mode == LDC_MODE_RAW)
1410		return -EINVAL;
1411
1412	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1413	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1414		return -EINVAL;
1415
1416	spin_lock_irqsave(&lp->lock, flags);
1417
1418	err = -ENODEV;
1419	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1420	if (hv_err)
1421		goto out_err;
1422
1423	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1424	if (hv_err)
1425		goto out_err;
1426
1427	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1428	if (hv_err)
1429		goto out_err;
1430
1431	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1432	if (hv_err)
1433		goto out_err;
1434
1435	ldc_set_state(lp, LDC_STATE_BOUND);
1436	lp->hs_state = LDC_HS_OPEN;
1437	lp->flags |= LDC_FLAG_RESET;
1438
1439	spin_unlock_irqrestore(&lp->lock, flags);
1440
1441	return 0;
1442
1443out_err:
1444	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1445	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1446	free_irq(lp->cfg.tx_irq, lp);
1447	free_irq(lp->cfg.rx_irq, lp);
1448	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1449		       LDC_FLAG_REGISTERED_QUEUES);
1450	ldc_set_state(lp, LDC_STATE_INIT);
1451
1452	spin_unlock_irqrestore(&lp->lock, flags);
1453
1454	return err;
1455}
1456EXPORT_SYMBOL(ldc_disconnect);
1457
1458int ldc_state(struct ldc_channel *lp)
1459{
1460	return lp->state;
1461}
1462EXPORT_SYMBOL(ldc_state);
1463
1464void ldc_set_state(struct ldc_channel *lp, u8 state)
1465{
1466	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
1467	       state_to_str(lp->state),
1468	       state_to_str(state));
1469
1470	lp->state = state;
1471}
1472EXPORT_SYMBOL(ldc_set_state);
1473
1474int ldc_mode(struct ldc_channel *lp)
1475{
1476	return lp->cfg.mode;
1477}
1478EXPORT_SYMBOL(ldc_mode);
1479
1480int ldc_rx_reset(struct ldc_channel *lp)
1481{
1482	return __set_rx_head(lp, lp->rx_tail);
1483}
1484EXPORT_SYMBOL(ldc_rx_reset);
1485
1486void __ldc_print(struct ldc_channel *lp, const char *caller)
1487{
1488	pr_info("%s: id=0x%lx flags=0x%x state=%s cstate=0x%lx hsstate=0x%x\n"
1489		"\trx_h=0x%lx rx_t=0x%lx rx_n=%ld\n"
1490		"\ttx_h=0x%lx tx_t=0x%lx tx_n=%ld\n"
1491		"\trcv_nxt=%u snd_nxt=%u\n",
1492		caller, lp->id, lp->flags, state_to_str(lp->state),
1493		lp->chan_state, lp->hs_state,
1494		lp->rx_head, lp->rx_tail, lp->rx_num_entries,
1495		lp->tx_head, lp->tx_tail, lp->tx_num_entries,
1496		lp->rcv_nxt, lp->snd_nxt);
1497}
1498EXPORT_SYMBOL(__ldc_print);
1499
1500static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1501{
1502	struct ldc_packet *p;
1503	unsigned long new_tail, hv_err;
1504	int err;
1505
1506	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1507					&lp->chan_state);
1508	if (unlikely(hv_err))
1509		return -EBUSY;
1510
1511	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1512		return LDC_ABORT(lp);
1513
1514	if (size > LDC_PACKET_SIZE)
1515		return -EMSGSIZE;
1516
1517	p = data_get_tx_packet(lp, &new_tail);
1518	if (!p)
1519		return -EAGAIN;
1520
1521	memcpy(p, buf, size);
1522
1523	err = send_tx_packet(lp, p, new_tail);
1524	if (!err)
1525		err = size;
1526
1527	return err;
1528}
1529
1530static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1531{
1532	struct ldc_packet *p;
1533	unsigned long hv_err, new;
1534	int err;
1535
1536	if (size < LDC_PACKET_SIZE)
1537		return -EINVAL;
1538
1539	hv_err = sun4v_ldc_rx_get_state(lp->id,
1540					&lp->rx_head,
1541					&lp->rx_tail,
1542					&lp->chan_state);
1543	if (hv_err)
1544		return LDC_ABORT(lp);
1545
1546	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1547	    lp->chan_state == LDC_CHANNEL_RESETTING)
1548		return -ECONNRESET;
1549
1550	if (lp->rx_head == lp->rx_tail)
1551		return 0;
1552
1553	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1554	memcpy(buf, p, LDC_PACKET_SIZE);
1555
1556	new = rx_advance(lp, lp->rx_head);
1557	lp->rx_head = new;
1558
1559	err = __set_rx_head(lp, new);
1560	if (err < 0)
1561		err = -ECONNRESET;
1562	else
1563		err = LDC_PACKET_SIZE;
1564
1565	return err;
1566}
1567
1568static const struct ldc_mode_ops raw_ops = {
1569	.write		=	write_raw,
1570	.read		=	read_raw,
1571};
1572
1573static int write_nonraw(struct ldc_channel *lp, const void *buf,
1574			unsigned int size)
1575{
1576	unsigned long hv_err, tail;
1577	unsigned int copied;
1578	u32 seq;
1579	int err;
1580
1581	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1582					&lp->chan_state);
1583	if (unlikely(hv_err))
1584		return -EBUSY;
1585
1586	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1587		return LDC_ABORT(lp);
1588
1589	if (!tx_has_space_for(lp, size))
1590		return -EAGAIN;
1591
1592	seq = lp->snd_nxt;
1593	copied = 0;
1594	tail = lp->tx_tail;
1595	while (copied < size) {
1596		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1597		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1598			    p->u.u_data :
1599			    p->u.r.r_data);
1600		int data_len;
1601
1602		p->type = LDC_DATA;
1603		p->stype = LDC_INFO;
1604		p->ctrl = 0;
1605
1606		data_len = size - copied;
1607		if (data_len > lp->mss)
1608			data_len = lp->mss;
1609
1610		BUG_ON(data_len > LDC_LEN);
1611
1612		p->env = (data_len |
1613			  (copied == 0 ? LDC_START : 0) |
1614			  (data_len == size - copied ? LDC_STOP : 0));
1615
1616		p->seqid = ++seq;
1617
1618		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1619		       p->type,
1620		       p->stype,
1621		       p->ctrl,
1622		       p->env,
1623		       p->seqid);
1624
1625		memcpy(data, buf, data_len);
1626		buf += data_len;
1627		copied += data_len;
1628
1629		tail = tx_advance(lp, tail);
1630	}
1631
1632	err = set_tx_tail(lp, tail);
1633	if (!err) {
1634		lp->snd_nxt = seq;
1635		err = size;
1636	}
1637
1638	return err;
1639}
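
/* Editor's note -- worked example, not part of ldc.c: in UNRELIABLE mode
 * mss is LDC_PACKET_SIZE - 8 = 56 bytes (assuming the 64-byte packet size
 * from asm/ldc.h), so a single 200-byte write_nonraw() call above is split
 * into four DATA packets whose env bytes are
 *
 *	56 | LDC_START,   56,   56,   32 | LDC_STOP
 *
 * and read_nonraw() below reassembles them using the same START/STOP
 * rules.
 */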
1640
1641static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1642		      struct ldc_packet *first_frag)
1643{
1644	int err;
1645
1646	if (first_frag)
1647		lp->rcv_nxt = first_frag->seqid - 1;
1648
1649	err = send_data_nack(lp, p);
1650	if (err)
1651		return err;
1652
1653	err = ldc_rx_reset(lp);
1654	if (err < 0)
1655		return LDC_ABORT(lp);
1656
1657	return 0;
1658}
1659
1660static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1661{
1662	if (p->stype & LDC_ACK) {
1663		int err = process_data_ack(lp, p);
1664		if (err)
1665			return err;
1666	}
1667	if (p->stype & LDC_NACK)
1668		return LDC_ABORT(lp);
1669
1670	return 0;
1671}
1672
1673static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1674{
1675	unsigned long dummy;
1676	int limit = 1000;
1677
1678	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1679	       cur_head, lp->rx_head, lp->rx_tail);
1680	while (limit-- > 0) {
1681		unsigned long hv_err;
1682
1683		hv_err = sun4v_ldc_rx_get_state(lp->id,
1684						&dummy,
1685						&lp->rx_tail,
1686						&lp->chan_state);
1687		if (hv_err)
1688			return LDC_ABORT(lp);
1689
1690		if (lp->chan_state == LDC_CHANNEL_DOWN ||
1691		    lp->chan_state == LDC_CHANNEL_RESETTING)
1692			return -ECONNRESET;
1693
1694		if (cur_head != lp->rx_tail) {
1695			ldcdbg(DATA, "DATA WAIT DONE "
1696			       "head[%lx] tail[%lx] chan_state[%lx]\n",
1697			       dummy, lp->rx_tail, lp->chan_state);
1698			return 0;
1699		}
1700
1701		udelay(1);
1702	}
1703	return -EAGAIN;
1704}
1705
1706static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1707{
1708	int err = __set_rx_head(lp, head);
1709
1710	if (err < 0)
1711		return LDC_ABORT(lp);
1712
1713	lp->rx_head = head;
1714	return 0;
1715}
1716
1717static void send_data_ack(struct ldc_channel *lp)
1718{
1719	unsigned long new_tail;
1720	struct ldc_packet *p;
1721
1722	p = data_get_tx_packet(lp, &new_tail);
1723	if (likely(p)) {
1724		int err;
1725
1726		memset(p, 0, sizeof(*p));
1727		p->type = LDC_DATA;
1728		p->stype = LDC_ACK;
1729		p->ctrl = 0;
1730		p->seqid = lp->snd_nxt + 1;
1731		p->u.r.ackid = lp->rcv_nxt;
1732
1733		err = send_tx_packet(lp, p, new_tail);
1734		if (!err)
1735			lp->snd_nxt++;
1736	}
1737}
1738
1739static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1740{
1741	struct ldc_packet *first_frag;
1742	unsigned long hv_err, new;
1743	int err, copied;
1744
1745	hv_err = sun4v_ldc_rx_get_state(lp->id,
1746					&lp->rx_head,
1747					&lp->rx_tail,
1748					&lp->chan_state);
1749	if (hv_err)
1750		return LDC_ABORT(lp);
1751
1752	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1753	    lp->chan_state == LDC_CHANNEL_RESETTING)
1754		return -ECONNRESET;
1755
1756	if (lp->rx_head == lp->rx_tail)
1757		return 0;
1758
1759	first_frag = NULL;
1760	copied = err = 0;
1761	new = lp->rx_head;
1762	while (1) {
1763		struct ldc_packet *p;
1764		int pkt_len;
1765
1766		BUG_ON(new == lp->rx_tail);
1767		p = lp->rx_base + (new / LDC_PACKET_SIZE);
1768
1769		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1770		       "rcv_nxt[%08x]\n",
1771		       p->type,
1772		       p->stype,
1773		       p->ctrl,
1774		       p->env,
1775		       p->seqid,
1776		       p->u.r.ackid,
1777		       lp->rcv_nxt);
1778
1779		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1780			err = rx_bad_seq(lp, p, first_frag);
1781			copied = 0;
1782			break;
1783		}
1784
1785		if (p->type & LDC_CTRL) {
1786			err = process_control_frame(lp, p);
1787			if (err < 0)
1788				break;
1789			err = 0;
1790		}
1791
1792		lp->rcv_nxt = p->seqid;
1793
1794		/*
1795		 * If this is a control-only packet, there is nothing
1796		 * else to do but advance the rx queue since the packet
1797		 * was already processed above.
1798		 */
1799		if (!(p->type & LDC_DATA)) {
1800			new = rx_advance(lp, new);
1801			break;
1802		}
1803		if (p->stype & (LDC_ACK | LDC_NACK)) {
1804			err = data_ack_nack(lp, p);
1805			if (err)
1806				break;
1807		}
1808		if (!(p->stype & LDC_INFO)) {
1809			new = rx_advance(lp, new);
1810			err = rx_set_head(lp, new);
1811			if (err)
1812				break;
1813			goto no_data;
1814		}
1815
1816		pkt_len = p->env & LDC_LEN;
1817
1818		/* Every initial packet starts with the START bit set.
1819		 *
1820		 * Singleton packets will have both START+STOP set.
1821		 *
1822		 * Fragments will have START set in the first frame, STOP
1823		 * set in the last frame, and neither bit set in middle
1824		 * frames of the packet.
1825		 *
1826		 * Therefore if we are at the beginning of a packet and
1827		 * we don't see START, or we are in the middle of a fragmented
1828		 * packet and do see START, we are unsynchronized and should
1829		 * flush the RX queue.
1830		 */
1831		if ((first_frag == NULL && !(p->env & LDC_START)) ||
1832		    (first_frag != NULL &&  (p->env & LDC_START))) {
1833			if (!first_frag)
1834				new = rx_advance(lp, new);
1835
1836			err = rx_set_head(lp, new);
1837			if (err)
1838				break;
1839
1840			if (!first_frag)
1841				goto no_data;
1842		}
1843		if (!first_frag)
1844			first_frag = p;
1845
1846		if (pkt_len > size - copied) {
1847			/* User didn't give us a big enough buffer,
1848			 * what to do?  This is a pretty serious error.
1849			 *
1850			 * Since we haven't updated the RX ring head to
1851			 * consume any of the packets, signal the error
1852			 * to the user and just leave the RX ring alone.
1853			 *
1854			 * This seems the best behavior because this allows
1855			 * a user of the LDC layer to start with a small
1856			 * RX buffer for ldc_read() calls and use -EMSGSIZE
 1857			 * as a cue to enlarge its read buffer.
1858			 */
1859			err = -EMSGSIZE;
1860			break;
1861		}
1862
1863		/* Ok, we are gonna eat this one.  */
1864		new = rx_advance(lp, new);
1865
1866		memcpy(buf,
1867		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1868			p->u.u_data : p->u.r.r_data), pkt_len);
1869		buf += pkt_len;
1870		copied += pkt_len;
1871
1872		if (p->env & LDC_STOP)
1873			break;
1874
1875no_data:
1876		if (new == lp->rx_tail) {
1877			err = rx_data_wait(lp, new);
1878			if (err)
1879				break;
1880		}
1881	}
1882
1883	if (!err)
1884		err = rx_set_head(lp, new);
1885
1886	if (err && first_frag)
1887		lp->rcv_nxt = first_frag->seqid - 1;
1888
1889	if (!err) {
1890		err = copied;
1891		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1892			send_data_ack(lp);
1893	}
1894
1895	return err;
1896}
1897
1898static const struct ldc_mode_ops nonraw_ops = {
1899	.write		=	write_nonraw,
1900	.read		=	read_nonraw,
1901};
1902
1903static int write_stream(struct ldc_channel *lp, const void *buf,
1904			unsigned int size)
1905{
1906	if (size > lp->cfg.mtu)
1907		size = lp->cfg.mtu;
1908	return write_nonraw(lp, buf, size);
1909}
1910
1911static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1912{
1913	if (!lp->mssbuf_len) {
1914		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1915		if (err < 0)
1916			return err;
1917
1918		lp->mssbuf_len = err;
1919		lp->mssbuf_off = 0;
1920	}
1921
1922	if (size > lp->mssbuf_len)
1923		size = lp->mssbuf_len;
1924	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1925
1926	lp->mssbuf_off += size;
1927	lp->mssbuf_len -= size;
1928
1929	return size;
1930}
1931
1932static const struct ldc_mode_ops stream_ops = {
1933	.write		=	write_stream,
1934	.read		=	read_stream,
1935};
1936
1937int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1938{
1939	unsigned long flags;
1940	int err;
1941
1942	if (!buf)
1943		return -EINVAL;
1944
1945	if (!size)
1946		return 0;
1947
1948	spin_lock_irqsave(&lp->lock, flags);
1949
1950	if (lp->hs_state != LDC_HS_COMPLETE)
1951		err = -ENOTCONN;
1952	else
1953		err = lp->mops->write(lp, buf, size);
1954
1955	spin_unlock_irqrestore(&lp->lock, flags);
1956
1957	return err;
1958}
1959EXPORT_SYMBOL(ldc_write);
1960
1961int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1962{
1963	unsigned long flags;
1964	int err;
1965
1966	ldcdbg(RX, "%s: entered size=%d\n", __func__, size);
1967
1968	if (!buf)
1969		return -EINVAL;
1970
1971	if (!size)
1972		return 0;
1973
1974	spin_lock_irqsave(&lp->lock, flags);
1975
1976	if (lp->hs_state != LDC_HS_COMPLETE)
1977		err = -ENOTCONN;
1978	else
1979		err = lp->mops->read(lp, buf, size);
1980
1981	spin_unlock_irqrestore(&lp->lock, flags);
1982
1983	ldcdbg(RX, "%s: mode=%d, head=%lu, tail=%lu rv=%d\n", __func__,
1984	       lp->cfg.mode, lp->rx_head, lp->rx_tail, err);
1985
1986	return err;
1987}
1988EXPORT_SYMBOL(ldc_read);
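
/* Editor's note -- a minimal, hypothetical usage sketch of the exported
 * API above, not part of ldc.c.  All names below are invented; the
 * ldc_channel_config fields (.event, .mode, .rx_irq, .tx_irq) come from
 * asm/ldc.h, and real consumers obtain the channel id and the IRQs from
 * the machine description.  Error handling is trimmed.
 */
#if 0	/* example only, never built */
static struct example_port {
	struct ldc_channel *lp;
} example_port;

static void example_event(void *arg, int event)
{
	struct example_port *port = arg;
	char buf[64];

	if (event == LDC_EVENT_DATA_READY)
		while (ldc_read(port->lp, buf, sizeof(buf)) > 0)
			;	/* consume the received data */
}

static int example_open(unsigned long channel_id)
{
	struct ldc_channel_config cfg = {
		.event	= example_event,
		.mode	= LDC_MODE_UNRELIABLE,
		/* .rx_irq and .tx_irq must be filled in from the MD */
	};

	example_port.lp = ldc_alloc(channel_id, &cfg, &example_port,
				    "example");
	if (IS_ERR(example_port.lp))
		return PTR_ERR(example_port.lp);

	ldc_bind(example_port.lp);	/* register queues, pseudo-listen */
	return ldc_connect(example_port.lp);	/* start the handshake */
}
#endif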
1989
1990static u64 pagesize_code(void)
1991{
1992	switch (PAGE_SIZE) {
1993	default:
1994	case (8ULL * 1024ULL):
1995		return 0;
1996	case (64ULL * 1024ULL):
1997		return 1;
1998	case (512ULL * 1024ULL):
1999		return 2;
2000	case (4ULL * 1024ULL * 1024ULL):
2001		return 3;
2002	case (32ULL * 1024ULL * 1024ULL):
2003		return 4;
2004	case (256ULL * 1024ULL * 1024ULL):
2005		return 5;
2006	}
2007}
2008
2009static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
2010{
2011	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
2012		(index << PAGE_SHIFT) |
2013		page_offset);
2014}
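
/* Editor's note -- worked example, not part of ldc.c: with the default 8K
 * page size pagesize_code() is 0 and PAGE_SHIFT is 13, so mapping map-table
 * entry 5 at page offset 0x100 gives
 *
 *	make_cookie(5, 0, 0x100) == (5 << 13) | 0x100 == 0xa100
 *
 * and ldc_cookie_to_index() above recovers entry 5 by masking off
 * COOKIE_PGSZ_CODE and shifting right by 13 + 3 * szcode.
 */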
2015
2016
2017static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
2018					     unsigned long npages)
2019{
2020	long entry;
2021
2022	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
2023				      npages, NULL, (unsigned long)-1, 0);
2024	if (unlikely(entry == IOMMU_ERROR_CODE))
2025		return NULL;
2026
2027	return iommu->page_table + entry;
2028}
2029
2030static u64 perm_to_mte(unsigned int map_perm)
2031{
2032	u64 mte_base;
2033
2034	mte_base = pagesize_code();
2035
2036	if (map_perm & LDC_MAP_SHADOW) {
2037		if (map_perm & LDC_MAP_R)
2038			mte_base |= LDC_MTE_COPY_R;
2039		if (map_perm & LDC_MAP_W)
2040			mte_base |= LDC_MTE_COPY_W;
2041	}
2042	if (map_perm & LDC_MAP_DIRECT) {
2043		if (map_perm & LDC_MAP_R)
2044			mte_base |= LDC_MTE_READ;
2045		if (map_perm & LDC_MAP_W)
2046			mte_base |= LDC_MTE_WRITE;
2047		if (map_perm & LDC_MAP_X)
2048			mte_base |= LDC_MTE_EXEC;
2049	}
2050	if (map_perm & LDC_MAP_IO) {
2051		if (map_perm & LDC_MAP_R)
2052			mte_base |= LDC_MTE_IOMMU_R;
2053		if (map_perm & LDC_MAP_W)
2054			mte_base |= LDC_MTE_IOMMU_W;
2055	}
2056
2057	return mte_base;
2058}
2059
2060static int pages_in_region(unsigned long base, long len)
2061{
2062	int count = 0;
2063
2064	do {
2065		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2066
2067		len -= (new - base);
2068		base = new;
2069		count++;
2070	} while (len > 0);
2071
2072	return count;
2073}
2074
2075struct cookie_state {
2076	struct ldc_mtable_entry		*page_table;
2077	struct ldc_trans_cookie		*cookies;
2078	u64				mte_base;
2079	u64				prev_cookie;
2080	u32				pte_idx;
2081	u32				nc;
2082};
2083
2084static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2085			 unsigned long off, unsigned long len)
2086{
2087	do {
2088		unsigned long tlen, new = pa + PAGE_SIZE;
2089		u64 this_cookie;
2090
2091		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2092
2093		tlen = PAGE_SIZE;
2094		if (off)
2095			tlen = PAGE_SIZE - off;
2096		if (tlen > len)
2097			tlen = len;
2098
2099		this_cookie = make_cookie(sp->pte_idx,
2100					  pagesize_code(), off);
2101
2102		off = 0;
2103
2104		if (this_cookie == sp->prev_cookie) {
2105			sp->cookies[sp->nc - 1].cookie_size += tlen;
2106		} else {
2107			sp->cookies[sp->nc].cookie_addr = this_cookie;
2108			sp->cookies[sp->nc].cookie_size = tlen;
2109			sp->nc++;
2110		}
2111		sp->prev_cookie = this_cookie + tlen;
2112
2113		sp->pte_idx++;
2114
2115		len -= tlen;
2116		pa = new;
2117	} while (len > 0);
2118}
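
/* Editor's note -- illustrative note, not part of ldc.c: the prev_cookie
 * comparison above coalesces physically contiguous pages into a single
 * cookie.  For example, two contiguous 8K pages placed in consecutive
 * map-table entries N and N+1 produce make_cookie(N+1, ...) equal to the
 * previous cookie plus PAGE_SIZE, so the second page simply grows
 * cookies[nc - 1].cookie_size to 16K instead of consuming a new cookie.
 */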
2119
2120static int sg_count_one(struct scatterlist *sg)
2121{
2122	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2123	long len = sg->length;
2124
2125	if ((sg->offset | len) & (8UL - 1))
2126		return -EFAULT;
2127
2128	return pages_in_region(base + sg->offset, len);
2129}
2130
2131static int sg_count_pages(struct scatterlist *sg, int num_sg)
2132{
2133	int count;
2134	int i;
2135
2136	count = 0;
2137	for (i = 0; i < num_sg; i++) {
2138		int err = sg_count_one(sg + i);
2139		if (err < 0)
2140			return err;
2141		count += err;
2142	}
2143
2144	return count;
2145}
2146
2147int ldc_map_sg(struct ldc_channel *lp,
2148	       struct scatterlist *sg, int num_sg,
2149	       struct ldc_trans_cookie *cookies, int ncookies,
2150	       unsigned int map_perm)
2151{
2152	unsigned long i, npages;
2153	struct ldc_mtable_entry *base;
2154	struct cookie_state state;
2155	struct ldc_iommu *iommu;
2156	int err;
2157	struct scatterlist *s;
2158
2159	if (map_perm & ~LDC_MAP_ALL)
2160		return -EINVAL;
2161
2162	err = sg_count_pages(sg, num_sg);
2163	if (err < 0)
2164		return err;
2165
2166	npages = err;
2167	if (err > ncookies)
2168		return -EMSGSIZE;
2169
2170	iommu = &lp->iommu;
2171
2172	base = alloc_npages(iommu, npages);
2173
2174	if (!base)
2175		return -ENOMEM;
2176
2177	state.page_table = iommu->page_table;
2178	state.cookies = cookies;
2179	state.mte_base = perm_to_mte(map_perm);
2180	state.prev_cookie = ~(u64)0;
2181	state.pte_idx = (base - iommu->page_table);
2182	state.nc = 0;
2183
2184	for_each_sg(sg, s, num_sg, i) {
2185		fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
2186			     s->offset, s->length);
2187	}
2188
2189	return state.nc;
2190}
2191EXPORT_SYMBOL(ldc_map_sg);
2192
2193int ldc_map_single(struct ldc_channel *lp,
2194		   void *buf, unsigned int len,
2195		   struct ldc_trans_cookie *cookies, int ncookies,
2196		   unsigned int map_perm)
2197{
2198	unsigned long npages, pa;
2199	struct ldc_mtable_entry *base;
2200	struct cookie_state state;
2201	struct ldc_iommu *iommu;
2202
2203	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2204		return -EINVAL;
2205
2206	pa = __pa(buf);
2207	if ((pa | len) & (8UL - 1))
2208		return -EFAULT;
2209
2210	npages = pages_in_region(pa, len);
2211
2212	iommu = &lp->iommu;
2213
2214	base = alloc_npages(iommu, npages);
2215
2216	if (!base)
2217		return -ENOMEM;
2218
2219	state.page_table = iommu->page_table;
2220	state.cookies = cookies;
2221	state.mte_base = perm_to_mte(map_perm);
2222	state.prev_cookie = ~(u64)0;
2223	state.pte_idx = (base - iommu->page_table);
2224	state.nc = 0;
2225	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2226	BUG_ON(state.nc > ncookies);
2227
2228	return state.nc;
2229}
2230EXPORT_SYMBOL(ldc_map_single);
2231
2232
2233static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2234			u64 cookie, u64 size)
2235{
2236	unsigned long npages, entry;
2237
2238	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2239
2240	entry = ldc_cookie_to_index(cookie, iommu);
2241	ldc_demap(iommu, id, cookie, entry, npages);
2242	iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
2243}
2244
2245void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2246	       int ncookies)
2247{
2248	struct ldc_iommu *iommu = &lp->iommu;
2249	int i;
2250	unsigned long flags;
2251
2252	spin_lock_irqsave(&iommu->lock, flags);
2253	for (i = 0; i < ncookies; i++) {
2254		u64 addr = cookies[i].cookie_addr;
2255		u64 size = cookies[i].cookie_size;
2256
2257		free_npages(lp->id, iommu, addr, size);
2258	}
2259	spin_unlock_irqrestore(&iommu->lock, flags);
2260}
2261EXPORT_SYMBOL(ldc_unmap);
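/* Illustrative sketch (editor addition, not part of the upstream driver):
 * a hypothetical caller mapping an 8-byte aligned kernel buffer for the
 * peer and unmapping it again.  "chan", the cookie array size and the
 * permission bits are assumptions made for the example only.
 */
static int __maybe_unused example_map_unmap(struct ldc_channel *chan,
					    void *buf, unsigned int len)
{
	struct ldc_trans_cookie cookies[4];
	int nc;

	/* Returns the number of cookies consumed, or a negative errno. */
	nc = ldc_map_single(chan, buf, len, cookies, ARRAY_SIZE(cookies),
			    LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W);
	if (nc < 0)
		return nc;

	/* ... hand the cookies to the peer and perform the transfer ... */

	ldc_unmap(chan, cookies, nc);
	return 0;
}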
2262
2263int ldc_copy(struct ldc_channel *lp, int copy_dir,
2264	     void *buf, unsigned int len, unsigned long offset,
2265	     struct ldc_trans_cookie *cookies, int ncookies)
2266{
2267	unsigned int orig_len;
2268	unsigned long ra;
2269	int i;
2270
2271	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2272		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2273		       lp->id, copy_dir);
2274		return -EINVAL;
2275	}
2276
2277	ra = __pa(buf);
2278	if ((ra | len | offset) & (8UL - 1)) {
2279		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2280		       "ra[%lx] len[%x] offset[%lx]\n",
2281		       lp->id, ra, len, offset);
2282		return -EFAULT;
2283	}
2284
2285	if (lp->hs_state != LDC_HS_COMPLETE ||
2286	    (lp->flags & LDC_FLAG_RESET)) {
2287		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2288		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2289		return -ECONNRESET;
2290	}
2291
2292	orig_len = len;
2293	for (i = 0; i < ncookies; i++) {
2294		unsigned long cookie_raddr = cookies[i].cookie_addr;
2295		unsigned long this_len = cookies[i].cookie_size;
2296		unsigned long actual_len;
2297
2298		if (unlikely(offset)) {
2299			unsigned long this_off = offset;
2300
2301			if (this_off > this_len)
2302				this_off = this_len;
2303
2304			offset -= this_off;
2305			this_len -= this_off;
2306			if (!this_len)
2307				continue;
2308			cookie_raddr += this_off;
2309		}
2310
2311		if (this_len > len)
2312			this_len = len;
2313
2314		while (1) {
2315			unsigned long hv_err;
2316
2317			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2318						cookie_raddr, ra,
2319						this_len, &actual_len);
2320			if (unlikely(hv_err)) {
2321				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2322				       "HV error %lu\n",
2323				       lp->id, hv_err);
2324				if (lp->hs_state != LDC_HS_COMPLETE ||
2325				    (lp->flags & LDC_FLAG_RESET))
2326					return -ECONNRESET;
2327				else
2328					return -EFAULT;
2329			}
2330
2331			cookie_raddr += actual_len;
2332			ra += actual_len;
2333			len -= actual_len;
2334			if (actual_len == this_len)
2335				break;
2336
2337			this_len -= actual_len;
2338		}
2339
2340		if (!len)
2341			break;
2342	}
2343
2344	/* It is caller policy what to do about short copies.
2345	 * For example, a networking driver can declare the
2346	 * packet a runt and drop it.
2347	 */
2348
2349	return orig_len - len;
2350}
2351EXPORT_SYMBOL(ldc_copy);
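/* Illustrative sketch (editor addition): one possible caller policy for the
 * short-copy return value described above -- reject the data outright if
 * fewer than "len" bytes arrived.  The function and the error code chosen
 * are hypothetical, not something the driver itself mandates.
 */
static int __maybe_unused example_copy_in(struct ldc_channel *chan,
					  void *buf, unsigned int len,
					  struct ldc_trans_cookie *cookies,
					  int ncookies)
{
	int copied = ldc_copy(chan, LDC_COPY_IN, buf, len, 0,
			      cookies, ncookies);

	if (copied < 0)
		return copied;		/* -EINVAL, -EFAULT or -ECONNRESET */
	if (copied < (int)len)
		return -EIO;		/* caller policy: treat as a runt */
	return 0;
}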
2352
2353void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2354			  struct ldc_trans_cookie *cookies, int *ncookies,
2355			  unsigned int map_perm)
2356{
2357	void *buf;
2358	int err;
2359
2360	if (len & (8UL - 1))
2361		return ERR_PTR(-EINVAL);
2362
2363	buf = kzalloc(len, GFP_ATOMIC);
2364	if (!buf)
2365		return ERR_PTR(-ENOMEM);
2366
2367	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2368	if (err < 0) {
2369		kfree(buf);
2370		return ERR_PTR(err);
2371	}
2372	*ncookies = err;
2373
2374	return buf;
2375}
2376EXPORT_SYMBOL(ldc_alloc_exp_dring);
2377
2378void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
2379			struct ldc_trans_cookie *cookies, int ncookies)
2380{
2381	ldc_unmap(lp, cookies, ncookies);
2382	kfree(buf);
2383}
2384EXPORT_SYMBOL(ldc_free_exp_dring);
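/* Illustrative sketch (editor addition): allocating an exported descriptor
 * ring.  Note that *ncookies is in/out -- it carries the capacity of the
 * cookie array in and the number actually used back out.  The ring size and
 * permission bits below are example assumptions only.
 */
static void * __maybe_unused example_alloc_dring(struct ldc_channel *chan,
						 struct ldc_trans_cookie *cookies,
						 int *ncookies)
{
	unsigned int len = 128 * LDC_PACKET_SIZE;	/* multiple of 8 */
	void *dring = ldc_alloc_exp_dring(chan, len, cookies, ncookies,
					  LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W);

	if (IS_ERR(dring))
		return NULL;

	/* ... publish the cookies to the peer and use the ring; the owner
	 * later releases it with:
	 *	ldc_free_exp_dring(chan, dring, len, cookies, *ncookies);
	 */
	return dring;
}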
2385
2386static int __init ldc_init(void)
2387{
2388	unsigned long major, minor;
2389	struct mdesc_handle *hp;
2390	const u64 *v;
2391	int err;
2392	u64 mp;
2393
2394	hp = mdesc_grab();
2395	if (!hp)
2396		return -ENODEV;
2397
2398	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2399	err = -ENODEV;
2400	if (mp == MDESC_NODE_NULL)
2401		goto out;
2402
2403	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2404	if (!v)
2405		goto out;
2406
2407	major = 1;
2408	minor = 0;
2409	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2410		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2411		goto out;
2412	}
2413
2414	printk(KERN_INFO "%s", version);
2415
2416	if (!*v) {
2417		printk(KERN_INFO PFX "Domaining disabled.\n");
2418		goto out;
2419	}
2420	ldom_domaining_enabled = 1;
2421	err = 0;
2422
2423out:
2424	mdesc_release(hp);
2425	return err;
2426}
2427
2428core_initcall(ldc_init);
   1/* ldc.c: Logical Domain Channel link-layer protocol driver.
   2 *
   3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/export.h>
   8#include <linux/slab.h>
   9#include <linux/spinlock.h>
  10#include <linux/delay.h>
  11#include <linux/errno.h>
  12#include <linux/string.h>
  13#include <linux/scatterlist.h>
  14#include <linux/interrupt.h>
  15#include <linux/list.h>
  16#include <linux/init.h>
  17#include <linux/bitmap.h>
 
  18
  19#include <asm/hypervisor.h>
  20#include <asm/iommu.h>
  21#include <asm/page.h>
  22#include <asm/ldc.h>
  23#include <asm/mdesc.h>
  24
  25#define DRV_MODULE_NAME		"ldc"
  26#define PFX DRV_MODULE_NAME	": "
  27#define DRV_MODULE_VERSION	"1.1"
  28#define DRV_MODULE_RELDATE	"July 22, 2008"
  29
 
 
 
 
  30static char version[] =
  31	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
  32#define LDC_PACKET_SIZE		64
  33
  34/* Packet header layout for unreliable and reliable mode frames.
  35 * When in RAW mode, packets are simply straight 64-byte payloads
  36 * with no headers.
  37 */
  38struct ldc_packet {
  39	u8			type;
  40#define LDC_CTRL		0x01
  41#define LDC_DATA		0x02
  42#define LDC_ERR			0x10
  43
  44	u8			stype;
  45#define LDC_INFO		0x01
  46#define LDC_ACK			0x02
  47#define LDC_NACK		0x04
  48
  49	u8			ctrl;
  50#define LDC_VERS		0x01 /* Link Version		*/
  51#define LDC_RTS			0x02 /* Request To Send		*/
  52#define LDC_RTR			0x03 /* Ready To Receive	*/
  53#define LDC_RDX			0x04 /* Ready for Data eXchange	*/
  54#define LDC_CTRL_MSK		0x0f
  55
  56	u8			env;
  57#define LDC_LEN			0x3f
  58#define LDC_FRAG_MASK		0xc0
  59#define LDC_START		0x40
  60#define LDC_STOP		0x80
  61
  62	u32			seqid;
  63
  64	union {
  65		u8		u_data[LDC_PACKET_SIZE - 8];
  66		struct {
  67			u32	pad;
  68			u32	ackid;
  69			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
  70		} r;
  71	} u;
  72};
  73
  74struct ldc_version {
  75	u16 major;
  76	u16 minor;
  77};
  78
  79/* Ordered from largest major to lowest.  */
  80static struct ldc_version ver_arr[] = {
  81	{ .major = 1, .minor = 0 },
  82};
  83
  84#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
  85#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)
  86
  87struct ldc_channel;
  88
  89struct ldc_mode_ops {
  90	int (*write)(struct ldc_channel *, const void *, unsigned int);
  91	int (*read)(struct ldc_channel *, void *, unsigned int);
  92};
  93
  94static const struct ldc_mode_ops raw_ops;
  95static const struct ldc_mode_ops nonraw_ops;
  96static const struct ldc_mode_ops stream_ops;
  97
  98int ldom_domaining_enabled;
  99
 100struct ldc_iommu {
 101	/* Protects arena alloc/free.  */
 102	spinlock_t			lock;
 103	struct iommu_arena		arena;
 104	struct ldc_mtable_entry		*page_table;
 
 105};
 106
 107struct ldc_channel {
 108	/* Protects all operations that depend upon channel state.  */
 109	spinlock_t			lock;
 110
 111	unsigned long			id;
 112
 113	u8				*mssbuf;
 114	u32				mssbuf_len;
 115	u32				mssbuf_off;
 116
 117	struct ldc_packet		*tx_base;
 118	unsigned long			tx_head;
 119	unsigned long			tx_tail;
 120	unsigned long			tx_num_entries;
 121	unsigned long			tx_ra;
 122
 123	unsigned long			tx_acked;
 124
 125	struct ldc_packet		*rx_base;
 126	unsigned long			rx_head;
 127	unsigned long			rx_tail;
 128	unsigned long			rx_num_entries;
 129	unsigned long			rx_ra;
 130
 131	u32				rcv_nxt;
 132	u32				snd_nxt;
 133
 134	unsigned long			chan_state;
 135
 136	struct ldc_channel_config	cfg;
 137	void				*event_arg;
 138
 139	const struct ldc_mode_ops	*mops;
 140
 141	struct ldc_iommu		iommu;
 142
 143	struct ldc_version		ver;
 144
 145	u8				hs_state;
 146#define LDC_HS_CLOSED			0x00
 147#define LDC_HS_OPEN			0x01
 148#define LDC_HS_GOTVERS			0x02
 149#define LDC_HS_SENTRTR			0x03
 150#define LDC_HS_GOTRTR			0x04
 151#define LDC_HS_COMPLETE			0x10
 152
 153	u8				flags;
 154#define LDC_FLAG_ALLOCED_QUEUES		0x01
 155#define LDC_FLAG_REGISTERED_QUEUES	0x02
 156#define LDC_FLAG_REGISTERED_IRQS	0x04
 157#define LDC_FLAG_RESET			0x10
 158
 159	u8				mss;
 160	u8				state;
 161
 162#define LDC_IRQ_NAME_MAX		32
 163	char				rx_irq_name[LDC_IRQ_NAME_MAX];
 164	char				tx_irq_name[LDC_IRQ_NAME_MAX];
 165
 166	struct hlist_head		mh_list;
 167
 168	struct hlist_node		list;
 169};
 170
 171#define ldcdbg(TYPE, f, a...) \
 172do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
 173		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
 174} while (0)
 175
 
 
 176static const char *state_to_str(u8 state)
 177{
 178	switch (state) {
 179	case LDC_STATE_INVALID:
 180		return "INVALID";
 181	case LDC_STATE_INIT:
 182		return "INIT";
 183	case LDC_STATE_BOUND:
 184		return "BOUND";
 185	case LDC_STATE_READY:
 186		return "READY";
 187	case LDC_STATE_CONNECTED:
 188		return "CONNECTED";
 189	default:
 190		return "<UNKNOWN>";
 191	}
 192}
 193
 194static void ldc_set_state(struct ldc_channel *lp, u8 state)
 195{
 196	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
 197	       state_to_str(lp->state),
 198	       state_to_str(state));
 199
 200	lp->state = state;
 201}
 202
 203static unsigned long __advance(unsigned long off, unsigned long num_entries)
 204{
 205	off += LDC_PACKET_SIZE;
 206	if (off == (num_entries * LDC_PACKET_SIZE))
 207		off = 0;
 208
 209	return off;
 210}
 211
 212static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
 213{
 214	return __advance(off, lp->rx_num_entries);
 215}
 216
 217static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
 218{
 219	return __advance(off, lp->tx_num_entries);
 220}
 221
 222static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
 223						  unsigned long *new_tail)
 224{
 225	struct ldc_packet *p;
 226	unsigned long t;
 227
 228	t = tx_advance(lp, lp->tx_tail);
 229	if (t == lp->tx_head)
 230		return NULL;
 231
 232	*new_tail = t;
 233
 234	p = lp->tx_base;
 235	return p + (lp->tx_tail / LDC_PACKET_SIZE);
 236}
 237
 238/* When we are in reliable or stream mode, have to track the next packet
 239 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
 240 * to be careful not to stomp over the queue past that point.  During
 241 * the handshake, we don't have TX data packets pending in the queue
 242 * and that's why handshake_get_tx_packet() need not be mindful of
 243 * lp->tx_acked.
 244 */
 245static unsigned long head_for_data(struct ldc_channel *lp)
 246{
 247	if (lp->cfg.mode == LDC_MODE_STREAM)
 248		return lp->tx_acked;
 249	return lp->tx_head;
 250}
 251
 252static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
 253{
 254	unsigned long limit, tail, new_tail, diff;
 255	unsigned int mss;
 256
 257	limit = head_for_data(lp);
 258	tail = lp->tx_tail;
 259	new_tail = tx_advance(lp, tail);
 260	if (new_tail == limit)
 261		return 0;
 262
 263	if (limit > new_tail)
 264		diff = limit - new_tail;
 265	else
 266		diff = (limit +
 267			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
 268	diff /= LDC_PACKET_SIZE;
 269	mss = lp->mss;
 270
 271	if (diff * mss < size)
 272		return 0;
 273
 274	return 1;
 275}
 276
 277static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
 278					     unsigned long *new_tail)
 279{
 280	struct ldc_packet *p;
 281	unsigned long h, t;
 282
 283	h = head_for_data(lp);
 284	t = tx_advance(lp, lp->tx_tail);
 285	if (t == h)
 286		return NULL;
 287
 288	*new_tail = t;
 289
 290	p = lp->tx_base;
 291	return p + (lp->tx_tail / LDC_PACKET_SIZE);
 292}
 293
 294static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
 295{
 296	unsigned long orig_tail = lp->tx_tail;
 297	int limit = 1000;
 298
 299	lp->tx_tail = tail;
 300	while (limit-- > 0) {
 301		unsigned long err;
 302
 303		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
 304		if (!err)
 305			return 0;
 306
 307		if (err != HV_EWOULDBLOCK) {
 308			lp->tx_tail = orig_tail;
 309			return -EINVAL;
 310		}
 311		udelay(1);
 312	}
 313
 314	lp->tx_tail = orig_tail;
 315	return -EBUSY;
 316}
 317
 318/* This just updates the head value in the hypervisor using
 319 * a polling loop with a timeout.  The caller takes care of
 320 * upating software state representing the head change, if any.
 321 */
 322static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
 323{
 324	int limit = 1000;
 325
 326	while (limit-- > 0) {
 327		unsigned long err;
 328
 329		err = sun4v_ldc_rx_set_qhead(lp->id, head);
 330		if (!err)
 331			return 0;
 332
 333		if (err != HV_EWOULDBLOCK)
 334			return -EINVAL;
 335
 336		udelay(1);
 337	}
 338
 339	return -EBUSY;
 340}
 341
 342static int send_tx_packet(struct ldc_channel *lp,
 343			  struct ldc_packet *p,
 344			  unsigned long new_tail)
 345{
 346	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
 347
 348	return set_tx_tail(lp, new_tail);
 349}
 350
 351static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
 352						 u8 stype, u8 ctrl,
 353						 void *data, int dlen,
 354						 unsigned long *new_tail)
 355{
 356	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
 357
 358	if (p) {
 359		memset(p, 0, sizeof(*p));
 360		p->type = LDC_CTRL;
 361		p->stype = stype;
 362		p->ctrl = ctrl;
 363		if (data)
 364			memcpy(p->u.u_data, data, dlen);
 365	}
 366	return p;
 367}
 368
 369static int start_handshake(struct ldc_channel *lp)
 370{
 371	struct ldc_packet *p;
 372	struct ldc_version *ver;
 373	unsigned long new_tail;
 374
 375	ver = &ver_arr[0];
 376
 377	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
 378	       ver->major, ver->minor);
 379
 380	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 381				   ver, sizeof(*ver), &new_tail);
 382	if (p) {
 383		int err = send_tx_packet(lp, p, new_tail);
 384		if (!err)
 385			lp->flags &= ~LDC_FLAG_RESET;
 386		return err;
 387	}
 388	return -EBUSY;
 389}
 390
 391static int send_version_nack(struct ldc_channel *lp,
 392			     u16 major, u16 minor)
 393{
 394	struct ldc_packet *p;
 395	struct ldc_version ver;
 396	unsigned long new_tail;
 397
 398	ver.major = major;
 399	ver.minor = minor;
 400
 401	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
 402				   &ver, sizeof(ver), &new_tail);
 403	if (p) {
 404		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
 405		       ver.major, ver.minor);
 406
 407		return send_tx_packet(lp, p, new_tail);
 408	}
 409	return -EBUSY;
 410}
 411
 412static int send_version_ack(struct ldc_channel *lp,
 413			    struct ldc_version *vp)
 414{
 415	struct ldc_packet *p;
 416	unsigned long new_tail;
 417
 418	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
 419				   vp, sizeof(*vp), &new_tail);
 420	if (p) {
 421		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
 422		       vp->major, vp->minor);
 423
 424		return send_tx_packet(lp, p, new_tail);
 425	}
 426	return -EBUSY;
 427}
 428
 429static int send_rts(struct ldc_channel *lp)
 430{
 431	struct ldc_packet *p;
 432	unsigned long new_tail;
 433
 434	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
 435				   &new_tail);
 436	if (p) {
 437		p->env = lp->cfg.mode;
 438		p->seqid = 0;
 439		lp->rcv_nxt = 0;
 440
 441		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
 442		       p->env, p->seqid);
 443
 444		return send_tx_packet(lp, p, new_tail);
 445	}
 446	return -EBUSY;
 447}
 448
 449static int send_rtr(struct ldc_channel *lp)
 450{
 451	struct ldc_packet *p;
 452	unsigned long new_tail;
 453
 454	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
 455				   &new_tail);
 456	if (p) {
 457		p->env = lp->cfg.mode;
 458		p->seqid = 0;
 459
 460		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
 461		       p->env, p->seqid);
 462
 463		return send_tx_packet(lp, p, new_tail);
 464	}
 465	return -EBUSY;
 466}
 467
 468static int send_rdx(struct ldc_channel *lp)
 469{
 470	struct ldc_packet *p;
 471	unsigned long new_tail;
 472
 473	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
 474				   &new_tail);
 475	if (p) {
 476		p->env = 0;
 477		p->seqid = ++lp->snd_nxt;
 478		p->u.r.ackid = lp->rcv_nxt;
 479
 480		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
 481		       p->env, p->seqid, p->u.r.ackid);
 482
 483		return send_tx_packet(lp, p, new_tail);
 484	}
 485	return -EBUSY;
 486}
 487
 488static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
 489{
 490	struct ldc_packet *p;
 491	unsigned long new_tail;
 492	int err;
 493
 494	p = data_get_tx_packet(lp, &new_tail);
 495	if (!p)
 496		return -EBUSY;
 497	memset(p, 0, sizeof(*p));
 498	p->type = data_pkt->type;
 499	p->stype = LDC_NACK;
 500	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
 501	p->seqid = lp->snd_nxt + 1;
 502	p->u.r.ackid = lp->rcv_nxt;
 503
 504	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
 505	       p->type, p->ctrl, p->seqid, p->u.r.ackid);
 506
 507	err = send_tx_packet(lp, p, new_tail);
 508	if (!err)
 509		lp->snd_nxt++;
 510
 511	return err;
 512}
 513
 514static int ldc_abort(struct ldc_channel *lp)
 515{
 516	unsigned long hv_err;
 517
 518	ldcdbg(STATE, "ABORT\n");
 
 519
 520	/* We report but do not act upon the hypervisor errors because
 521	 * there really isn't much we can do if they fail at this point.
 522	 */
 523	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
 524	if (hv_err)
 525		printk(KERN_ERR PFX "ldc_abort: "
 526		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 527		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
 528
 529	hv_err = sun4v_ldc_tx_get_state(lp->id,
 530					&lp->tx_head,
 531					&lp->tx_tail,
 532					&lp->chan_state);
 533	if (hv_err)
 534		printk(KERN_ERR PFX "ldc_abort: "
 535		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
 536		       lp->id, hv_err);
 537
 538	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
 539	if (hv_err)
 540		printk(KERN_ERR PFX "ldc_abort: "
 541		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 542		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
 543
 544	/* Refetch the RX queue state as well, because we could be invoked
 545	 * here in the queue processing context.
 546	 */
 547	hv_err = sun4v_ldc_rx_get_state(lp->id,
 548					&lp->rx_head,
 549					&lp->rx_tail,
 550					&lp->chan_state);
 551	if (hv_err)
 552		printk(KERN_ERR PFX "ldc_abort: "
 553		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
 554		       lp->id, hv_err);
 555
 556	return -ECONNRESET;
 557}
 558
 559static struct ldc_version *find_by_major(u16 major)
 560{
 561	struct ldc_version *ret = NULL;
 562	int i;
 563
 564	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
 565		struct ldc_version *v = &ver_arr[i];
 566		if (v->major <= major) {
 567			ret = v;
 568			break;
 569		}
 570	}
 571	return ret;
 572}
 573
 574static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
 575{
 576	struct ldc_version *vap;
 577	int err;
 578
 579	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
 580	       vp->major, vp->minor);
 581
 582	if (lp->hs_state == LDC_HS_GOTVERS) {
 583		lp->hs_state = LDC_HS_OPEN;
 584		memset(&lp->ver, 0, sizeof(lp->ver));
 585	}
 586
 587	vap = find_by_major(vp->major);
 588	if (!vap) {
 589		err = send_version_nack(lp, 0, 0);
 590	} else if (vap->major != vp->major) {
 591		err = send_version_nack(lp, vap->major, vap->minor);
 592	} else {
 593		struct ldc_version ver = *vp;
 594		if (ver.minor > vap->minor)
 595			ver.minor = vap->minor;
 596		err = send_version_ack(lp, &ver);
 597		if (!err) {
 598			lp->ver = ver;
 599			lp->hs_state = LDC_HS_GOTVERS;
 600		}
 601	}
 602	if (err)
 603		return ldc_abort(lp);
 604
 605	return 0;
 606}
 607
 608static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
 609{
 610	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
 611	       vp->major, vp->minor);
 612
 613	if (lp->hs_state == LDC_HS_GOTVERS) {
 614		if (lp->ver.major != vp->major ||
 615		    lp->ver.minor != vp->minor)
 616			return ldc_abort(lp);
 617	} else {
 618		lp->ver = *vp;
 619		lp->hs_state = LDC_HS_GOTVERS;
 620	}
 621	if (send_rts(lp))
 622		return ldc_abort(lp);
 623	return 0;
 624}
 625
 626static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
 627{
 628	struct ldc_version *vap;
 629	struct ldc_packet *p;
 630	unsigned long new_tail;
 631
 632	if (vp->major == 0 && vp->minor == 0)
 633		return ldc_abort(lp);
 634
 635	vap = find_by_major(vp->major);
 636	if (!vap)
 637		return ldc_abort(lp);
 638
 639	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 640					   vap, sizeof(*vap),
 641					   &new_tail);
 642	if (!p)
 643		return ldc_abort(lp);
 644
 645	return send_tx_packet(lp, p, new_tail);
 646}
 647
 648static int process_version(struct ldc_channel *lp,
 649			   struct ldc_packet *p)
 650{
 651	struct ldc_version *vp;
 652
 653	vp = (struct ldc_version *) p->u.u_data;
 654
 655	switch (p->stype) {
 656	case LDC_INFO:
 657		return process_ver_info(lp, vp);
 658
 659	case LDC_ACK:
 660		return process_ver_ack(lp, vp);
 661
 662	case LDC_NACK:
 663		return process_ver_nack(lp, vp);
 664
 665	default:
 666		return ldc_abort(lp);
 667	}
 668}
 669
 670static int process_rts(struct ldc_channel *lp,
 671		       struct ldc_packet *p)
 672{
 673	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
 674	       p->stype, p->seqid, p->env);
 675
 676	if (p->stype     != LDC_INFO	   ||
 677	    lp->hs_state != LDC_HS_GOTVERS ||
 678	    p->env       != lp->cfg.mode)
 679		return ldc_abort(lp);
 680
 681	lp->snd_nxt = p->seqid;
 682	lp->rcv_nxt = p->seqid;
 683	lp->hs_state = LDC_HS_SENTRTR;
 684	if (send_rtr(lp))
 685		return ldc_abort(lp);
 686
 687	return 0;
 688}
 689
 690static int process_rtr(struct ldc_channel *lp,
 691		       struct ldc_packet *p)
 692{
 693	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
 694	       p->stype, p->seqid, p->env);
 695
 696	if (p->stype     != LDC_INFO ||
 697	    p->env       != lp->cfg.mode)
 698		return ldc_abort(lp);
 699
 700	lp->snd_nxt = p->seqid;
 701	lp->hs_state = LDC_HS_COMPLETE;
 702	ldc_set_state(lp, LDC_STATE_CONNECTED);
 703	send_rdx(lp);
 704
 705	return LDC_EVENT_UP;
 706}
 707
 708static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
 709{
 710	return lp->rcv_nxt + 1 == seqid;
 711}
 712
 713static int process_rdx(struct ldc_channel *lp,
 714		       struct ldc_packet *p)
 715{
 716	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
 717	       p->stype, p->seqid, p->env, p->u.r.ackid);
 718
 719	if (p->stype != LDC_INFO ||
 720	    !(rx_seq_ok(lp, p->seqid)))
 721		return ldc_abort(lp);
 722
 723	lp->rcv_nxt = p->seqid;
 724
 725	lp->hs_state = LDC_HS_COMPLETE;
 726	ldc_set_state(lp, LDC_STATE_CONNECTED);
 727
 728	return LDC_EVENT_UP;
 729}
 730
 731static int process_control_frame(struct ldc_channel *lp,
 732				 struct ldc_packet *p)
 733{
 734	switch (p->ctrl) {
 735	case LDC_VERS:
 736		return process_version(lp, p);
 737
 738	case LDC_RTS:
 739		return process_rts(lp, p);
 740
 741	case LDC_RTR:
 742		return process_rtr(lp, p);
 743
 744	case LDC_RDX:
 745		return process_rdx(lp, p);
 746
 747	default:
 748		return ldc_abort(lp);
 749	}
 750}
 751
 752static int process_error_frame(struct ldc_channel *lp,
 753			       struct ldc_packet *p)
 754{
 755	return ldc_abort(lp);
 756}
 757
 758static int process_data_ack(struct ldc_channel *lp,
 759			    struct ldc_packet *ack)
 760{
 761	unsigned long head = lp->tx_acked;
 762	u32 ackid = ack->u.r.ackid;
 763
 764	while (1) {
 765		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
 766
 767		head = tx_advance(lp, head);
 768
 769		if (p->seqid == ackid) {
 770			lp->tx_acked = head;
 771			return 0;
 772		}
 773		if (head == lp->tx_tail)
 774			return ldc_abort(lp);
 775	}
 776
 777	return 0;
 778}
 779
 780static void send_events(struct ldc_channel *lp, unsigned int event_mask)
 781{
 782	if (event_mask & LDC_EVENT_RESET)
 783		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
 784	if (event_mask & LDC_EVENT_UP)
 785		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
 786	if (event_mask & LDC_EVENT_DATA_READY)
 787		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
 788}
 789
 790static irqreturn_t ldc_rx(int irq, void *dev_id)
 791{
 792	struct ldc_channel *lp = dev_id;
 793	unsigned long orig_state, flags;
 794	unsigned int event_mask;
 795
 796	spin_lock_irqsave(&lp->lock, flags);
 797
 798	orig_state = lp->chan_state;
 799
 800	/* We should probably check for hypervisor errors here and
 801	 * reset the LDC channel if we get one.
 802	 */
 803	sun4v_ldc_rx_get_state(lp->id,
 804			       &lp->rx_head,
 805			       &lp->rx_tail,
 806			       &lp->chan_state);
 807
 808	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 809	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
 810
 811	event_mask = 0;
 812
 813	if (lp->cfg.mode == LDC_MODE_RAW &&
 814	    lp->chan_state == LDC_CHANNEL_UP) {
 815		lp->hs_state = LDC_HS_COMPLETE;
 816		ldc_set_state(lp, LDC_STATE_CONNECTED);
 817
 818		event_mask |= LDC_EVENT_UP;
 819
 820		orig_state = lp->chan_state;
 
 
 
 
 
 821	}
 822
 823	/* If we are in reset state, flush the RX queue and ignore
 824	 * everything.
 825	 */
 826	if (lp->flags & LDC_FLAG_RESET) {
 827		(void) __set_rx_head(lp, lp->rx_tail);
 828		goto out;
 829	}
 830
 831	/* Once we finish the handshake, we let the ldc_read()
 832	 * paths do all of the control frame and state management.
 833	 * Just trigger the callback.
 834	 */
 835	if (lp->hs_state == LDC_HS_COMPLETE) {
 836handshake_complete:
 837		if (lp->chan_state != orig_state) {
 838			unsigned int event = LDC_EVENT_RESET;
 839
 840			if (lp->chan_state == LDC_CHANNEL_UP)
 841				event = LDC_EVENT_UP;
 842
 843			event_mask |= event;
 844		}
 845		if (lp->rx_head != lp->rx_tail)
 846			event_mask |= LDC_EVENT_DATA_READY;
 847
 848		goto out;
 849	}
 850
 851	if (lp->chan_state != orig_state)
 852		goto out;
 853
 854	while (lp->rx_head != lp->rx_tail) {
 855		struct ldc_packet *p;
 856		unsigned long new;
 857		int err;
 858
 859		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
 860
 861		switch (p->type) {
 862		case LDC_CTRL:
 863			err = process_control_frame(lp, p);
 864			if (err > 0)
 865				event_mask |= err;
 866			break;
 867
 868		case LDC_DATA:
 869			event_mask |= LDC_EVENT_DATA_READY;
 870			err = 0;
 871			break;
 872
 873		case LDC_ERR:
 874			err = process_error_frame(lp, p);
 875			break;
 876
 877		default:
 878			err = ldc_abort(lp);
 879			break;
 880		}
 881
 882		if (err < 0)
 883			break;
 884
 885		new = lp->rx_head;
 886		new += LDC_PACKET_SIZE;
 887		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
 888			new = 0;
 889		lp->rx_head = new;
 890
 891		err = __set_rx_head(lp, new);
 892		if (err < 0) {
 893			(void) ldc_abort(lp);
 894			break;
 895		}
 896		if (lp->hs_state == LDC_HS_COMPLETE)
 897			goto handshake_complete;
 898	}
 899
 900out:
 901	spin_unlock_irqrestore(&lp->lock, flags);
 902
 903	send_events(lp, event_mask);
 904
 905	return IRQ_HANDLED;
 906}
 907
 908static irqreturn_t ldc_tx(int irq, void *dev_id)
 909{
 910	struct ldc_channel *lp = dev_id;
 911	unsigned long flags, orig_state;
 912	unsigned int event_mask = 0;
 913
 914	spin_lock_irqsave(&lp->lock, flags);
 915
 916	orig_state = lp->chan_state;
 917
 918	/* We should probably check for hypervisor errors here and
 919	 * reset the LDC channel if we get one.
 920	 */
 921	sun4v_ldc_tx_get_state(lp->id,
 922			       &lp->tx_head,
 923			       &lp->tx_tail,
 924			       &lp->chan_state);
 925
 926	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 927	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
 928
 929	if (lp->cfg.mode == LDC_MODE_RAW &&
 930	    lp->chan_state == LDC_CHANNEL_UP) {
 931		lp->hs_state = LDC_HS_COMPLETE;
 932		ldc_set_state(lp, LDC_STATE_CONNECTED);
 933
 934		event_mask |= LDC_EVENT_UP;
 
 
 
 
 
 
 
 935	}
 936
 937	spin_unlock_irqrestore(&lp->lock, flags);
 938
 939	send_events(lp, event_mask);
 940
 941	return IRQ_HANDLED;
 942}
 943
 944/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so
 945 * XXX that addition and removal from the ldc_channel_list has
 946 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 947 * XXX totally pointless as another thread can slip into ldc_alloc()
 948 * XXX and add a channel with the same ID.  There also needs to be
 949 * XXX a spinlock for ldc_channel_list.
 950 */
 951static HLIST_HEAD(ldc_channel_list);
 952
 953static int __ldc_channel_exists(unsigned long id)
 954{
 955	struct ldc_channel *lp;
 956
 957	hlist_for_each_entry(lp, &ldc_channel_list, list) {
 958		if (lp->id == id)
 959			return 1;
 960	}
 961	return 0;
 962}
 963
 964static int alloc_queue(const char *name, unsigned long num_entries,
 965		       struct ldc_packet **base, unsigned long *ra)
 966{
 967	unsigned long size, order;
 968	void *q;
 969
 970	size = num_entries * LDC_PACKET_SIZE;
 971	order = get_order(size);
 972
 973	q = (void *) __get_free_pages(GFP_KERNEL, order);
 974	if (!q) {
 975		printk(KERN_ERR PFX "Alloc of %s queue failed with "
 976		       "size=%lu order=%lu\n", name, size, order);
 977		return -ENOMEM;
 978	}
 979
 980	memset(q, 0, PAGE_SIZE << order);
 981
 982	*base = q;
 983	*ra = __pa(q);
 984
 985	return 0;
 986}
 987
 988static void free_queue(unsigned long num_entries, struct ldc_packet *q)
 989{
 990	unsigned long size, order;
 991
 992	if (!q)
 993		return;
 994
 995	size = num_entries * LDC_PACKET_SIZE;
 996	order = get_order(size);
 997
 998	free_pages((unsigned long)q, order);
 999}
1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1001/* XXX Make this configurable... XXX */
1002#define LDC_IOTABLE_SIZE	(8 * 1024)
1003
1004static int ldc_iommu_init(struct ldc_channel *lp)
1005{
1006	unsigned long sz, num_tsb_entries, tsbsize, order;
1007	struct ldc_iommu *iommu = &lp->iommu;
 
1008	struct ldc_mtable_entry *table;
1009	unsigned long hv_err;
1010	int err;
1011
1012	num_tsb_entries = LDC_IOTABLE_SIZE;
1013	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1014
1015	spin_lock_init(&iommu->lock);
1016
1017	sz = num_tsb_entries / 8;
1018	sz = (sz + 7UL) & ~7UL;
1019	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1020	if (!iommu->arena.map) {
1021		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1022		return -ENOMEM;
1023	}
1024
1025	iommu->arena.limit = num_tsb_entries;
 
 
1026
1027	order = get_order(tsbsize);
1028
1029	table = (struct ldc_mtable_entry *)
1030		__get_free_pages(GFP_KERNEL, order);
1031	err = -ENOMEM;
1032	if (!table) {
1033		printk(KERN_ERR PFX "Alloc of MTE table failed, "
1034		       "size=%lu order=%lu\n", tsbsize, order);
1035		goto out_free_map;
1036	}
1037
1038	memset(table, 0, PAGE_SIZE << order);
1039
1040	iommu->page_table = table;
1041
1042	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1043					 num_tsb_entries);
1044	err = -EINVAL;
1045	if (hv_err)
1046		goto out_free_table;
1047
1048	return 0;
1049
1050out_free_table:
1051	free_pages((unsigned long) table, order);
1052	iommu->page_table = NULL;
1053
1054out_free_map:
1055	kfree(iommu->arena.map);
1056	iommu->arena.map = NULL;
1057
1058	return err;
1059}
1060
1061static void ldc_iommu_release(struct ldc_channel *lp)
1062{
1063	struct ldc_iommu *iommu = &lp->iommu;
 
1064	unsigned long num_tsb_entries, tsbsize, order;
1065
1066	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1067
1068	num_tsb_entries = iommu->arena.limit;
1069	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1070	order = get_order(tsbsize);
1071
1072	free_pages((unsigned long) iommu->page_table, order);
1073	iommu->page_table = NULL;
1074
1075	kfree(iommu->arena.map);
1076	iommu->arena.map = NULL;
1077}
1078
1079struct ldc_channel *ldc_alloc(unsigned long id,
1080			      const struct ldc_channel_config *cfgp,
1081			      void *event_arg)
 
1082{
1083	struct ldc_channel *lp;
1084	const struct ldc_mode_ops *mops;
1085	unsigned long dummy1, dummy2, hv_err;
1086	u8 mss, *mssbuf;
1087	int err;
1088
1089	err = -ENODEV;
1090	if (!ldom_domaining_enabled)
1091		goto out_err;
1092
1093	err = -EINVAL;
1094	if (!cfgp)
1095		goto out_err;
 
 
1096
1097	switch (cfgp->mode) {
1098	case LDC_MODE_RAW:
1099		mops = &raw_ops;
1100		mss = LDC_PACKET_SIZE;
1101		break;
1102
1103	case LDC_MODE_UNRELIABLE:
1104		mops = &nonraw_ops;
1105		mss = LDC_PACKET_SIZE - 8;
1106		break;
1107
1108	case LDC_MODE_STREAM:
1109		mops = &stream_ops;
1110		mss = LDC_PACKET_SIZE - 8 - 8;
1111		break;
1112
1113	default:
1114		goto out_err;
1115	}
1116
1117	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1118		goto out_err;
1119
1120	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1121	err = -ENODEV;
1122	if (hv_err == HV_ECHANNEL)
1123		goto out_err;
1124
1125	err = -EEXIST;
1126	if (__ldc_channel_exists(id))
1127		goto out_err;
1128
1129	mssbuf = NULL;
1130
1131	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1132	err = -ENOMEM;
1133	if (!lp)
1134		goto out_err;
1135
1136	spin_lock_init(&lp->lock);
1137
1138	lp->id = id;
1139
1140	err = ldc_iommu_init(lp);
1141	if (err)
1142		goto out_free_ldc;
1143
1144	lp->mops = mops;
1145	lp->mss = mss;
1146
1147	lp->cfg = *cfgp;
1148	if (!lp->cfg.mtu)
1149		lp->cfg.mtu = LDC_DEFAULT_MTU;
1150
1151	if (lp->cfg.mode == LDC_MODE_STREAM) {
1152		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1153		if (!mssbuf) {
1154			err = -ENOMEM;
1155			goto out_free_iommu;
1156		}
1157		lp->mssbuf = mssbuf;
1158	}
1159
1160	lp->event_arg = event_arg;
1161
1162	/* XXX allow setting via ldc_channel_config to override defaults
1163	 * XXX or use some formula based upon mtu
1164	 */
1165	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1166	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1167
1168	err = alloc_queue("TX", lp->tx_num_entries,
1169			  &lp->tx_base, &lp->tx_ra);
1170	if (err)
1171		goto out_free_mssbuf;
1172
1173	err = alloc_queue("RX", lp->rx_num_entries,
1174			  &lp->rx_base, &lp->rx_ra);
1175	if (err)
1176		goto out_free_txq;
1177
1178	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1179
1180	lp->hs_state = LDC_HS_CLOSED;
1181	ldc_set_state(lp, LDC_STATE_INIT);
1182
1183	INIT_HLIST_NODE(&lp->list);
1184	hlist_add_head(&lp->list, &ldc_channel_list);
1185
1186	INIT_HLIST_HEAD(&lp->mh_list);
1187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1188	return lp;
1189
1190out_free_txq:
1191	free_queue(lp->tx_num_entries, lp->tx_base);
1192
1193out_free_mssbuf:
1194	kfree(mssbuf);
1195
1196out_free_iommu:
1197	ldc_iommu_release(lp);
1198
1199out_free_ldc:
1200	kfree(lp);
1201
1202out_err:
1203	return ERR_PTR(err);
1204}
1205EXPORT_SYMBOL(ldc_alloc);
1206
1207void ldc_free(struct ldc_channel *lp)
1208{
1209	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1210		free_irq(lp->cfg.rx_irq, lp);
1211		free_irq(lp->cfg.tx_irq, lp);
 
1212	}
1213
1214	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1215		sun4v_ldc_tx_qconf(lp->id, 0, 0);
1216		sun4v_ldc_rx_qconf(lp->id, 0, 0);
1217		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1218	}
1219	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1220		free_queue(lp->tx_num_entries, lp->tx_base);
1221		free_queue(lp->rx_num_entries, lp->rx_base);
1222		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1223	}
1224
 
 
 
 
 
 
 
1225	hlist_del(&lp->list);
1226
1227	kfree(lp->mssbuf);
1228
1229	ldc_iommu_release(lp);
1230
1231	kfree(lp);
1232}
1233EXPORT_SYMBOL(ldc_free);
1234
1235/* Bind the channel.  This registers the LDC queues with
1236 * the hypervisor and puts the channel into a pseudo-listening
1237 * state.  This does not initiate a handshake, ldc_connect() does
1238 * that.
1239 */
1240int ldc_bind(struct ldc_channel *lp, const char *name)
1241{
1242	unsigned long hv_err, flags;
1243	int err = -EINVAL;
1244
1245	if (!name ||
1246	    (lp->state != LDC_STATE_INIT))
1247		return -EINVAL;
1248
1249	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1250	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1251
1252	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1253			  lp->rx_irq_name, lp);
1254	if (err)
1255		return err;
1256
1257	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1258			  lp->tx_irq_name, lp);
1259	if (err) {
1260		free_irq(lp->cfg.rx_irq, lp);
1261		return err;
1262	}
1263
1264
1265	spin_lock_irqsave(&lp->lock, flags);
1266
1267	enable_irq(lp->cfg.rx_irq);
1268	enable_irq(lp->cfg.tx_irq);
1269
1270	lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1271
1272	err = -ENODEV;
1273	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1274	if (hv_err)
1275		goto out_free_irqs;
1276
1277	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1278	if (hv_err)
1279		goto out_free_irqs;
1280
1281	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1282	if (hv_err)
1283		goto out_unmap_tx;
1284
1285	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1286	if (hv_err)
1287		goto out_unmap_tx;
1288
1289	lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1290
1291	hv_err = sun4v_ldc_tx_get_state(lp->id,
1292					&lp->tx_head,
1293					&lp->tx_tail,
1294					&lp->chan_state);
1295	err = -EBUSY;
1296	if (hv_err)
1297		goto out_unmap_rx;
1298
1299	lp->tx_acked = lp->tx_head;
1300
1301	lp->hs_state = LDC_HS_OPEN;
1302	ldc_set_state(lp, LDC_STATE_BOUND);
1303
 
 
 
 
 
 
 
 
1304	spin_unlock_irqrestore(&lp->lock, flags);
1305
1306	return 0;
1307
1308out_unmap_rx:
1309	lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1310	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1311
1312out_unmap_tx:
1313	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1314
1315out_free_irqs:
1316	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1317	free_irq(lp->cfg.tx_irq, lp);
1318	free_irq(lp->cfg.rx_irq, lp);
1319
1320	spin_unlock_irqrestore(&lp->lock, flags);
1321
1322	return err;
1323}
1324EXPORT_SYMBOL(ldc_bind);
1325
1326int ldc_connect(struct ldc_channel *lp)
1327{
1328	unsigned long flags;
1329	int err;
1330
1331	if (lp->cfg.mode == LDC_MODE_RAW)
1332		return -EINVAL;
1333
1334	spin_lock_irqsave(&lp->lock, flags);
1335
1336	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1337	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1338	    lp->hs_state != LDC_HS_OPEN)
1339		err = -EINVAL;
1340	else
1341		err = start_handshake(lp);
1342
1343	spin_unlock_irqrestore(&lp->lock, flags);
1344
1345	return err;
1346}
1347EXPORT_SYMBOL(ldc_connect);
1348
1349int ldc_disconnect(struct ldc_channel *lp)
1350{
1351	unsigned long hv_err, flags;
1352	int err;
1353
1354	if (lp->cfg.mode == LDC_MODE_RAW)
1355		return -EINVAL;
1356
1357	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1358	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1359		return -EINVAL;
1360
1361	spin_lock_irqsave(&lp->lock, flags);
1362
1363	err = -ENODEV;
1364	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1365	if (hv_err)
1366		goto out_err;
1367
1368	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1369	if (hv_err)
1370		goto out_err;
1371
1372	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1373	if (hv_err)
1374		goto out_err;
1375
1376	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1377	if (hv_err)
1378		goto out_err;
1379
1380	ldc_set_state(lp, LDC_STATE_BOUND);
1381	lp->hs_state = LDC_HS_OPEN;
1382	lp->flags |= LDC_FLAG_RESET;
1383
1384	spin_unlock_irqrestore(&lp->lock, flags);
1385
1386	return 0;
1387
1388out_err:
1389	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1390	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1391	free_irq(lp->cfg.tx_irq, lp);
1392	free_irq(lp->cfg.rx_irq, lp);
1393	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1394		       LDC_FLAG_REGISTERED_QUEUES);
1395	ldc_set_state(lp, LDC_STATE_INIT);
1396
1397	spin_unlock_irqrestore(&lp->lock, flags);
1398
1399	return err;
1400}
1401EXPORT_SYMBOL(ldc_disconnect);
1402
1403int ldc_state(struct ldc_channel *lp)
1404{
1405	return lp->state;
1406}
1407EXPORT_SYMBOL(ldc_state);
1408
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1409static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1410{
1411	struct ldc_packet *p;
1412	unsigned long new_tail;
1413	int err;
1414
 
 
 
 
 
 
 
 
1415	if (size > LDC_PACKET_SIZE)
1416		return -EMSGSIZE;
1417
1418	p = data_get_tx_packet(lp, &new_tail);
1419	if (!p)
1420		return -EAGAIN;
1421
1422	memcpy(p, buf, size);
1423
1424	err = send_tx_packet(lp, p, new_tail);
1425	if (!err)
1426		err = size;
1427
1428	return err;
1429}
1430
1431static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1432{
1433	struct ldc_packet *p;
1434	unsigned long hv_err, new;
1435	int err;
1436
1437	if (size < LDC_PACKET_SIZE)
1438		return -EINVAL;
1439
1440	hv_err = sun4v_ldc_rx_get_state(lp->id,
1441					&lp->rx_head,
1442					&lp->rx_tail,
1443					&lp->chan_state);
1444	if (hv_err)
1445		return ldc_abort(lp);
1446
1447	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1448	    lp->chan_state == LDC_CHANNEL_RESETTING)
1449		return -ECONNRESET;
1450
1451	if (lp->rx_head == lp->rx_tail)
1452		return 0;
1453
1454	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1455	memcpy(buf, p, LDC_PACKET_SIZE);
1456
1457	new = rx_advance(lp, lp->rx_head);
1458	lp->rx_head = new;
1459
1460	err = __set_rx_head(lp, new);
1461	if (err < 0)
1462		err = -ECONNRESET;
1463	else
1464		err = LDC_PACKET_SIZE;
1465
1466	return err;
1467}
1468
1469static const struct ldc_mode_ops raw_ops = {
1470	.write		=	write_raw,
1471	.read		=	read_raw,
1472};
1473
1474static int write_nonraw(struct ldc_channel *lp, const void *buf,
1475			unsigned int size)
1476{
1477	unsigned long hv_err, tail;
1478	unsigned int copied;
1479	u32 seq;
1480	int err;
1481
1482	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1483					&lp->chan_state);
1484	if (unlikely(hv_err))
1485		return -EBUSY;
1486
1487	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1488		return ldc_abort(lp);
1489
1490	if (!tx_has_space_for(lp, size))
1491		return -EAGAIN;
1492
1493	seq = lp->snd_nxt;
1494	copied = 0;
1495	tail = lp->tx_tail;
1496	while (copied < size) {
1497		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1498		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1499			    p->u.u_data :
1500			    p->u.r.r_data);
1501		int data_len;
1502
1503		p->type = LDC_DATA;
1504		p->stype = LDC_INFO;
1505		p->ctrl = 0;
1506
1507		data_len = size - copied;
1508		if (data_len > lp->mss)
1509			data_len = lp->mss;
1510
1511		BUG_ON(data_len > LDC_LEN);
1512
1513		p->env = (data_len |
1514			  (copied == 0 ? LDC_START : 0) |
1515			  (data_len == size - copied ? LDC_STOP : 0));
1516
1517		p->seqid = ++seq;
1518
1519		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1520		       p->type,
1521		       p->stype,
1522		       p->ctrl,
1523		       p->env,
1524		       p->seqid);
1525
1526		memcpy(data, buf, data_len);
1527		buf += data_len;
1528		copied += data_len;
1529
1530		tail = tx_advance(lp, tail);
1531	}
1532
1533	err = set_tx_tail(lp, tail);
1534	if (!err) {
1535		lp->snd_nxt = seq;
1536		err = size;
1537	}
1538
1539	return err;
1540}
1541
1542static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1543		      struct ldc_packet *first_frag)
1544{
1545	int err;
1546
1547	if (first_frag)
1548		lp->rcv_nxt = first_frag->seqid - 1;
1549
1550	err = send_data_nack(lp, p);
1551	if (err)
1552		return err;
1553
1554	err = __set_rx_head(lp, lp->rx_tail);
1555	if (err < 0)
1556		return ldc_abort(lp);
1557
1558	return 0;
1559}
1560
1561static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1562{
1563	if (p->stype & LDC_ACK) {
1564		int err = process_data_ack(lp, p);
1565		if (err)
1566			return err;
1567	}
1568	if (p->stype & LDC_NACK)
1569		return ldc_abort(lp);
1570
1571	return 0;
1572}
1573
1574static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1575{
1576	unsigned long dummy;
1577	int limit = 1000;
1578
1579	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1580	       cur_head, lp->rx_head, lp->rx_tail);
1581	while (limit-- > 0) {
1582		unsigned long hv_err;
1583
1584		hv_err = sun4v_ldc_rx_get_state(lp->id,
1585						&dummy,
1586						&lp->rx_tail,
1587						&lp->chan_state);
1588		if (hv_err)
1589			return ldc_abort(lp);
1590
1591		if (lp->chan_state == LDC_CHANNEL_DOWN ||
1592		    lp->chan_state == LDC_CHANNEL_RESETTING)
1593			return -ECONNRESET;
1594
1595		if (cur_head != lp->rx_tail) {
1596			ldcdbg(DATA, "DATA WAIT DONE "
1597			       "head[%lx] tail[%lx] chan_state[%lx]\n",
1598			       dummy, lp->rx_tail, lp->chan_state);
1599			return 0;
1600		}
1601
1602		udelay(1);
1603	}
1604	return -EAGAIN;
1605}
1606
1607static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1608{
1609	int err = __set_rx_head(lp, head);
1610
1611	if (err < 0)
1612		return ldc_abort(lp);
1613
1614	lp->rx_head = head;
1615	return 0;
1616}
1617
1618static void send_data_ack(struct ldc_channel *lp)
1619{
1620	unsigned long new_tail;
1621	struct ldc_packet *p;
1622
1623	p = data_get_tx_packet(lp, &new_tail);
1624	if (likely(p)) {
1625		int err;
1626
1627		memset(p, 0, sizeof(*p));
1628		p->type = LDC_DATA;
1629		p->stype = LDC_ACK;
1630		p->ctrl = 0;
1631		p->seqid = lp->snd_nxt + 1;
1632		p->u.r.ackid = lp->rcv_nxt;
1633
1634		err = send_tx_packet(lp, p, new_tail);
1635		if (!err)
1636			lp->snd_nxt++;
1637	}
1638}
1639
1640static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1641{
1642	struct ldc_packet *first_frag;
1643	unsigned long hv_err, new;
1644	int err, copied;
1645
1646	hv_err = sun4v_ldc_rx_get_state(lp->id,
1647					&lp->rx_head,
1648					&lp->rx_tail,
1649					&lp->chan_state);
1650	if (hv_err)
1651		return ldc_abort(lp);
1652
1653	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1654	    lp->chan_state == LDC_CHANNEL_RESETTING)
1655		return -ECONNRESET;
1656
1657	if (lp->rx_head == lp->rx_tail)
1658		return 0;
1659
1660	first_frag = NULL;
1661	copied = err = 0;
1662	new = lp->rx_head;
1663	while (1) {
1664		struct ldc_packet *p;
1665		int pkt_len;
1666
1667		BUG_ON(new == lp->rx_tail);
1668		p = lp->rx_base + (new / LDC_PACKET_SIZE);
1669
1670		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1671		       "rcv_nxt[%08x]\n",
1672		       p->type,
1673		       p->stype,
1674		       p->ctrl,
1675		       p->env,
1676		       p->seqid,
1677		       p->u.r.ackid,
1678		       lp->rcv_nxt);
1679
1680		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1681			err = rx_bad_seq(lp, p, first_frag);
1682			copied = 0;
1683			break;
1684		}
1685
1686		if (p->type & LDC_CTRL) {
1687			err = process_control_frame(lp, p);
1688			if (err < 0)
1689				break;
1690			err = 0;
1691		}
1692
1693		lp->rcv_nxt = p->seqid;
1694
 
 
 
 
 
1695		if (!(p->type & LDC_DATA)) {
1696			new = rx_advance(lp, new);
1697			goto no_data;
1698		}
1699		if (p->stype & (LDC_ACK | LDC_NACK)) {
1700			err = data_ack_nack(lp, p);
1701			if (err)
1702				break;
1703		}
1704		if (!(p->stype & LDC_INFO)) {
1705			new = rx_advance(lp, new);
1706			err = rx_set_head(lp, new);
1707			if (err)
1708				break;
1709			goto no_data;
1710		}
1711
1712		pkt_len = p->env & LDC_LEN;
1713
1714		/* Every initial packet starts with the START bit set.
1715		 *
1716		 * Singleton packets will have both START+STOP set.
1717		 *
1718		 * Fragments will have START set in the first frame, STOP
1719		 * set in the last frame, and neither bit set in middle
1720		 * frames of the packet.
1721		 *
1722		 * Therefore if we are at the beginning of a packet and
1723		 * we don't see START, or we are in the middle of a fragmented
1724		 * packet and do see START, we are unsynchronized and should
1725		 * flush the RX queue.
1726		 */
1727		if ((first_frag == NULL && !(p->env & LDC_START)) ||
1728		    (first_frag != NULL &&  (p->env & LDC_START))) {
1729			if (!first_frag)
1730				new = rx_advance(lp, new);
1731
1732			err = rx_set_head(lp, new);
1733			if (err)
1734				break;
1735
1736			if (!first_frag)
1737				goto no_data;
1738		}
1739		if (!first_frag)
1740			first_frag = p;
1741
1742		if (pkt_len > size - copied) {
1743			/* User didn't give us a big enough buffer,
1744			 * what to do?  This is a pretty serious error.
1745			 *
1746			 * Since we haven't updated the RX ring head to
1747			 * consume any of the packets, signal the error
1748			 * to the user and just leave the RX ring alone.
1749			 *
1750			 * This seems the best behavior because this allows
1751			 * a user of the LDC layer to start with a small
1752			 * RX buffer for ldc_read() calls and use -EMSGSIZE
1753			 * as a cue to enlarge it's read buffer.
1754			 */
1755			err = -EMSGSIZE;
1756			break;
1757		}
1758
1759		/* Ok, we are gonna eat this one.  */
1760		new = rx_advance(lp, new);
1761
1762		memcpy(buf,
1763		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1764			p->u.u_data : p->u.r.r_data), pkt_len);
1765		buf += pkt_len;
1766		copied += pkt_len;
1767
1768		if (p->env & LDC_STOP)
1769			break;
1770
1771no_data:
1772		if (new == lp->rx_tail) {
1773			err = rx_data_wait(lp, new);
1774			if (err)
1775				break;
1776		}
1777	}
1778
1779	if (!err)
1780		err = rx_set_head(lp, new);
1781
1782	if (err && first_frag)
1783		lp->rcv_nxt = first_frag->seqid - 1;
1784
1785	if (!err) {
1786		err = copied;
1787		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1788			send_data_ack(lp);
1789	}
1790
1791	return err;
1792}
1793
1794static const struct ldc_mode_ops nonraw_ops = {
1795	.write		=	write_nonraw,
1796	.read		=	read_nonraw,
1797};
1798
1799static int write_stream(struct ldc_channel *lp, const void *buf,
1800			unsigned int size)
1801{
1802	if (size > lp->cfg.mtu)
1803		size = lp->cfg.mtu;
1804	return write_nonraw(lp, buf, size);
1805}
1806
1807static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1808{
1809	if (!lp->mssbuf_len) {
1810		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1811		if (err < 0)
1812			return err;
1813
1814		lp->mssbuf_len = err;
1815		lp->mssbuf_off = 0;
1816	}
1817
1818	if (size > lp->mssbuf_len)
1819		size = lp->mssbuf_len;
1820	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1821
1822	lp->mssbuf_off += size;
1823	lp->mssbuf_len -= size;
1824
1825	return size;
1826}
1827
1828static const struct ldc_mode_ops stream_ops = {
1829	.write		=	write_stream,
1830	.read		=	read_stream,
1831};
1832
1833int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1834{
1835	unsigned long flags;
1836	int err;
1837
1838	if (!buf)
1839		return -EINVAL;
1840
1841	if (!size)
1842		return 0;
1843
1844	spin_lock_irqsave(&lp->lock, flags);
1845
1846	if (lp->hs_state != LDC_HS_COMPLETE)
1847		err = -ENOTCONN;
1848	else
1849		err = lp->mops->write(lp, buf, size);
1850
1851	spin_unlock_irqrestore(&lp->lock, flags);
1852
1853	return err;
1854}
1855EXPORT_SYMBOL(ldc_write);
1856
1857int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1858{
1859	unsigned long flags;
1860	int err;
1861
 
 
1862	if (!buf)
1863		return -EINVAL;
1864
1865	if (!size)
1866		return 0;
1867
1868	spin_lock_irqsave(&lp->lock, flags);
1869
1870	if (lp->hs_state != LDC_HS_COMPLETE)
1871		err = -ENOTCONN;
1872	else
1873		err = lp->mops->read(lp, buf, size);
1874
1875	spin_unlock_irqrestore(&lp->lock, flags);
1876
 
 
 
1877	return err;
1878}
1879EXPORT_SYMBOL(ldc_read);
1880
1881static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1882{
1883	struct iommu_arena *arena = &iommu->arena;
1884	unsigned long n, start, end, limit;
1885	int pass;
1886
1887	limit = arena->limit;
1888	start = arena->hint;
1889	pass = 0;
1890
1891again:
1892	n = bitmap_find_next_zero_area(arena->map, limit, start, npages, 0);
1893	end = n + npages;
1894	if (unlikely(end >= limit)) {
1895		if (likely(pass < 1)) {
1896			limit = start;
1897			start = 0;
1898			pass++;
1899			goto again;
1900		} else {
1901			/* Scanned the whole thing, give up. */
1902			return -1;
1903		}
1904	}
1905	bitmap_set(arena->map, n, npages);
1906
1907	arena->hint = end;
1908
1909	return n;
1910}
1911
1912#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
1913#define COOKIE_PGSZ_CODE_SHIFT	60ULL
1914
1915static u64 pagesize_code(void)
1916{
1917	switch (PAGE_SIZE) {
1918	default:
1919	case (8ULL * 1024ULL):
1920		return 0;
1921	case (64ULL * 1024ULL):
1922		return 1;
1923	case (512ULL * 1024ULL):
1924		return 2;
1925	case (4ULL * 1024ULL * 1024ULL):
1926		return 3;
1927	case (32ULL * 1024ULL * 1024ULL):
1928		return 4;
1929	case (256ULL * 1024ULL * 1024ULL):
1930		return 5;
1931	}
1932}
1933
1934static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1935{
1936	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1937		(index << PAGE_SHIFT) |
1938		page_offset);
1939}
1940
1941static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1942{
1943	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1944
1945	cookie &= ~COOKIE_PGSZ_CODE;
1946
1947	*shift = szcode * 3;
1948
1949	return (cookie >> (13ULL + (szcode * 3ULL)));
1950}
1951
1952static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1953					     unsigned long npages)
1954{
1955	long entry;
1956
1957	entry = arena_alloc(iommu, npages);
1958	if (unlikely(entry < 0))
 
1959		return NULL;
1960
1961	return iommu->page_table + entry;
1962}
1963
1964static u64 perm_to_mte(unsigned int map_perm)
1965{
1966	u64 mte_base;
1967
1968	mte_base = pagesize_code();
1969
1970	if (map_perm & LDC_MAP_SHADOW) {
1971		if (map_perm & LDC_MAP_R)
1972			mte_base |= LDC_MTE_COPY_R;
1973		if (map_perm & LDC_MAP_W)
1974			mte_base |= LDC_MTE_COPY_W;
1975	}
1976	if (map_perm & LDC_MAP_DIRECT) {
1977		if (map_perm & LDC_MAP_R)
1978			mte_base |= LDC_MTE_READ;
1979		if (map_perm & LDC_MAP_W)
1980			mte_base |= LDC_MTE_WRITE;
1981		if (map_perm & LDC_MAP_X)
1982			mte_base |= LDC_MTE_EXEC;
1983	}
1984	if (map_perm & LDC_MAP_IO) {
1985		if (map_perm & LDC_MAP_R)
1986			mte_base |= LDC_MTE_IOMMU_R;
1987		if (map_perm & LDC_MAP_W)
1988			mte_base |= LDC_MTE_IOMMU_W;
1989	}
1990
1991	return mte_base;
1992}
1993
1994static int pages_in_region(unsigned long base, long len)
1995{
1996	int count = 0;
1997
1998	do {
1999		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2000
2001		len -= (new - base);
2002		base = new;
2003		count++;
2004	} while (len > 0);
2005
2006	return count;
2007}
2008
2009struct cookie_state {
2010	struct ldc_mtable_entry		*page_table;
2011	struct ldc_trans_cookie		*cookies;
2012	u64				mte_base;
2013	u64				prev_cookie;
2014	u32				pte_idx;
2015	u32				nc;
2016};
2017
2018static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2019			 unsigned long off, unsigned long len)
2020{
2021	do {
2022		unsigned long tlen, new = pa + PAGE_SIZE;
2023		u64 this_cookie;
2024
2025		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2026
2027		tlen = PAGE_SIZE;
2028		if (off)
2029			tlen = PAGE_SIZE - off;
2030		if (tlen > len)
2031			tlen = len;
2032
2033		this_cookie = make_cookie(sp->pte_idx,
2034					  pagesize_code(), off);
2035
2036		off = 0;
2037
2038		if (this_cookie == sp->prev_cookie) {
2039			sp->cookies[sp->nc - 1].cookie_size += tlen;
2040		} else {
2041			sp->cookies[sp->nc].cookie_addr = this_cookie;
2042			sp->cookies[sp->nc].cookie_size = tlen;
2043			sp->nc++;
2044		}
2045		sp->prev_cookie = this_cookie + tlen;
2046
2047		sp->pte_idx++;
2048
2049		len -= tlen;
2050		pa = new;
2051	} while (len > 0);
2052}
2053
2054static int sg_count_one(struct scatterlist *sg)
2055{
2056	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2057	long len = sg->length;
2058
2059	if ((sg->offset | len) & (8UL - 1))
2060		return -EFAULT;
2061
2062	return pages_in_region(base + sg->offset, len);
2063}
2064
2065static int sg_count_pages(struct scatterlist *sg, int num_sg)
2066{
2067	int count;
2068	int i;
2069
2070	count = 0;
2071	for (i = 0; i < num_sg; i++) {
2072		int err = sg_count_one(sg + i);
2073		if (err < 0)
2074			return err;
2075		count += err;
2076	}
2077
2078	return count;
2079}
2080
2081int ldc_map_sg(struct ldc_channel *lp,
2082	       struct scatterlist *sg, int num_sg,
2083	       struct ldc_trans_cookie *cookies, int ncookies,
2084	       unsigned int map_perm)
2085{
2086	unsigned long i, npages, flags;
2087	struct ldc_mtable_entry *base;
2088	struct cookie_state state;
2089	struct ldc_iommu *iommu;
2090	int err;
 
2091
2092	if (map_perm & ~LDC_MAP_ALL)
2093		return -EINVAL;
2094
2095	err = sg_count_pages(sg, num_sg);
2096	if (err < 0)
2097		return err;
2098
2099	npages = err;
2100	if (err > ncookies)
2101		return -EMSGSIZE;
2102
2103	iommu = &lp->iommu;
2104
2105	spin_lock_irqsave(&iommu->lock, flags);
2106	base = alloc_npages(iommu, npages);
2107	spin_unlock_irqrestore(&iommu->lock, flags);
2108
2109	if (!base)
2110		return -ENOMEM;
2111
2112	state.page_table = iommu->page_table;
2113	state.cookies = cookies;
2114	state.mte_base = perm_to_mte(map_perm);
2115	state.prev_cookie = ~(u64)0;
2116	state.pte_idx = (base - iommu->page_table);
2117	state.nc = 0;
2118
2119	for (i = 0; i < num_sg; i++)
2120		fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
2121			     sg[i].offset, sg[i].length);
2122
2123	return state.nc;
2124}
2125EXPORT_SYMBOL(ldc_map_sg);
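/* A minimal usage sketch for a hypothetical caller (names and sizes are
 * illustrative, error paths trimmed): export a scatterlist for
 * hypervisor copies, then drop the mapping with ldc_unmap() when done.
 *
 *	struct ldc_trans_cookie cookies[4];
 *	int nc;
 *
 *	nc = ldc_map_sg(lp, sg, num_sg, cookies, ARRAY_SIZE(cookies),
 *			LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W);
 *	if (nc < 0)
 *		return nc;
 *	...
 *	ldc_unmap(lp, cookies, nc);
 */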
2126
2127int ldc_map_single(struct ldc_channel *lp,
2128		   void *buf, unsigned int len,
2129		   struct ldc_trans_cookie *cookies, int ncookies,
2130		   unsigned int map_perm)
2131{
2132	unsigned long npages, pa, flags;
2133	struct ldc_mtable_entry *base;
2134	struct cookie_state state;
2135	struct ldc_iommu *iommu;
2136
2137	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2138		return -EINVAL;
2139
2140	pa = __pa(buf);
2141	if ((pa | len) & (8UL - 1))
2142		return -EFAULT;
2143
2144	npages = pages_in_region(pa, len);
2145
2146	iommu = &lp->iommu;
2147
2148	spin_lock_irqsave(&iommu->lock, flags);
2149	base = alloc_npages(iommu, npages);
2150	spin_unlock_irqrestore(&iommu->lock, flags);
2151
2152	if (!base)
2153		return -ENOMEM;
2154
2155	state.page_table = iommu->page_table;
2156	state.cookies = cookies;
2157	state.mte_base = perm_to_mte(map_perm);
2158	state.prev_cookie = ~(u64)0;
2159	state.pte_idx = (base - iommu->page_table);
2160	state.nc = 0;
2161	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2162	BUG_ON(state.nc != 1);
2163
2164	return state.nc;
2165}
2166EXPORT_SYMBOL(ldc_map_single);
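/* ldc_map_single() always produces exactly one cookie (see the BUG_ON
 * above), so a hypothetical caller needs room for just one; the buffer
 * address and length must both be multiples of 8.  Sketch:
 *
 *	struct ldc_trans_cookie cookie;
 *	int err;
 *
 *	err = ldc_map_single(lp, buf, len, &cookie, 1,
 *			     LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W);
 *	if (err < 0)
 *		return err;
 */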
2167
2168static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2169			u64 cookie, u64 size)
2170{
2171	struct iommu_arena *arena = &iommu->arena;
2172	unsigned long i, shift, index, npages;
2173	struct ldc_mtable_entry *base;
2174
2175	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2176	index = cookie_to_index(cookie, &shift);
2177	base = iommu->page_table + index;
2178
2179	BUG_ON(index > arena->limit ||
2180	       (index + npages) > arena->limit);
2181
2182	for (i = 0; i < npages; i++) {
2183		if (base->cookie)
2184			sun4v_ldc_revoke(id, cookie + (i << shift),
2185					 base->cookie);
2186		base->mte = 0;
2187		__clear_bit(index + i, arena->map);
		base++;		/* advance to the next map table entry */
2188	}
2189}
2190
2191void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2192	       int ncookies)
2193{
2194	struct ldc_iommu *iommu = &lp->iommu;
2195	unsigned long flags;
2196	int i;
2197
2198	spin_lock_irqsave(&iommu->lock, flags);
2199	for (i = 0; i < ncookies; i++) {
2200		u64 addr = cookies[i].cookie_addr;
2201		u64 size = cookies[i].cookie_size;
2202
2203		free_npages(lp->id, iommu, addr, size);
2204	}
2205	spin_unlock_irqrestore(&iommu->lock, flags);
2206}
2207EXPORT_SYMBOL(ldc_unmap);
2208
2209int ldc_copy(struct ldc_channel *lp, int copy_dir,
2210	     void *buf, unsigned int len, unsigned long offset,
2211	     struct ldc_trans_cookie *cookies, int ncookies)
2212{
2213	unsigned int orig_len;
2214	unsigned long ra;
2215	int i;
2216
2217	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2218		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2219		       lp->id, copy_dir);
2220		return -EINVAL;
2221	}
2222
2223	ra = __pa(buf);
2224	if ((ra | len | offset) & (8UL - 1)) {
2225		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2226		       "ra[%lx] len[%x] offset[%lx]\n",
2227		       lp->id, ra, len, offset);
2228		return -EFAULT;
2229	}
2230
2231	if (lp->hs_state != LDC_HS_COMPLETE ||
2232	    (lp->flags & LDC_FLAG_RESET)) {
2233		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2234		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2235		return -ECONNRESET;
2236	}
2237
2238	orig_len = len;
2239	for (i = 0; i < ncookies; i++) {
2240		unsigned long cookie_raddr = cookies[i].cookie_addr;
2241		unsigned long this_len = cookies[i].cookie_size;
2242		unsigned long actual_len;
2243
2244		if (unlikely(offset)) {
2245			unsigned long this_off = offset;
2246
2247			if (this_off > this_len)
2248				this_off = this_len;
2249
2250			offset -= this_off;
2251			this_len -= this_off;
2252			if (!this_len)
2253				continue;
2254			cookie_raddr += this_off;
2255		}
2256
2257		if (this_len > len)
2258			this_len = len;
2259
2260		while (1) {
2261			unsigned long hv_err;
2262
2263			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2264						cookie_raddr, ra,
2265						this_len, &actual_len);
2266			if (unlikely(hv_err)) {
2267				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2268				       "HV error %lu\n",
2269				       lp->id, hv_err);
2270				if (lp->hs_state != LDC_HS_COMPLETE ||
2271				    (lp->flags & LDC_FLAG_RESET))
2272					return -ECONNRESET;
2273				else
2274					return -EFAULT;
2275			}
2276
2277			cookie_raddr += actual_len;
2278			ra += actual_len;
2279			len -= actual_len;
2280			if (actual_len == this_len)
2281				break;
2282
2283			this_len -= actual_len;
2284		}
2285
2286		if (!len)
2287			break;
2288	}
2289
2290	/* It is caller policy what to do about short copies.
2291	 * For example, a networking driver can declare the
2292	 * packet a runt and drop it.
2293	 */
2294
2295	return orig_len - len;
2296}
2297EXPORT_SYMBOL(ldc_copy);
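/* Sketch of the caller-policy note above for a hypothetical network
 * driver: the local buffer, length and offset must all be multiples of
 * 8, and a short return value is not an error.
 *
 *	int copied = ldc_copy(lp, LDC_COPY_IN, rx_buf, pkt_len, 0,
 *			      desc_cookies, desc_ncookies);
 *	if (copied < 0)
 *		return copied;
 *	if (copied < pkt_len)
 *		goto drop_runt;
 *
 * LDC_COPY_OUT works the same way in the transmit direction.
 */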
2298
2299void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2300			  struct ldc_trans_cookie *cookies, int *ncookies,
2301			  unsigned int map_perm)
2302{
2303	void *buf;
2304	int err;
2305
2306	if (len & (8UL - 1))
2307		return ERR_PTR(-EINVAL);
2308
2309	buf = kzalloc(len, GFP_KERNEL);
2310	if (!buf)
2311		return ERR_PTR(-ENOMEM);
2312
2313	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2314	if (err < 0) {
2315		kfree(buf);
2316		return ERR_PTR(err);
2317	}
2318	*ncookies = err;
2319
2320	return buf;
2321}
2322EXPORT_SYMBOL(ldc_alloc_exp_dring);
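/* Typical pairing for an exported descriptor ring (hypothetical names;
 * *ncookies is the cookie array capacity on entry and the number of
 * cookies actually used on return):
 *
 *	struct ldc_trans_cookie cookies[4];
 *	int ncookies = ARRAY_SIZE(cookies);
 *	void *dring;
 *
 *	dring = ldc_alloc_exp_dring(lp, ring_size, cookies, &ncookies,
 *				    LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W);
 *	if (IS_ERR(dring))
 *		return PTR_ERR(dring);
 *	...
 *	ldc_free_exp_dring(lp, dring, ring_size, cookies, ncookies);
 */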
2323
2324void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
2325			struct ldc_trans_cookie *cookies, int ncookies)
2326{
2327	ldc_unmap(lp, cookies, ncookies);
2328	kfree(buf);
2329}
2330EXPORT_SYMBOL(ldc_free_exp_dring);
2331
2332static int __init ldc_init(void)
2333{
2334	unsigned long major, minor;
2335	struct mdesc_handle *hp;
2336	const u64 *v;
2337	int err;
2338	u64 mp;
2339
2340	hp = mdesc_grab();
2341	if (!hp)
2342		return -ENODEV;
2343
2344	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2345	err = -ENODEV;
2346	if (mp == MDESC_NODE_NULL)
2347		goto out;
2348
2349	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2350	if (!v)
2351		goto out;
2352
2353	major = 1;
2354	minor = 0;
2355	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2356		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2357		goto out;
2358	}
2359
2360	printk(KERN_INFO "%s", version);
2361
2362	if (!*v) {
2363		printk(KERN_INFO PFX "Domaining disabled.\n");
2364		goto out;
2365	}
2366	ldom_domaining_enabled = 1;
2367	err = 0;
2368
2369out:
2370	mdesc_release(hp);
2371	return err;
2372}
2373
2374core_initcall(ldc_init);