v3.1
   1/* Copyright (c) 2007 Coraid, Inc.  See COPYING for GPL terms. */
   2/*
   3 * aoecmd.c
   4 * Filesystem request handling methods
   5 */
   6
   7#include <linux/ata.h>
   8#include <linux/slab.h>
   9#include <linux/hdreg.h>
  10#include <linux/blkdev.h>
  11#include <linux/skbuff.h>
  12#include <linux/netdevice.h>
  13#include <linux/genhd.h>
  14#include <linux/moduleparam.h>
  15#include <net/net_namespace.h>
  16#include <asm/unaligned.h>
  17#include "aoe.h"
  18
  19static int aoe_deadsecs = 60 * 3;
  20module_param(aoe_deadsecs, int, 0644);
  21MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
  22
  23static int aoe_maxout = 16;
  24module_param(aoe_maxout, int, 0644);
  25MODULE_PARM_DESC(aoe_maxout,
  26	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
  27
  28static struct sk_buff *
  29new_skb(ulong len)
  30{
  31	struct sk_buff *skb;
  32
  33	skb = alloc_skb(len, GFP_ATOMIC);
  34	if (skb) {
  35		skb_reset_mac_header(skb);
  36		skb_reset_network_header(skb);
  37		skb->protocol = __constant_htons(ETH_P_AOE);
  38	}
  39	return skb;
  40}
  41
  42static struct frame *
  43getframe(struct aoetgt *t, int tag)
  44{
  45	struct frame *f, *e;
  46
  47	f = t->frames;
  48	e = f + t->nframes;
  49	for (; f<e; f++)
  50		if (f->tag == tag)
  51			return f;
  52	return NULL;
  53}
  54
  55/*
  56 * Leave the top bit clear so we have tagspace for userland.
  57 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
  58 * This driver reserves tag -1 to mean "unused frame."
  59 */
  60static int
  61newtag(struct aoetgt *t)
  62{
  63	register ulong n;
  64
  65	n = jiffies & 0xffff;
  66	return n |= (++t->lasttag & 0x7fff) << 16;
  67}
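/* Worked example (editorial annotation, not part of aoecmd.c): with
 * t->lasttag == 0x0012 and jiffies == 0x0005abcd, newtag() returns
 *
 *	((0x0013 & 0x7fff) << 16) | (0x0005abcd & 0xffff) == 0x0013abcd
 *
 * Bit 31 stays clear (tagspace reserved for userland), bits 16-30 carry
 * the per-target sequence number, and bits 0-15 carry the transmit tick
 * that tsince() later subtracts from jiffies to age the frame.
 */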
  68
  69static int
  70aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
  71{
  72	u32 host_tag = newtag(t);
  73
  74	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
  75	memcpy(h->dst, t->addr, sizeof h->dst);
  76	h->type = __constant_cpu_to_be16(ETH_P_AOE);
  77	h->verfl = AOE_HVER;
  78	h->major = cpu_to_be16(d->aoemajor);
  79	h->minor = d->aoeminor;
  80	h->cmd = AOECMD_ATA;
  81	h->tag = cpu_to_be32(host_tag);
  82
  83	return host_tag;
  84}
  85
  86static inline void
  87put_lba(struct aoe_atahdr *ah, sector_t lba)
  88{
  89	ah->lba0 = lba;
  90	ah->lba1 = lba >>= 8;
  91	ah->lba2 = lba >>= 8;
  92	ah->lba3 = lba >>= 8;
  93	ah->lba4 = lba >>= 8;
  94	ah->lba5 = lba >>= 8;
  95}
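/* Worked example (editorial annotation): put_lba() stores the 48-bit
 * LBA one byte per field, least significant first; lba == 0x123456789abc
 * yields lba0..lba5 == 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12.  In the
 * LBA28 case the caller then masks lba3 to its low nibble and ORs in
 * 0xe0 (the LBA bit plus the obsolete 0xa0 head-select bits).
 */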
  96
  97static void
  98ifrotate(struct aoetgt *t)
  99{
 100	t->ifp++;
 101	if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
 102		t->ifp = t->ifs;
 103	if (t->ifp->nd == NULL) {
 104		printk(KERN_INFO "aoe: no interface to rotate to\n");
 105		BUG();
 106	}
 107}
 108
 109static void
 110skb_pool_put(struct aoedev *d, struct sk_buff *skb)
 111{
 112	__skb_queue_tail(&d->skbpool, skb);
 113}
 114
 115static struct sk_buff *
 116skb_pool_get(struct aoedev *d)
 117{
 118	struct sk_buff *skb = skb_peek(&d->skbpool);
 119
 120	if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
 121		__skb_unlink(skb, &d->skbpool);
 122		return skb;
 123	}
 124	if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
 125	    (skb = new_skb(ETH_ZLEN)))
 126		return skb;
 127
 128	return NULL;
 129}
 130
 131/* freeframe is where we do our load balancing so it's a little hairy. */
 132static struct frame *
 133freeframe(struct aoedev *d)
 134{
 135	struct frame *f, *e, *rf;
 136	struct aoetgt **t;
 137	struct sk_buff *skb;
 138
 139	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
 140		printk(KERN_ERR "aoe: NULL TARGETS!\n");
 141		return NULL;
 142	}
 143	t = d->tgt;
 144	t++;
 145	if (t >= &d->targets[NTARGETS] || !*t)
 146		t = d->targets;
 147	for (;;) {
 148		if ((*t)->nout < (*t)->maxout
 149		&& t != d->htgt
 150		&& (*t)->ifp->nd) {
 151			rf = NULL;
 152			f = (*t)->frames;
 153			e = f + (*t)->nframes;
 154			for (; f < e; f++) {
 155				if (f->tag != FREETAG)
 156					continue;
 157				skb = f->skb;
 158				if (!skb
 159				&& !(f->skb = skb = new_skb(ETH_ZLEN)))
 160					continue;
 161				if (atomic_read(&skb_shinfo(skb)->dataref)
 162					!= 1) {
 163					if (!rf)
 164						rf = f;
 165					continue;
 166				}
 167gotone:				skb_shinfo(skb)->nr_frags = skb->data_len = 0;
 168				skb_trim(skb, 0);
 169				d->tgt = t;
 170				ifrotate(*t);
 171				return f;
 172			}
 173			/* Work can be done, but the network layer is
 174			   holding our precious packets.  Try to grab
 175			   one from the pool. */
 176			f = rf;
 177			if (f == NULL) {	/* more paranoia */
 178				printk(KERN_ERR
 179					"aoe: freeframe: %s.\n",
 180					"unexpected null rf");
 181				d->flags |= DEVFL_KICKME;
 182				return NULL;
 183			}
 184			skb = skb_pool_get(d);
 185			if (skb) {
 186				skb_pool_put(d, f->skb);
 187				f->skb = skb;
 188				goto gotone;
 189			}
 190			(*t)->dataref++;
 191			if ((*t)->nout == 0)
 192				d->flags |= DEVFL_KICKME;
 193		}
 194		if (t == d->tgt)	/* we've looped and found nada */
 195			break;
 196		t++;
 197		if (t >= &d->targets[NTARGETS] || !*t)
 198			t = d->targets;
 199	}
 200	return NULL;
 201}
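/* Editorial annotation summarizing the walk above: starting just past
 * the last used target (d->tgt), freeframe() scans the targets
 * round-robin, skipping any that already have maxout packets
 * outstanding, are being helped (d->htgt), or have no interface.
 * Within a target it prefers a free frame whose skb the network layer
 * has released (dataref == 1), falls back to swapping in an skb from
 * the device pool, and sets DEVFL_KICKME when work exists but no frame
 * is usable yet.
 */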
 202
 203static int
 204aoecmd_ata_rw(struct aoedev *d)
 205{
 206	struct frame *f;
 207	struct aoe_hdr *h;
 208	struct aoe_atahdr *ah;
 209	struct buf *buf;
 210	struct bio_vec *bv;
 211	struct aoetgt *t;
 212	struct sk_buff *skb;
 213	ulong bcnt;
 214	char writebit, extbit;
 215
 216	writebit = 0x10;
 217	extbit = 0x4;
 218
 219	f = freeframe(d);
 220	if (f == NULL)
 221		return 0;
 222	t = *d->tgt;
 223	buf = d->inprocess;
 224	bv = buf->bv;
 225	bcnt = t->ifp->maxbcnt;
 226	if (bcnt == 0)
 227		bcnt = DEFAULTBCNT;
 228	if (bcnt > buf->bv_resid)
 229		bcnt = buf->bv_resid;
 230	/* initialize the headers & frame */
 231	skb = f->skb;
 232	h = (struct aoe_hdr *) skb_mac_header(skb);
 233	ah = (struct aoe_atahdr *) (h+1);
 234	skb_put(skb, sizeof *h + sizeof *ah);
 235	memset(h, 0, skb->len);
 236	f->tag = aoehdr_atainit(d, t, h);
 237	t->nout++;
 238	f->waited = 0;
 239	f->buf = buf;
 240	f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
 241	f->bcnt = bcnt;
 242	f->lba = buf->sector;
 243
 244	/* set up ata header */
 245	ah->scnt = bcnt >> 9;
 246	put_lba(ah, buf->sector);
 247	if (d->flags & DEVFL_EXT) {
 248		ah->aflags |= AOEAFL_EXT;
 249	} else {
 250		extbit = 0;
 251		ah->lba3 &= 0x0f;
 252		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
 253	}
 254	if (bio_data_dir(buf->bio) == WRITE) {
 255		skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
 256		ah->aflags |= AOEAFL_WRITE;
 257		skb->len += bcnt;
 258		skb->data_len = bcnt;
 259		t->wpkts++;
 260	} else {
 261		t->rpkts++;
 262		writebit = 0;
 263	}
 264
 265	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
 266
 267	/* mark all tracking fields and load out */
 268	buf->nframesout += 1;
 269	buf->bv_off += bcnt;
 270	buf->bv_resid -= bcnt;
 271	buf->resid -= bcnt;
 272	buf->sector += bcnt >> 9;
 273	if (buf->resid == 0) {
 274		d->inprocess = NULL;
 275	} else if (buf->bv_resid == 0) {
 276		buf->bv = ++bv;
 277		buf->bv_resid = bv->bv_len;
 278		WARN_ON(buf->bv_resid == 0);
 279		buf->bv_off = bv->bv_offset;
 280	}
 281
 282	skb->dev = t->ifp->nd;
 283	skb = skb_clone(skb, GFP_ATOMIC);
 284	if (skb)
 285		__skb_queue_tail(&d->sendq, skb);
 286	return 1;
 287}
 288
 289/* some callers cannot sleep, and they can call this function,
 290 * transmitting the packets later, when interrupts are on
 291 */
 292static void
 293aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
 294{
 295	struct aoe_hdr *h;
 296	struct aoe_cfghdr *ch;
 297	struct sk_buff *skb;
 298	struct net_device *ifp;
 299
 300	rcu_read_lock();
 301	for_each_netdev_rcu(&init_net, ifp) {
 302		dev_hold(ifp);
 303		if (!is_aoe_netif(ifp))
 304			goto cont;
 305
 306		skb = new_skb(sizeof *h + sizeof *ch);
 307		if (skb == NULL) {
 308			printk(KERN_INFO "aoe: skb alloc failure\n");
 309			goto cont;
 310		}
 311		skb_put(skb, sizeof *h + sizeof *ch);
 312		skb->dev = ifp;
 313		__skb_queue_tail(queue, skb);
 314		h = (struct aoe_hdr *) skb_mac_header(skb);
 315		memset(h, 0, sizeof *h + sizeof *ch);
 316
 317		memset(h->dst, 0xff, sizeof h->dst);
 318		memcpy(h->src, ifp->dev_addr, sizeof h->src);
 319		h->type = __constant_cpu_to_be16(ETH_P_AOE);
 320		h->verfl = AOE_HVER;
 321		h->major = cpu_to_be16(aoemajor);
 322		h->minor = aoeminor;
 323		h->cmd = AOECMD_CFG;
 324
 325cont:
 326		dev_put(ifp);
 327	}
 328	rcu_read_unlock();
 329}
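/* Editorial annotation: aoecmd_cfg_pkts() only builds and queues the
 * broadcast config packets; the caller (e.g. aoecmd_cfg() below) hands
 * the queue to aoenet_xmit() after dropping its locks, which is what
 * makes this function safe for callers that cannot sleep.
 */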
 330
 331static void
 332resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
 333{
 334	struct sk_buff *skb;
 335	struct aoe_hdr *h;
 336	struct aoe_atahdr *ah;
 337	char buf[128];
 338	u32 n;
 339
 340	ifrotate(t);
 341	n = newtag(t);
 342	skb = f->skb;
 343	h = (struct aoe_hdr *) skb_mac_header(skb);
 344	ah = (struct aoe_atahdr *) (h+1);
 345
 346	snprintf(buf, sizeof buf,
 347		"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
 348		"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
 349		h->src, h->dst, t->nout);
 350	aoechr_error(buf);
 351
 352	f->tag = n;
 353	h->tag = cpu_to_be32(n);
 354	memcpy(h->dst, t->addr, sizeof h->dst);
 355	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
 356
 357	switch (ah->cmdstat) {
 358	default:
 359		break;
 360	case ATA_CMD_PIO_READ:
 361	case ATA_CMD_PIO_READ_EXT:
 362	case ATA_CMD_PIO_WRITE:
 363	case ATA_CMD_PIO_WRITE_EXT:
 364		put_lba(ah, f->lba);
 365
 366		n = f->bcnt;
 367		if (n > DEFAULTBCNT)
 368			n = DEFAULTBCNT;
 369		ah->scnt = n >> 9;
 370		if (ah->aflags & AOEAFL_WRITE) {
 371			skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
 372				offset_in_page(f->bufaddr), n);
 373			skb->len = sizeof *h + sizeof *ah + n;
 374			skb->data_len = n;
 375		}
 376	}
 377	skb->dev = t->ifp->nd;
 378	skb = skb_clone(skb, GFP_ATOMIC);
 379	if (skb == NULL)
 380		return;
 381	__skb_queue_tail(&d->sendq, skb);
 382}
 383
 384static int
 385tsince(int tag)
 386{
 387	int n;
 388
 389	n = jiffies & 0xffff;
 390	n -= tag & 0xffff;
 391	if (n < 0)
 392		n += 1<<16;
 393	return n;
 394}
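/* Worked example (editorial annotation): only 16 bits of jiffies fit
 * in the tag, so the subtraction above is modulo 2^16 and the 1<<16
 * correction handles one wrap.  A frame sent at tick 0xfff0 and checked
 * at tick 0x0010 gives 0x0010 - 0xfff0 = -0xffe0, corrected to 0x0020,
 * i.e. 32 ticks.
 */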
 395
 396static struct aoeif *
 397getif(struct aoetgt *t, struct net_device *nd)
 398{
 399	struct aoeif *p, *e;
 400
 401	p = t->ifs;
 402	e = p + NAOEIFS;
 403	for (; p < e; p++)
 404		if (p->nd == nd)
 405			return p;
 406	return NULL;
 407}
 408
 409static struct aoeif *
 410addif(struct aoetgt *t, struct net_device *nd)
 411{
 412	struct aoeif *p;
 413
 414	p = getif(t, NULL);
 415	if (!p)
 416		return NULL;
 417	p->nd = nd;
 418	p->maxbcnt = DEFAULTBCNT;
 419	p->lost = 0;
 420	p->lostjumbo = 0;
 421	return p;
 422}
 423
 424static void
 425ejectif(struct aoetgt *t, struct aoeif *ifp)
 426{
 427	struct aoeif *e;
 428	ulong n;
 429
 430	e = t->ifs + NAOEIFS - 1;
 431	n = (e - ifp) * sizeof *ifp;
 432	memmove(ifp, ifp+1, n);
 433	e->nd = NULL;
 434}
 435
 436static int
 437sthtith(struct aoedev *d)
 438{
 439	struct frame *f, *e, *nf;
 440	struct sk_buff *skb;
 441	struct aoetgt *ht = *d->htgt;
 442
 443	f = ht->frames;
 444	e = f + ht->nframes;
 445	for (; f < e; f++) {
 446		if (f->tag == FREETAG)
 447			continue;
 448		nf = freeframe(d);
 449		if (!nf)
 450			return 0;
 451		skb = nf->skb;
 452		*nf = *f;
 453		f->skb = skb;
 454		f->tag = FREETAG;
 455		nf->waited = 0;
 456		ht->nout--;
 457		(*d->tgt)->nout++;
 458		resend(d, *d->tgt, nf);
 459	}
 460	/* he's clean, he's useless.  take away his interfaces */
 461	memset(ht->ifs, 0, sizeof ht->ifs);
 462	d->htgt = NULL;
 463	return 1;
 464}
 465
 466static inline unsigned char
 467ata_scnt(unsigned char *packet) {
 468	struct aoe_hdr *h;
 469	struct aoe_atahdr *ah;
 470
 471	h = (struct aoe_hdr *) packet;
 472	ah = (struct aoe_atahdr *) (h+1);
 473	return ah->scnt;
 474}
 475
 476static void
 477rexmit_timer(ulong vp)
 478{
 479	struct sk_buff_head queue;
 480	struct aoedev *d;
 481	struct aoetgt *t, **tt, **te;
 482	struct aoeif *ifp;
 483	struct frame *f, *e;
 484	register long timeout;
 485	ulong flags, n;
 486
 487	d = (struct aoedev *) vp;
 488
 489	/* timeout is always ~150% of the moving average */
 490	timeout = d->rttavg;
 491	timeout += timeout >> 1;
 492
 493	spin_lock_irqsave(&d->lock, flags);
 494
 495	if (d->flags & DEVFL_TKILL) {
 496		spin_unlock_irqrestore(&d->lock, flags);
 497		return;
 498	}
 499	tt = d->targets;
 500	te = tt + NTARGETS;
 501	for (; tt < te && *tt; tt++) {
 502		t = *tt;
 503		f = t->frames;
 504		e = f + t->nframes;
 505		for (; f < e; f++) {
 506			if (f->tag == FREETAG
 507			|| tsince(f->tag) < timeout)
 508				continue;
 509			n = f->waited += timeout;
 510			n /= HZ;
 511			if (n > aoe_deadsecs) {
 512				/* waited too long.  device failure. */
 513				aoedev_downdev(d);
 514				break;
 515			}
 516
 517			if (n > HELPWAIT /* see if another target can help */
 518			&& (tt != d->targets || d->targets[1]))
 519				d->htgt = tt;
 520
 521			if (t->nout == t->maxout) {
 522				if (t->maxout > 1)
 523					t->maxout--;
 524				t->lastwadj = jiffies;
 525			}
 526
 527			ifp = getif(t, f->skb->dev);
 528			if (ifp && ++ifp->lost > (t->nframes << 1)
 529			&& (ifp != t->ifs || t->ifs[1].nd)) {
 530				ejectif(t, ifp);
 531				ifp = NULL;
 532			}
 533
 534			if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
 535			&& ifp && ++ifp->lostjumbo > (t->nframes << 1)
 536			&& ifp->maxbcnt != DEFAULTBCNT) {
 537				printk(KERN_INFO
 538					"aoe: e%ld.%d: "
 539					"too many lost jumbo on "
 540					"%s:%pm - "
 541					"falling back to %d frames.\n",
 542					d->aoemajor, d->aoeminor,
 543					ifp->nd->name, t->addr,
 544					DEFAULTBCNT);
 545				ifp->maxbcnt = 0;
 546			}
 547			resend(d, t, f);
 548		}
 549
 550		/* window check */
 551		if (t->nout == t->maxout
 552		&& t->maxout < t->nframes
 553		&& (jiffies - t->lastwadj)/HZ > 10) {
 554			t->maxout++;
 555			t->lastwadj = jiffies;
 556		}
 557	}
 558
 559	if (!skb_queue_empty(&d->sendq)) {
 560		n = d->rttavg <<= 1;
 561		if (n > MAXTIMER)
 562			d->rttavg = MAXTIMER;
 563	}
 564
 565	if (d->flags & DEVFL_KICKME || d->htgt) {
 566		d->flags &= ~DEVFL_KICKME;
 567		aoecmd_work(d);
 568	}
 569
 570	__skb_queue_head_init(&queue);
 571	skb_queue_splice_init(&d->sendq, &queue);
 572
 573	d->timer.expires = jiffies + TIMERTICK;
 574	add_timer(&d->timer);
 575
 576	spin_unlock_irqrestore(&d->lock, flags);
 577
 578	aoenet_xmit(&queue);
 579}
 580
 581/* enters with d->lock held */
 582void
 583aoecmd_work(struct aoedev *d)
 584{
 585	struct buf *buf;
 586loop:
 587	if (d->htgt && !sthtith(d))
 588		return;
 589	if (d->inprocess == NULL) {
 590		if (list_empty(&d->bufq))
 591			return;
 592		buf = container_of(d->bufq.next, struct buf, bufs);
 593		list_del(d->bufq.next);
 594		d->inprocess = buf;
 595	}
 596	if (aoecmd_ata_rw(d))
 597		goto loop;
 598}
 599
 600/* this function performs work that has been deferred until sleeping is OK
 601 */
 602void
 603aoecmd_sleepwork(struct work_struct *work)
 604{
 605	struct aoedev *d = container_of(work, struct aoedev, work);
 606
 607	if (d->flags & DEVFL_GDALLOC)
 608		aoeblk_gdalloc(d);
 609
 610	if (d->flags & DEVFL_NEWSIZE) {
 611		struct block_device *bd;
 612		unsigned long flags;
 613		u64 ssize;
 614
 615		ssize = get_capacity(d->gd);
 616		bd = bdget_disk(d->gd, 0);
 617
 618		if (bd) {
 619			mutex_lock(&bd->bd_inode->i_mutex);
 620			i_size_write(bd->bd_inode, (loff_t)ssize<<9);
 621			mutex_unlock(&bd->bd_inode->i_mutex);
 622			bdput(bd);
 623		}
 624		spin_lock_irqsave(&d->lock, flags);
 625		d->flags |= DEVFL_UP;
 626		d->flags &= ~DEVFL_NEWSIZE;
 627		spin_unlock_irqrestore(&d->lock, flags);
 628	}
 629}
 630
 631static void
 632ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 633{
 634	u64 ssize;
 635	u16 n;
 636
 637	/* word 83: command set supported */
 638	n = get_unaligned_le16(&id[83 << 1]);
 639
 640	/* word 86: command set/feature enabled */
 641	n |= get_unaligned_le16(&id[86 << 1]);
 642
 643	if (n & (1<<10)) {	/* bit 10: LBA 48 */
 644		d->flags |= DEVFL_EXT;
 645
 646		/* word 100: number lba48 sectors */
 647		ssize = get_unaligned_le64(&id[100 << 1]);
 648
 649		/* set as in ide-disk.c:init_idedisk_capacity */
 650		d->geo.cylinders = ssize;
 651		d->geo.cylinders /= (255 * 63);
 652		d->geo.heads = 255;
 653		d->geo.sectors = 63;
 654	} else {
 655		d->flags &= ~DEVFL_EXT;
 656
 657		/* number lba28 sectors */
 658		ssize = get_unaligned_le32(&id[60 << 1]);
 659
 660		/* NOTE: obsolete in ATA 6 */
 661		d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
 662		d->geo.heads = get_unaligned_le16(&id[55 << 1]);
 663		d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
 664	}
 665
 666	if (d->ssize != ssize)
 667		printk(KERN_INFO
 668			"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
 669			t->addr,
 670			d->aoemajor, d->aoeminor,
 671			d->fw_ver, (long long)ssize);
 672	d->ssize = ssize;
 673	d->geo.start = 0;
 674	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
 675		return;
 676	if (d->gd != NULL) {
 677		set_capacity(d->gd, ssize);
 678		d->flags |= DEVFL_NEWSIZE;
 679	} else
 680		d->flags |= DEVFL_GDALLOC;
 681	schedule_work(&d->work);
 682}
 683
 684static void
 685calc_rttavg(struct aoedev *d, int rtt)
 686{
 687	register long n;
 688
 689	n = rtt;
 690	if (n < 0) {
 691		n = -rtt;
 692		if (n < MINTIMER)
 693			n = MINTIMER;
 694		else if (n > MAXTIMER)
 695			n = MAXTIMER;
 696		d->mintimer += (n - d->mintimer) >> 1;
 697	} else if (n < d->mintimer)
 698		n = d->mintimer;
 699	else if (n > MAXTIMER)
 700		n = MAXTIMER;
 701
 702	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
 703	n -= d->rttavg;
 704	d->rttavg += n >> 2;
 705}
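/* Editorial annotation: the final two lines are the Jacobson/Karels
 * exponentially weighted moving average with gain g = 1/4:
 *
 *	rttavg <- rttavg + (rtt - rttavg) / 4
 *
 * A negative rtt (the "unexpected response" case in aoecmd_ata_rsp())
 * additionally drags d->mintimer halfway toward the clamped magnitude,
 * raising the floor applied to well-behaved samples.
 */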
 706
 707static struct aoetgt *
 708gettgt(struct aoedev *d, char *addr)
 709{
 710	struct aoetgt **t, **e;
 711
 712	t = d->targets;
 713	e = t + NTARGETS;
 714	for (; t < e && *t; t++)
 715		if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
 716			return *t;
 717	return NULL;
 718}
 719
 720static inline void
 721diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
 722{
 723	unsigned long n_sect = bio->bi_size >> 9;
 724	const int rw = bio_data_dir(bio);
 725	struct hd_struct *part;
 726	int cpu;
 727
 728	cpu = part_stat_lock();
 729	part = disk_map_sector_rcu(disk, sector);
 730
 731	part_stat_inc(cpu, part, ios[rw]);
 732	part_stat_add(cpu, part, ticks[rw], duration);
 733	part_stat_add(cpu, part, sectors[rw], n_sect);
 734	part_stat_add(cpu, part, io_ticks, duration);
 735
 736	part_stat_unlock();
 737}
 738
 739void
 740aoecmd_ata_rsp(struct sk_buff *skb)
 741{
 742	struct sk_buff_head queue;
 743	struct aoedev *d;
 744	struct aoe_hdr *hin, *hout;
 745	struct aoe_atahdr *ahin, *ahout;
 746	struct frame *f;
 747	struct buf *buf;
 748	struct aoetgt *t;
 749	struct aoeif *ifp;
 750	register long n;
 751	ulong flags;
 752	char ebuf[128];
 753	u16 aoemajor;
 754
 755	hin = (struct aoe_hdr *) skb_mac_header(skb);
 756	aoemajor = get_unaligned_be16(&hin->major);
 757	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
 758	if (d == NULL) {
 759		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
 760			"for unknown device %d.%d\n",
 761			 aoemajor, hin->minor);
 762		aoechr_error(ebuf);
 763		return;
 764	}
 765
 766	spin_lock_irqsave(&d->lock, flags);
 767
 768	n = get_unaligned_be32(&hin->tag);
 769	t = gettgt(d, hin->src);
 770	if (t == NULL) {
 771		printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
 772			d->aoemajor, d->aoeminor, hin->src);
 773		spin_unlock_irqrestore(&d->lock, flags);
 774		return;
 775	}
 776	f = getframe(t, n);
 777	if (f == NULL) {
 778		calc_rttavg(d, -tsince(n));
 779		spin_unlock_irqrestore(&d->lock, flags);
 780		snprintf(ebuf, sizeof ebuf,
 781			"%15s e%d.%d    tag=%08x@%08lx\n",
 782			"unexpected rsp",
 783			get_unaligned_be16(&hin->major),
 784			hin->minor,
 785			get_unaligned_be32(&hin->tag),
 786			jiffies);
 787		aoechr_error(ebuf);
 788		return;
 789	}
 790
 791	calc_rttavg(d, tsince(f->tag));
 792
 793	ahin = (struct aoe_atahdr *) (hin+1);
 794	hout = (struct aoe_hdr *) skb_mac_header(f->skb);
 795	ahout = (struct aoe_atahdr *) (hout+1);
 796	buf = f->buf;
 797
 798	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
 799		printk(KERN_ERR
 800			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
 801			ahout->cmdstat, ahin->cmdstat,
 802			d->aoemajor, d->aoeminor);
 803		if (buf)
 804			buf->flags |= BUFFL_FAIL;
 805	} else {
 806		if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
 807			d->htgt = NULL;
 808		n = ahout->scnt << 9;
 809		switch (ahout->cmdstat) {
 810		case ATA_CMD_PIO_READ:
 811		case ATA_CMD_PIO_READ_EXT:
 812			if (skb->len - sizeof *hin - sizeof *ahin < n) {
 813				printk(KERN_ERR
 814					"aoe: %s.  skb->len=%d need=%ld\n",
 815					"runt data size in read", skb->len, n);
 816				/* fail frame f?  just returning will rexmit. */
 817				spin_unlock_irqrestore(&d->lock, flags);
 818				return;
 819			}
 820			memcpy(f->bufaddr, ahin+1, n);
 821		case ATA_CMD_PIO_WRITE:
 822		case ATA_CMD_PIO_WRITE_EXT:
 823			ifp = getif(t, skb->dev);
 824			if (ifp) {
 825				ifp->lost = 0;
 826				if (n > DEFAULTBCNT)
 827					ifp->lostjumbo = 0;
 828			}
 829			if (f->bcnt -= n) {
 830				f->lba += n >> 9;
 831				f->bufaddr += n;
 832				resend(d, t, f);
 833				goto xmit;
 834			}
 835			break;
 836		case ATA_CMD_ID_ATA:
 837			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
 838				printk(KERN_INFO
 839					"aoe: runt data size in ataid.  skb->len=%d\n",
 840					skb->len);
 841				spin_unlock_irqrestore(&d->lock, flags);
 842				return;
 843			}
 844			ataid_complete(d, t, (char *) (ahin+1));
 845			break;
 846		default:
 847			printk(KERN_INFO
 848				"aoe: unrecognized ata command %2.2Xh for %d.%d\n",
 849				ahout->cmdstat,
 850				get_unaligned_be16(&hin->major),
 851				hin->minor);
 852		}
 853	}
 854
 855	if (buf && --buf->nframesout == 0 && buf->resid == 0) {
 856		diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
 857		if (buf->flags & BUFFL_FAIL)
 858			bio_endio(buf->bio, -EIO);
 859		else {
 860			bio_flush_dcache_pages(buf->bio);
 861			bio_endio(buf->bio, 0);
 862		}
 863		mempool_free(buf, d->bufpool);
 864	}
 865
 866	f->buf = NULL;
 867	f->tag = FREETAG;
 868	t->nout--;
 869
 870	aoecmd_work(d);
 871xmit:
 872	__skb_queue_head_init(&queue);
 873	skb_queue_splice_init(&d->sendq, &queue);
 874
 875	spin_unlock_irqrestore(&d->lock, flags);
 876	aoenet_xmit(&queue);
 877}
 878
 879void
 880aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
 881{
 882	struct sk_buff_head queue;
 883
 884	__skb_queue_head_init(&queue);
 885	aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
 886	aoenet_xmit(&queue);
 887}
 888 
 889struct sk_buff *
 890aoecmd_ata_id(struct aoedev *d)
 891{
 892	struct aoe_hdr *h;
 893	struct aoe_atahdr *ah;
 894	struct frame *f;
 895	struct sk_buff *skb;
 896	struct aoetgt *t;
 897
 898	f = freeframe(d);
 899	if (f == NULL)
 900		return NULL;
 901
 902	t = *d->tgt;
 903
 904	/* initialize the headers & frame */
 905	skb = f->skb;
 906	h = (struct aoe_hdr *) skb_mac_header(skb);
 907	ah = (struct aoe_atahdr *) (h+1);
 908	skb_put(skb, sizeof *h + sizeof *ah);
 909	memset(h, 0, skb->len);
 910	f->tag = aoehdr_atainit(d, t, h);
 911	t->nout++;
 912	f->waited = 0;
 913
 914	/* set up ata header */
 915	ah->scnt = 1;
 916	ah->cmdstat = ATA_CMD_ID_ATA;
 917	ah->lba3 = 0xa0;
 918
 919	skb->dev = t->ifp->nd;
 920
 921	d->rttavg = MAXTIMER;
 922	d->timer.function = rexmit_timer;
 923
 924	return skb_clone(skb, GFP_ATOMIC);
 925}
 926 
 927static struct aoetgt *
 928addtgt(struct aoedev *d, char *addr, ulong nframes)
 929{
 930	struct aoetgt *t, **tt, **te;
 931	struct frame *f, *e;
 932
 933	tt = d->targets;
 934	te = tt + NTARGETS;
 935	for (; tt < te && *tt; tt++)
 936		;
 937
 938	if (tt == te) {
 939		printk(KERN_INFO
 940			"aoe: device addtgt failure; too many targets\n");
 941		return NULL;
 942	}
 943	t = kcalloc(1, sizeof *t, GFP_ATOMIC);
 944	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
 945	if (!t || !f) {
 946		kfree(f);
 947		kfree(t);
 948		printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
 949		return NULL;
 950	}
 951
 952	t->nframes = nframes;
 953	t->frames = f;
 954	e = f + nframes;
 955	for (; f < e; f++)
 956		f->tag = FREETAG;
 957	memcpy(t->addr, addr, sizeof t->addr);
 958	t->ifp = t->ifs;
 959	t->maxout = t->nframes;
 960	return *tt = t;
 961}
 962
 963void
 964aoecmd_cfg_rsp(struct sk_buff *skb)
 965{
 966	struct aoedev *d;
 967	struct aoe_hdr *h;
 968	struct aoe_cfghdr *ch;
 969	struct aoetgt *t;
 970	struct aoeif *ifp;
 971	ulong flags, sysminor, aoemajor;
 972	struct sk_buff *sl;
 973	u16 n;
 974
 975	h = (struct aoe_hdr *) skb_mac_header(skb);
 976	ch = (struct aoe_cfghdr *) (h+1);
 977
 978	/*
 979	 * Enough people have their dip switches set backwards to
 980	 * warrant a loud message for this special case.
 981	 */
 982	aoemajor = get_unaligned_be16(&h->major);
 983	if (aoemajor == 0xfff) {
 984		printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
 985			"Check shelf dip switches.\n");
 986		return;
 987	}
 988
 989	sysminor = SYSMINOR(aoemajor, h->minor);
 990	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
 991		printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
 992			aoemajor, (int) h->minor);
 993		return;
 994	}
 995
 996	n = be16_to_cpu(ch->bufcnt);
 997	if (n > aoe_maxout)	/* keep it reasonable */
 998		n = aoe_maxout;
 999
1000	d = aoedev_by_sysminor_m(sysminor);
1001	if (d == NULL) {
1002		printk(KERN_INFO "aoe: device sysminor_m failure\n");
1003		return;
1004	}
1005
1006	spin_lock_irqsave(&d->lock, flags);
1007
1008	t = gettgt(d, h->src);
1009	if (!t) {
1010		t = addtgt(d, h->src, n);
1011		if (!t) {
1012			spin_unlock_irqrestore(&d->lock, flags);
1013			return;
1014		}
1015	}
1016	ifp = getif(t, skb->dev);
1017	if (!ifp) {
1018		ifp = addif(t, skb->dev);
1019		if (!ifp) {
1020			printk(KERN_INFO
1021				"aoe: device addif failure; "
1022				"too many interfaces?\n");
1023			spin_unlock_irqrestore(&d->lock, flags);
1024			return;
1025		}
1026	}
1027	if (ifp->maxbcnt) {
1028		n = ifp->nd->mtu;
1029		n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
1030		n /= 512;
1031		if (n > ch->scnt)
1032			n = ch->scnt;
1033		n = n ? n * 512 : DEFAULTBCNT;
1034		if (n != ifp->maxbcnt) {
1035			printk(KERN_INFO
1036				"aoe: e%ld.%d: setting %d%s%s:%pm\n",
1037				d->aoemajor, d->aoeminor, n,
1038				" byte data frames on ", ifp->nd->name,
1039				t->addr);
1040			ifp->maxbcnt = n;
1041		}
1042	}
1043
1044	/* don't change users' perspective */
1045	if (d->nopen) {
1046		spin_unlock_irqrestore(&d->lock, flags);
1047		return;
1048	}
1049	d->fw_ver = be16_to_cpu(ch->fwver);
1050
1051	sl = aoecmd_ata_id(d);
1052
1053	spin_unlock_irqrestore(&d->lock, flags);
1054
1055	if (sl) {
1056		struct sk_buff_head queue;
1057		__skb_queue_head_init(&queue);
1058		__skb_queue_tail(&queue, sl);
1059		aoenet_xmit(&queue);
1060	}
1061}
1062
1063void
1064aoecmd_cleanslate(struct aoedev *d)
1065{
1066	struct aoetgt **t, **te;
1067	struct aoeif *p, *e;
1068
1069	d->mintimer = MINTIMER;
1070
1071	t = d->targets;
1072	te = t + NTARGETS;
1073	for (; t < te && *t; t++) {
1074		(*t)->maxout = (*t)->nframes;
1075		p = (*t)->ifs;
1076		e = p + NAOEIFS;
1077		for (; p < e; p++) {
1078			p->lostjumbo = 0;
1079			p->lost = 0;
1080			p->maxbcnt = DEFAULTBCNT;
1081		}
1082	}
1083}
v5.4
   1/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
   2/*
   3 * aoecmd.c
   4 * Filesystem request handling methods
   5 */
   6
   7#include <linux/ata.h>
   8#include <linux/slab.h>
   9#include <linux/hdreg.h>
  10#include <linux/blk-mq.h>
  11#include <linux/skbuff.h>
  12#include <linux/netdevice.h>
  13#include <linux/genhd.h>
  14#include <linux/moduleparam.h>
  15#include <linux/workqueue.h>
  16#include <linux/kthread.h>
  17#include <net/net_namespace.h>
  18#include <asm/unaligned.h>
  19#include <linux/uio.h>
  20#include "aoe.h"
  21
  22#define MAXIOC (8192)	/* default meant to avoid most soft lockups */
  23
  24static void ktcomplete(struct frame *, struct sk_buff *);
  25static int count_targets(struct aoedev *d, int *untainted);
  26
  27static struct buf *nextbuf(struct aoedev *);
  28
  29static int aoe_deadsecs = 60 * 3;
  30module_param(aoe_deadsecs, int, 0644);
  31MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
  32
  33static int aoe_maxout = 64;
  34module_param(aoe_maxout, int, 0644);
  35MODULE_PARM_DESC(aoe_maxout,
  36	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
  37
  38/* The number of online cpus during module initialization gives us a
  39 * convenient heuristic cap on the parallelism used for ktio threads
  40 * doing I/O completion.  It is not important that the cap equal the
  41 * actual number of running CPUs at any given time, but because of CPU
  42 * hotplug, we take care to use ncpus instead of using
  43 * num_online_cpus() after module initialization.
  44 */
  45static int ncpus;
  46
  47/* mutex lock used for synchronization while thread spawning */
  48static DEFINE_MUTEX(ktio_spawn_lock);
  49
  50static wait_queue_head_t *ktiowq;
  51static struct ktstate *kts;
  52
  53/* io completion queue */
  54struct iocq_ktio {
  55	struct list_head head;
  56	spinlock_t lock;
  57};
  58static struct iocq_ktio *iocq;
  59
  60static struct page *empty_page;
  61
  62static struct sk_buff *
  63new_skb(ulong len)
  64{
  65	struct sk_buff *skb;
  66
  67	skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC);
  68	if (skb) {
  69		skb_reserve(skb, MAX_HEADER);
  70		skb_reset_mac_header(skb);
  71		skb_reset_network_header(skb);
  72		skb->protocol = __constant_htons(ETH_P_AOE);
  73		skb_checksum_none_assert(skb);
  74	}
  75	return skb;
  76}
  77
  78static struct frame *
  79getframe_deferred(struct aoedev *d, u32 tag)
  80{
  81	struct list_head *head, *pos, *nx;
  82	struct frame *f;
  83
  84	head = &d->rexmitq;
  85	list_for_each_safe(pos, nx, head) {
  86		f = list_entry(pos, struct frame, head);
  87		if (f->tag == tag) {
  88			list_del(pos);
  89			return f;
  90		}
  91	}
  92	return NULL;
  93}
  94
  95static struct frame *
  96getframe(struct aoedev *d, u32 tag)
  97{
  98	struct frame *f;
  99	struct list_head *head, *pos, *nx;
 100	u32 n;
 101
 102	n = tag % NFACTIVE;
 103	head = &d->factive[n];
 104	list_for_each_safe(pos, nx, head) {
 105		f = list_entry(pos, struct frame, head);
 106		if (f->tag == tag) {
 107			list_del(pos);
 108			return f;
 109		}
 110	}
 111	return NULL;
 112}
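/* Editorial annotation: unlike the v3.1 driver above, which scanned a
 * per-target frame array for a matching tag, outstanding frames now
 * hang on NFACTIVE hash buckets keyed by tag % NFACTIVE (see fhash()
 * below), so locating a response is a short list walk rather than a
 * scan over every allocated frame.
 */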
 113
 114/*
 115 * Leave the top bit clear so we have tagspace for userland.
 116 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 117 * This driver reserves tag -1 to mean "unused frame."
 118 */
 119static int
 120newtag(struct aoedev *d)
 121{
 122	register ulong n;
 123
 124	n = jiffies & 0xffff;
 125	return n |= (++d->lasttag & 0x7fff) << 16;
 126}
 127
 128static u32
 129aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
 130{
 131	u32 host_tag = newtag(d);
 132
 133	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
 134	memcpy(h->dst, t->addr, sizeof h->dst);
 135	h->type = __constant_cpu_to_be16(ETH_P_AOE);
 136	h->verfl = AOE_HVER;
 137	h->major = cpu_to_be16(d->aoemajor);
 138	h->minor = d->aoeminor;
 139	h->cmd = AOECMD_ATA;
 140	h->tag = cpu_to_be32(host_tag);
 141
 142	return host_tag;
 143}
 144
 145static inline void
 146put_lba(struct aoe_atahdr *ah, sector_t lba)
 147{
 148	ah->lba0 = lba;
 149	ah->lba1 = lba >>= 8;
 150	ah->lba2 = lba >>= 8;
 151	ah->lba3 = lba >>= 8;
 152	ah->lba4 = lba >>= 8;
 153	ah->lba5 = lba >>= 8;
 154}
 155
 156static struct aoeif *
 157ifrotate(struct aoetgt *t)
 158{
 159	struct aoeif *ifp;
 160
 161	ifp = t->ifp;
 162	ifp++;
 163	if (ifp >= &t->ifs[NAOEIFS] || ifp->nd == NULL)
 164		ifp = t->ifs;
 165	if (ifp->nd == NULL)
 166		return NULL;
 167	return t->ifp = ifp;
 168}
 169
 170static void
 171skb_pool_put(struct aoedev *d, struct sk_buff *skb)
 172{
 173	__skb_queue_tail(&d->skbpool, skb);
 174}
 175
 176static struct sk_buff *
 177skb_pool_get(struct aoedev *d)
 178{
 179	struct sk_buff *skb = skb_peek(&d->skbpool);
 180
 181	if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
 182		__skb_unlink(skb, &d->skbpool);
 183		return skb;
 184	}
 185	if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
 186	    (skb = new_skb(ETH_ZLEN)))
 187		return skb;
 188
 189	return NULL;
 190}
 191
 192void
 193aoe_freetframe(struct frame *f)
 194{
 195	struct aoetgt *t;
 196
 197	t = f->t;
 198	f->buf = NULL;
 199	memset(&f->iter, 0, sizeof(f->iter));
 200	f->r_skb = NULL;
 201	f->flags = 0;
 202	list_add(&f->head, &t->ffree);
 203}
 204
 205static struct frame *
 206newtframe(struct aoedev *d, struct aoetgt *t)
 207{
 208	struct frame *f;
 209	struct sk_buff *skb;
 210	struct list_head *pos;
 211
 212	if (list_empty(&t->ffree)) {
 213		if (t->falloc >= NSKBPOOLMAX*2)
 214			return NULL;
 215		f = kcalloc(1, sizeof(*f), GFP_ATOMIC);
 216		if (f == NULL)
 217			return NULL;
 218		t->falloc++;
 219		f->t = t;
 220	} else {
 221		pos = t->ffree.next;
 222		list_del(pos);
 223		f = list_entry(pos, struct frame, head);
 224	}
 225
 226	skb = f->skb;
 227	if (skb == NULL) {
 228		f->skb = skb = new_skb(ETH_ZLEN);
 229		if (!skb) {
 230bail:			aoe_freetframe(f);
 231			return NULL;
 232		}
 233	}
 234
 235	if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {
 236		skb = skb_pool_get(d);
 237		if (skb == NULL)
 238			goto bail;
 239		skb_pool_put(d, f->skb);
 240		f->skb = skb;
 241	}
 242
 243	skb->truesize -= skb->data_len;
 244	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
 245	skb_trim(skb, 0);
 246	return f;
 247}
 248
 249static struct frame *
 250newframe(struct aoedev *d)
 251{
 252	struct frame *f;
 253	struct aoetgt *t, **tt;
 254	int totout = 0;
 255	int use_tainted;
 256	int has_untainted;
 257
 258	if (!d->targets || !d->targets[0]) {
 259		printk(KERN_ERR "aoe: NULL TARGETS!\n");
 260		return NULL;
 261	}
 262	tt = d->tgt;	/* last used target */
 263	for (use_tainted = 0, has_untainted = 0;;) {
 264		tt++;
 265		if (tt >= &d->targets[d->ntargets] || !*tt)
 266			tt = d->targets;
 267		t = *tt;
 268		if (!t->taint) {
 269			has_untainted = 1;
 270			totout += t->nout;
 271		}
 272		if (t->nout < t->maxout
 273		&& (use_tainted || !t->taint)
 274		&& t->ifp->nd) {
 275			f = newtframe(d, t);
 276			if (f) {
 277				ifrotate(t);
 278				d->tgt = tt;
 279				return f;
 280			}
 281		}
 282		if (tt == d->tgt) {	/* we've looped and found nada */
 283			if (!use_tainted && !has_untainted)
 284				use_tainted = 1;
 285			else
 286				break;
 287		}
 288	}
 289	if (totout == 0) {
 290		d->kicked++;
 291		d->flags |= DEVFL_KICKME;
 292	}
 293	return NULL;
 294}
 295
 296static void
 297skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
 298{
 299	int frag = 0;
 300	struct bio_vec bv;
 301
 302	__bio_for_each_segment(bv, bio, iter, iter)
 303		skb_fill_page_desc(skb, frag++, bv.bv_page,
 304				   bv.bv_offset, bv.bv_len);
 305}
 306
 307static void
 308fhash(struct frame *f)
 309{
 310	struct aoedev *d = f->t->d;
 311	u32 n;
 312
 313	n = f->tag % NFACTIVE;
 314	list_add_tail(&f->head, &d->factive[n]);
 315}
 316
 317static void
 318ata_rw_frameinit(struct frame *f)
 319{
 320	struct aoetgt *t;
 321	struct aoe_hdr *h;
 322	struct aoe_atahdr *ah;
 323	struct sk_buff *skb;
 324	char writebit, extbit;
 325
 326	skb = f->skb;
 327	h = (struct aoe_hdr *) skb_mac_header(skb);
 328	ah = (struct aoe_atahdr *) (h + 1);
 329	skb_put(skb, sizeof(*h) + sizeof(*ah));
 330	memset(h, 0, skb->len);
 331
 332	writebit = 0x10;
 333	extbit = 0x4;
 334
 335	t = f->t;
 336	f->tag = aoehdr_atainit(t->d, t, h);
 337	fhash(f);
 338	t->nout++;
 339	f->waited = 0;
 340	f->waited_total = 0;
 341
 342	/* set up ata header */
 343	ah->scnt = f->iter.bi_size >> 9;
 344	put_lba(ah, f->iter.bi_sector);
 345	if (t->d->flags & DEVFL_EXT) {
 346		ah->aflags |= AOEAFL_EXT;
 347	} else {
 348		extbit = 0;
 349		ah->lba3 &= 0x0f;
 350		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
 351	}
 352	if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
 353		skb_fillup(skb, f->buf->bio, f->iter);
 354		ah->aflags |= AOEAFL_WRITE;
 355		skb->len += f->iter.bi_size;
 356		skb->data_len = f->iter.bi_size;
 357		skb->truesize += f->iter.bi_size;
 358		t->wpkts++;
 359	} else {
 360		t->rpkts++;
 361		writebit = 0;
 362	}
 363
 364	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
 365	skb->dev = t->ifp->nd;
 366}
 367
 368static int
 369aoecmd_ata_rw(struct aoedev *d)
 370{
 371	struct frame *f;
 372	struct buf *buf;
 373	struct sk_buff *skb;
 374	struct sk_buff_head queue;
 375
 376	buf = nextbuf(d);
 377	if (buf == NULL)
 378		return 0;
 379	f = newframe(d);
 380	if (f == NULL)
 381		return 0;
 382
 383	/* initialize the headers & frame */
 384	f->buf = buf;
 385	f->iter = buf->iter;
 386	f->iter.bi_size = min_t(unsigned long,
 387				d->maxbcnt ?: DEFAULTBCNT,
 388				f->iter.bi_size);
 389	bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);
 390
 391	if (!buf->iter.bi_size)
 392		d->ip.buf = NULL;
 393
 394	/* mark all tracking fields and load out */
 395	buf->nframesout += 1;
 396
 397	ata_rw_frameinit(f);
 398
 399	skb = skb_clone(f->skb, GFP_ATOMIC);
 400	if (skb) {
 401		f->sent = ktime_get();
 402		__skb_queue_head_init(&queue);
 403		__skb_queue_tail(&queue, skb);
 404		aoenet_xmit(&queue);
 405	}
 406	return 1;
 407}
 408
 409/* some callers cannot sleep, and they can call this function,
 410 * transmitting the packets later, when interrupts are on
 411 */
 412static void
 413aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
 414{
 415	struct aoe_hdr *h;
 416	struct aoe_cfghdr *ch;
 417	struct sk_buff *skb;
 418	struct net_device *ifp;
 419
 420	rcu_read_lock();
 421	for_each_netdev_rcu(&init_net, ifp) {
 422		dev_hold(ifp);
 423		if (!is_aoe_netif(ifp))
 424			goto cont;
 425
 426		skb = new_skb(sizeof *h + sizeof *ch);
 427		if (skb == NULL) {
 428			printk(KERN_INFO "aoe: skb alloc failure\n");
 429			goto cont;
 430		}
 431		skb_put(skb, sizeof *h + sizeof *ch);
 432		skb->dev = ifp;
 433		__skb_queue_tail(queue, skb);
 434		h = (struct aoe_hdr *) skb_mac_header(skb);
 435		memset(h, 0, sizeof *h + sizeof *ch);
 436
 437		memset(h->dst, 0xff, sizeof h->dst);
 438		memcpy(h->src, ifp->dev_addr, sizeof h->src);
 439		h->type = __constant_cpu_to_be16(ETH_P_AOE);
 440		h->verfl = AOE_HVER;
 441		h->major = cpu_to_be16(aoemajor);
 442		h->minor = aoeminor;
 443		h->cmd = AOECMD_CFG;
 444
 445cont:
 446		dev_put(ifp);
 447	}
 448	rcu_read_unlock();
 449}
 450
 451static void
 452resend(struct aoedev *d, struct frame *f)
 453{
 454	struct sk_buff *skb;
 455	struct sk_buff_head queue;
 456	struct aoe_hdr *h;
 457	struct aoetgt *t;
 458	char buf[128];
 459	u32 n;
 460
 461	t = f->t;
 462	n = newtag(d);
 463	skb = f->skb;
 464	if (ifrotate(t) == NULL) {
 465		/* probably can't happen, but set it up to fail anyway */
 466		pr_info("aoe: resend: no interfaces to rotate to.\n");
 467		ktcomplete(f, NULL);
 468		return;
 469	}
 470	h = (struct aoe_hdr *) skb_mac_header(skb);
 471
 472	if (!(f->flags & FFL_PROBE)) {
 473		snprintf(buf, sizeof(buf),
 474			"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
 475			"retransmit", d->aoemajor, d->aoeminor,
 476			f->tag, jiffies, n,
 477			h->src, h->dst, t->nout);
 478		aoechr_error(buf);
 479	}
 480
 481	f->tag = n;
 482	fhash(f);
 483	h->tag = cpu_to_be32(n);
 484	memcpy(h->dst, t->addr, sizeof h->dst);
 485	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
 486
 487	skb->dev = t->ifp->nd;
 488	skb = skb_clone(skb, GFP_ATOMIC);
 489	if (skb == NULL)
 490		return;
 491	f->sent = ktime_get();
 492	__skb_queue_head_init(&queue);
 493	__skb_queue_tail(&queue, skb);
 494	aoenet_xmit(&queue);
 495}
 496
 497static int
 498tsince_hr(struct frame *f)
 499{
 500	u64 delta = ktime_to_ns(ktime_sub(ktime_get(), f->sent));
 501
 502	/* delta is normally under 4.2 seconds, avoid 64-bit division */
 503	if (likely(delta <= UINT_MAX))
 504		return (u32)delta / NSEC_PER_USEC;
 505
 506	/* avoid overflow after 71 minutes */
 507	if (delta > ((u64)INT_MAX * NSEC_PER_USEC))
 508		return INT_MAX;
 509
 510	return div_u64(delta, NSEC_PER_USEC);
 511}
 512
 513static int
 514tsince(u32 tag)
 515{
 516	int n;
 517
 518	n = jiffies & 0xffff;
 519	n -= tag & 0xffff;
 520	if (n < 0)
 521		n += 1<<16;
 522	return jiffies_to_usecs(n + 1);
 523}
 524
 525static struct aoeif *
 526getif(struct aoetgt *t, struct net_device *nd)
 527{
 528	struct aoeif *p, *e;
 529
 530	p = t->ifs;
 531	e = p + NAOEIFS;
 532	for (; p < e; p++)
 533		if (p->nd == nd)
 534			return p;
 535	return NULL;
 536}
 537
 538static void
 539ejectif(struct aoetgt *t, struct aoeif *ifp)
 540{
 541	struct aoeif *e;
 542	struct net_device *nd;
 543	ulong n;
 544
 545	nd = ifp->nd;
 546	e = t->ifs + NAOEIFS - 1;
 547	n = (e - ifp) * sizeof *ifp;
 548	memmove(ifp, ifp+1, n);
 549	e->nd = NULL;
 550	dev_put(nd);
 551}
 552
 553static struct frame *
 554reassign_frame(struct frame *f)
 555{
 556	struct frame *nf;
 557	struct sk_buff *skb;
 558
 559	nf = newframe(f->t->d);
 560	if (!nf)
 561		return NULL;
 562	if (nf->t == f->t) {
 563		aoe_freetframe(nf);
 564		return NULL;
 565	}
 566
 567	skb = nf->skb;
 568	nf->skb = f->skb;
 569	nf->buf = f->buf;
 570	nf->iter = f->iter;
 571	nf->waited = 0;
 572	nf->waited_total = f->waited_total;
 573	nf->sent = f->sent;
 574	f->skb = skb;
 575
 576	return nf;
 577}
 578
 579static void
 580probe(struct aoetgt *t)
 581{
 582	struct aoedev *d;
 583	struct frame *f;
 584	struct sk_buff *skb;
 585	struct sk_buff_head queue;
 586	size_t n, m;
 587	int frag;
 588
 589	d = t->d;
 590	f = newtframe(d, t);
 591	if (!f) {
 592		pr_err("%s %pm for e%ld.%d: %s\n",
 593			"aoe: cannot probe remote address",
 594			t->addr,
 595			(long) d->aoemajor, d->aoeminor,
 596			"no frame available");
 597		return;
 598	}
 599	f->flags |= FFL_PROBE;
 600	ifrotate(t);
 601	f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
 602	ata_rw_frameinit(f);
 603	skb = f->skb;
 604	for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
 605		if (n < PAGE_SIZE)
 606			m = n;
 607		else
 608			m = PAGE_SIZE;
 609		skb_fill_page_desc(skb, frag, empty_page, 0, m);
 610	}
 611	skb->len += f->iter.bi_size;
 612	skb->data_len = f->iter.bi_size;
 613	skb->truesize += f->iter.bi_size;
 614
 615	skb = skb_clone(f->skb, GFP_ATOMIC);
 616	if (skb) {
 617		f->sent = ktime_get();
 618		__skb_queue_head_init(&queue);
 619		__skb_queue_tail(&queue, skb);
 620		aoenet_xmit(&queue);
 621	}
 622}
 623
 624static long
 625rto(struct aoedev *d)
 626{
 627	long t;
 628
 629	t = 2 * d->rttavg >> RTTSCALE;
 630	t += 8 * d->rttdev >> RTTDSCALE;
 631	if (t == 0)
 632		t = 1;
 633
 634	return t;
 635}
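/* Editorial annotation: this is a scaled form of the classic
 * retransmission timeout rto = srtt + 4 * rttvar.  d->rttavg and
 * d->rttdev are stored left-shifted by RTTSCALE and RTTDSCALE bits
 * (see calc_rttavg() below), so the shifts recover the unscaled
 * estimates, making the timeout about twice the smoothed RTT plus
 * eight mean deviations, in the microseconds that tsince_hr() reports.
 */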
 636
 637static void
 638rexmit_deferred(struct aoedev *d)
 639{
 640	struct aoetgt *t;
 641	struct frame *f;
 642	struct frame *nf;
 643	struct list_head *pos, *nx, *head;
 644	int since;
 645	int untainted;
 646
 647	count_targets(d, &untainted);
 648
 649	head = &d->rexmitq;
 650	list_for_each_safe(pos, nx, head) {
 651		f = list_entry(pos, struct frame, head);
 652		t = f->t;
 653		if (t->taint) {
 654			if (!(f->flags & FFL_PROBE)) {
 655				nf = reassign_frame(f);
 656				if (nf) {
 657					if (t->nout_probes == 0
 658					&& untainted > 0) {
 659						probe(t);
 660						t->nout_probes++;
 661					}
 662					list_replace(&f->head, &nf->head);
 663					pos = &nf->head;
 664					aoe_freetframe(f);
 665					f = nf;
 666					t = f->t;
 667				}
 668			} else if (untainted < 1) {
 669				/* don't probe w/o other untainted aoetgts */
 670				goto stop_probe;
 671			} else if (tsince_hr(f) < t->taint * rto(d)) {
 672				/* reprobe slowly when taint is high */
 673				continue;
 674			}
 675		} else if (f->flags & FFL_PROBE) {
 676stop_probe:		/* don't probe untainted aoetgts */
 677			list_del(pos);
 678			aoe_freetframe(f);
 679			/* leaving d->kicked, because this is routine */
 680			f->t->d->flags |= DEVFL_KICKME;
 681			continue;
 682		}
 683		if (t->nout >= t->maxout)
 684			continue;
 685		list_del(pos);
 686		t->nout++;
 687		if (f->flags & FFL_PROBE)
 688			t->nout_probes++;
 689		since = tsince_hr(f);
 690		f->waited += since;
 691		f->waited_total += since;
 692		resend(d, f);
 693	}
 694}
 695
 696/* An aoetgt accumulates demerits quickly, and successful
 697 * probing redeems the aoetgt slowly.
 698 */
 699static void
 700scorn(struct aoetgt *t)
 701{
 702	int n;
 703
 704	n = t->taint++;
 705	t->taint += t->taint * 2;
 706	if (n > t->taint)
 707		t->taint = n;
 708	if (t->taint > MAX_TAINT)
 709		t->taint = MAX_TAINT;
 710}
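/* Worked example (editorial annotation): each call sets taint to
 * 3 * (old + 1), with an overflow guard and a MAX_TAINT clamp, so
 * consecutive failures escalate 0 -> 3 -> 12 -> 39 -> ... while
 * ktiocomplete() takes taint back down only one unit per successful
 * reply, matching the "quick demerits, slow redemption" comment above.
 */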
 711
 712static int
 713count_targets(struct aoedev *d, int *untainted)
 714{
 715	int i, good;
 716
 717	for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
 718		if (d->targets[i]->taint == 0)
 719			good++;
 720
 721	if (untainted)
 722		*untainted = good;
 723	return i;
 724}
 725
 726static void
 727rexmit_timer(struct timer_list *timer)
 728{
 729	struct aoedev *d;
 730	struct aoetgt *t;
 731	struct aoeif *ifp;
 732	struct frame *f;
 733	struct list_head *head, *pos, *nx;
 734	LIST_HEAD(flist);
 735	register long timeout;
 736	ulong flags, n;
 737	int i;
 738	int utgts;	/* number of aoetgt descriptors (not slots) */
 739	int since;
 740
 741	d = from_timer(d, timer, timer);
 742
 743	spin_lock_irqsave(&d->lock, flags);
 744
 745	/* timeout based on observed timings and variations */
 746	timeout = rto(d);
 747
 748	utgts = count_targets(d, NULL);
 749
 750	if (d->flags & DEVFL_TKILL) {
 751		spin_unlock_irqrestore(&d->lock, flags);
 752		return;
 753	}
 754
 755	/* collect all frames to rexmit into flist */
 756	for (i = 0; i < NFACTIVE; i++) {
 757		head = &d->factive[i];
 758		list_for_each_safe(pos, nx, head) {
 759			f = list_entry(pos, struct frame, head);
 760			if (tsince_hr(f) < timeout)
 761				break;	/* end of expired frames */
 762			/* move to flist for later processing */
 763			list_move_tail(pos, &flist);
 764		}
 765	}
 766
 767	/* process expired frames */
 768	while (!list_empty(&flist)) {
 769		pos = flist.next;
 770		f = list_entry(pos, struct frame, head);
 771		since = tsince_hr(f);
 772		n = f->waited_total + since;
 773		n /= USEC_PER_SEC;
 774		if (aoe_deadsecs
 775		&& n > aoe_deadsecs
 776		&& !(f->flags & FFL_PROBE)) {
 777			/* Waited too long.  Device failure.
 778			 * Hang all frames on first hash bucket for downdev
 779			 * to clean up.
 780			 */
 781			list_splice(&flist, &d->factive[0]);
 782			aoedev_downdev(d);
 783			goto out;
 784		}
 785
 786		t = f->t;
 787		n = f->waited + since;
 788		n /= USEC_PER_SEC;
 789		if (aoe_deadsecs && utgts > 0
 790		&& (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
 791			scorn(t); /* avoid this target */
 792
 793		if (t->maxout != 1) {
 794			t->ssthresh = t->maxout / 2;
 795			t->maxout = 1;
 796		}
 797
 798		if (f->flags & FFL_PROBE) {
 799			t->nout_probes--;
 800		} else {
 801			ifp = getif(t, f->skb->dev);
 802			if (ifp && ++ifp->lost > (t->nframes << 1)
 803			&& (ifp != t->ifs || t->ifs[1].nd)) {
 804				ejectif(t, ifp);
 805				ifp = NULL;
 806			}
 807		}
 808		list_move_tail(pos, &d->rexmitq);
 809		t->nout--;
 810	}
 811	rexmit_deferred(d);
 812
 813out:
 814	if ((d->flags & DEVFL_KICKME) && d->blkq) {
 815		d->flags &= ~DEVFL_KICKME;
 816		blk_mq_run_hw_queues(d->blkq, true);
 817	}
 818
 819	d->timer.expires = jiffies + TIMERTICK;
 820	add_timer(&d->timer);
 821
 822	spin_unlock_irqrestore(&d->lock, flags);
 823}
 824
 825static void
 826bufinit(struct buf *buf, struct request *rq, struct bio *bio)
 827{
 828	memset(buf, 0, sizeof(*buf));
 829	buf->rq = rq;
 830	buf->bio = bio;
 831	buf->iter = bio->bi_iter;
 832}
 833
 834static struct buf *
 835nextbuf(struct aoedev *d)
 836{
 837	struct request *rq;
 838	struct request_queue *q;
 839	struct aoe_req *req;
 840	struct buf *buf;
 841	struct bio *bio;
 842
 843	q = d->blkq;
 844	if (q == NULL)
 845		return NULL;	/* initializing */
 846	if (d->ip.buf)
 847		return d->ip.buf;
 848	rq = d->ip.rq;
 849	if (rq == NULL) {
 850		rq = list_first_entry_or_null(&d->rq_list, struct request,
 851						queuelist);
 852		if (rq == NULL)
 853			return NULL;
 854		list_del_init(&rq->queuelist);
 855		blk_mq_start_request(rq);
 856		d->ip.rq = rq;
 857		d->ip.nxbio = rq->bio;
 858
 859		req = blk_mq_rq_to_pdu(rq);
 860		req->nr_bios = 0;
 861		__rq_for_each_bio(bio, rq)
 862			req->nr_bios++;
 863	}
 864	buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
 865	if (buf == NULL) {
 866		pr_err("aoe: nextbuf: unable to mempool_alloc!\n");
 867		return NULL;
 868	}
 869	bio = d->ip.nxbio;
 870	bufinit(buf, rq, bio);
 871	bio = bio->bi_next;
 872	d->ip.nxbio = bio;
 873	if (bio == NULL)
 874		d->ip.rq = NULL;
 875	return d->ip.buf = buf;
 876}
 877
 878/* enters with d->lock held */
 879void
 880aoecmd_work(struct aoedev *d)
 881{
 882	rexmit_deferred(d);
 883	while (aoecmd_ata_rw(d))
 884		;
 885}
 886
 887/* this function performs work that has been deferred until sleeping is OK
 888 */
 889void
 890aoecmd_sleepwork(struct work_struct *work)
 891{
 892	struct aoedev *d = container_of(work, struct aoedev, work);
 893	struct block_device *bd;
 894	u64 ssize;
 895
 896	if (d->flags & DEVFL_GDALLOC)
 897		aoeblk_gdalloc(d);
 898
 899	if (d->flags & DEVFL_NEWSIZE) {
 900		ssize = get_capacity(d->gd);
 901		bd = bdget_disk(d->gd, 0);
 902		if (bd) {
 903			inode_lock(bd->bd_inode);
 904			i_size_write(bd->bd_inode, (loff_t)ssize<<9);
 905			inode_unlock(bd->bd_inode);
 906			bdput(bd);
 907		}
 908		spin_lock_irq(&d->lock);
 909		d->flags |= DEVFL_UP;
 910		d->flags &= ~DEVFL_NEWSIZE;
 911		spin_unlock_irq(&d->lock);
 912	}
 913}
 914
 915static void
 916ata_ident_fixstring(u16 *id, int ns)
 917{
 918	u16 s;
 919
 920	while (ns-- > 0) {
 921		s = *id;
 922		*id++ = s >> 8 | s << 8;
 923	}
 924}
 925
 926static void
 927ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 928{
 929	u64 ssize;
 930	u16 n;
 931
 932	/* word 83: command set supported */
 933	n = get_unaligned_le16(&id[83 << 1]);
 934
 935	/* word 86: command set/feature enabled */
 936	n |= get_unaligned_le16(&id[86 << 1]);
 937
 938	if (n & (1<<10)) {	/* bit 10: LBA 48 */
 939		d->flags |= DEVFL_EXT;
 940
 941		/* word 100: number lba48 sectors */
 942		ssize = get_unaligned_le64(&id[100 << 1]);
 943
 944		/* set as in ide-disk.c:init_idedisk_capacity */
 945		d->geo.cylinders = ssize;
 946		d->geo.cylinders /= (255 * 63);
 947		d->geo.heads = 255;
 948		d->geo.sectors = 63;
 949	} else {
 950		d->flags &= ~DEVFL_EXT;
 951
 952		/* number lba28 sectors */
 953		ssize = get_unaligned_le32(&id[60 << 1]);
 954
 955		/* NOTE: obsolete in ATA 6 */
 956		d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
 957		d->geo.heads = get_unaligned_le16(&id[55 << 1]);
 958		d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
 959	}
 960
 961	ata_ident_fixstring((u16 *) &id[10<<1], 10);	/* serial */
 962	ata_ident_fixstring((u16 *) &id[23<<1], 4);	/* firmware */
 963	ata_ident_fixstring((u16 *) &id[27<<1], 20);	/* model */
 964	memcpy(d->ident, id, sizeof(d->ident));
 965
 966	if (d->ssize != ssize)
 967		printk(KERN_INFO
 968			"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
 969			t->addr,
 970			d->aoemajor, d->aoeminor,
 971			d->fw_ver, (long long)ssize);
 972	d->ssize = ssize;
 973	d->geo.start = 0;
 974	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
 975		return;
 976	if (d->gd != NULL) {
 977		set_capacity(d->gd, ssize);
 978		d->flags |= DEVFL_NEWSIZE;
 979	} else
 980		d->flags |= DEVFL_GDALLOC;
 981	schedule_work(&d->work);
 982}
 983
 984static void
 985calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt)
 986{
 987	register long n;
 988
 989	n = rtt;
 990
 991	/* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */
 992	n -= d->rttavg >> RTTSCALE;
 993	d->rttavg += n;
 994	if (n < 0)
 995		n = -n;
 996	n -= d->rttdev >> RTTDSCALE;
 997	d->rttdev += n;
 998
 999	if (!t || t->maxout >= t->nframes)
1000		return;
1001	if (t->maxout < t->ssthresh)
1002		t->maxout += 1;
1003	else if (t->nout == t->maxout && t->next_cwnd-- == 0) {
1004		t->maxout += 1;
1005		t->next_cwnd = t->maxout;
1006	}
1007}
1008
1009static struct aoetgt *
1010gettgt(struct aoedev *d, char *addr)
1011{
1012	struct aoetgt **t, **e;
1013
1014	t = d->targets;
1015	e = t + d->ntargets;
1016	for (; t < e && *t; t++)
1017		if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
1018			return *t;
1019	return NULL;
1020}
1021
1022static void
1023bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
1024{
1025	int soff = 0;
1026	struct bio_vec bv;
1027
1028	iter.bi_size = cnt;
1029
1030	__bio_for_each_segment(bv, bio, iter, iter) {
1031		char *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
1032		skb_copy_bits(skb, soff, p, bv.bv_len);
1033		kunmap_atomic(p);
1034		soff += bv.bv_len;
1035	}
1036}
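/* Editorial annotation: bvcpy() copies cnt bytes of ATA read payload
 * from the response skb into the bio's pages segment by segment;
 * clamping iter.bi_size to cnt makes __bio_for_each_segment() stop at
 * the frame boundary, and kmap_atomic() covers bio pages that may live
 * in highmem.
 */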
1037
1038void
1039aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
1040{
1041	struct bio *bio;
1042	int bok;
1043	struct request_queue *q;
1044	blk_status_t err = BLK_STS_OK;
1045
1046	q = d->blkq;
1047	if (rq == d->ip.rq)
1048		d->ip.rq = NULL;
1049	do {
1050		bio = rq->bio;
1051		bok = !fastfail && !bio->bi_status;
1052		if (!bok)
1053			err = BLK_STS_IOERR;
1054	} while (blk_update_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
1055
1056	__blk_mq_end_request(rq, err);
1057
1058	/* cf. http://lkml.org/lkml/2006/10/31/28 */
1059	if (!fastfail)
1060		blk_mq_run_hw_queues(q, true);
1061}
1062
1063static void
1064aoe_end_buf(struct aoedev *d, struct buf *buf)
1065{
1066	struct request *rq = buf->rq;
1067	struct aoe_req *req = blk_mq_rq_to_pdu(rq);
1068
1069	if (buf == d->ip.buf)
1070		d->ip.buf = NULL;
1071	mempool_free(buf, d->bufpool);
1072	if (--req->nr_bios == 0)
1073		aoe_end_request(d, rq, 0);
1074}
1075
1076static void
1077ktiocomplete(struct frame *f)
1078{
1079	struct aoe_hdr *hin, *hout;
1080	struct aoe_atahdr *ahin, *ahout;
1081	struct buf *buf;
1082	struct sk_buff *skb;
1083	struct aoetgt *t;
1084	struct aoeif *ifp;
1085	struct aoedev *d;
1086	long n;
1087	int untainted;
1088
1089	if (f == NULL)
1090		return;
1091
1092	t = f->t;
1093	d = t->d;
1094	skb = f->r_skb;
1095	buf = f->buf;
1096	if (f->flags & FFL_PROBE)
1097		goto out;
1098	if (!skb)		/* just fail the buf. */
1099		goto noskb;
1100
1101	hout = (struct aoe_hdr *) skb_mac_header(f->skb);
1102	ahout = (struct aoe_atahdr *) (hout+1);
1103
1104	hin = (struct aoe_hdr *) skb->data;
1105	skb_pull(skb, sizeof(*hin));
1106	ahin = (struct aoe_atahdr *) skb->data;
1107	skb_pull(skb, sizeof(*ahin));
1108	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
1109		pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
1110			ahout->cmdstat, ahin->cmdstat,
1111			d->aoemajor, d->aoeminor);
1112noskb:		if (buf)
1113			buf->bio->bi_status = BLK_STS_IOERR;
1114		goto out;
1115	}
1116
1117	n = ahout->scnt << 9;
1118	switch (ahout->cmdstat) {
1119	case ATA_CMD_PIO_READ:
1120	case ATA_CMD_PIO_READ_EXT:
1121		if (skb->len < n) {
1122			pr_err("%s e%ld.%d.  skb->len=%d need=%ld\n",
1123				"aoe: runt data size in read from",
1124				(long) d->aoemajor, d->aoeminor,
1125			       skb->len, n);
1126			buf->bio->bi_status = BLK_STS_IOERR;
1127			break;
1128		}
1129		if (n > f->iter.bi_size) {
1130			pr_err_ratelimited("%s e%ld.%d.  bytes=%ld need=%u\n",
1131				"aoe: too-large data size in read from",
1132				(long) d->aoemajor, d->aoeminor,
1133				n, f->iter.bi_size);
1134			buf->bio->bi_status = BLK_STS_IOERR;
1135			break;
1136		}
1137		bvcpy(skb, f->buf->bio, f->iter, n);
1138		/* fall through */
1139	case ATA_CMD_PIO_WRITE:
1140	case ATA_CMD_PIO_WRITE_EXT:
1141		spin_lock_irq(&d->lock);
1142		ifp = getif(t, skb->dev);
1143		if (ifp)
1144			ifp->lost = 0;
1145		spin_unlock_irq(&d->lock);
1146		break;
1147	case ATA_CMD_ID_ATA:
1148		if (skb->len < 512) {
1149			pr_info("%s e%ld.%d.  skb->len=%d need=512\n",
1150				"aoe: runt data size in ataid from",
1151				(long) d->aoemajor, d->aoeminor,
1152				skb->len);
1153			break;
1154		}
1155		if (skb_linearize(skb))
1156			break;
1157		spin_lock_irq(&d->lock);
1158		ataid_complete(d, t, skb->data);
1159		spin_unlock_irq(&d->lock);
1160		break;
1161	default:
1162		pr_info("aoe: unrecognized ata command %2.2Xh for %d.%d\n",
1163			ahout->cmdstat,
1164			be16_to_cpu(get_unaligned(&hin->major)),
1165			hin->minor);
1166	}
1167out:
1168	spin_lock_irq(&d->lock);
1169	if (t->taint > 0
1170	&& --t->taint > 0
1171	&& t->nout_probes == 0) {
1172		count_targets(d, &untainted);
1173		if (untainted > 0) {
1174			probe(t);
1175			t->nout_probes++;
1176		}
1177	}
1178
1179	aoe_freetframe(f);
1180
1181	if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
1182		aoe_end_buf(d, buf);
1183
1184	spin_unlock_irq(&d->lock);
1185	aoedev_put(d);
1186	dev_kfree_skb(skb);
1187}
1188
1189/* Enters with iocq.lock held.
1190 * Returns true iff responses needing processing remain.
1191 */
1192static int
1193ktio(int id)
1194{
1195	struct frame *f;
1196	struct list_head *pos;
1197	int i;
1198	int actual_id;
1199
1200	for (i = 0; ; ++i) {
1201		if (i == MAXIOC)
1202			return 1;
1203		if (list_empty(&iocq[id].head))
1204			return 0;
1205		pos = iocq[id].head.next;
1206		list_del(pos);
1207		f = list_entry(pos, struct frame, head);
1208		spin_unlock_irq(&iocq[id].lock);
1209		ktiocomplete(f);
1210
1211		/* Figure out if extra threads are required. */
1212		actual_id = f->t->d->aoeminor % ncpus;
1213
1214		if (!kts[actual_id].active) {
1215			BUG_ON(id != 0);
1216			mutex_lock(&ktio_spawn_lock);
1217			if (!kts[actual_id].active
1218				&& aoe_ktstart(&kts[actual_id]) == 0)
1219				kts[actual_id].active = 1;
1220			mutex_unlock(&ktio_spawn_lock);
1221		}
1222		spin_lock_irq(&iocq[id].lock);
1223	}
1224}
1225
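/*
 * Worker loop shared by all ktio threads: run k->fn until it reports no
 * more work, then sleep on k->waitq until ktcomplete (or a stop request)
 * wakes us.
 */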
1226static int
1227kthread(void *vp)
1228{
1229	struct ktstate *k;
1230	DECLARE_WAITQUEUE(wait, current);
1231	int more;
1232
1233	k = vp;
1234	current->flags |= PF_NOFREEZE;
1235	set_user_nice(current, -10);
1236	complete(&k->rendez);	/* tell spawner we're running */
1237	do {
1238		spin_lock_irq(k->lock);
1239		more = k->fn(k->id);
1240		if (!more) {
1241			add_wait_queue(k->waitq, &wait);
1242			__set_current_state(TASK_INTERRUPTIBLE);
1243		}
1244		spin_unlock_irq(k->lock);
1245		if (!more) {
1246			schedule();
1247			remove_wait_queue(k->waitq, &wait);
1248		} else
1249			cond_resched();
1250	} while (!kthread_should_stop());
1251	complete(&k->rendez);	/* tell spawner we're stopping */
1252	return 0;
1253}
1254
1255void
1256aoe_ktstop(struct ktstate *k)
1257{
1258	kthread_stop(k->task);
1259	wait_for_completion(&k->rendez);
1260}
1261
1262int
1263aoe_ktstart(struct ktstate *k)
1264{
1265	struct task_struct *task;
1266
1267	init_completion(&k->rendez);
1268	task = kthread_run(kthread, k, "%s", k->name);
1269	if (task == NULL || IS_ERR(task))
1270		return -ENOMEM;
1271	k->task = task;
1272	wait_for_completion(&k->rendez); /* allow kthread to start */
1273	init_completion(&k->rendez);	/* for waiting for exit later */
1274	return 0;
1275}
1276
1277/* pass it off to kthreads for processing */
1278static void
1279ktcomplete(struct frame *f, struct sk_buff *skb)
1280{
1281	int id;
1282	ulong flags;
1283
1284	f->r_skb = skb;
1285	id = f->t->d->aoeminor % ncpus;
1286	spin_lock_irqsave(&iocq[id].lock, flags);
1287	if (!kts[id].active) {
1288		spin_unlock_irqrestore(&iocq[id].lock, flags);
1289		/* The thread with id has not been spawned yet,
1290		 * so delegate the work to the main thread and
1291		 * try spawning a new thread.
1292		 */
1293		id = 0;
1294		spin_lock_irqsave(&iocq[id].lock, flags);
1295	}
1296	list_add_tail(&f->head, &iocq[id].head);
1297	spin_unlock_irqrestore(&iocq[id].lock, flags);
1298	wake_up(&ktiowq[id]);
1299}
1300
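/*
 * Entry point for ATA responses from the network layer.  Match the AoE
 * tag to an outstanding frame, fold the measured round trip into the
 * RTT estimate, and queue the frame for a ktio thread.  Returns the skb
 * for the caller to free when nothing matches; returns NULL when the
 * skb and the device reference are handed on to ktio.
 */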
1301struct sk_buff *
1302aoecmd_ata_rsp(struct sk_buff *skb)
1303{
1304	struct aoedev *d;
1305	struct aoe_hdr *h;
1306	struct frame *f;
1307	u32 n;
1308	ulong flags;
1309	char ebuf[128];
1310	u16 aoemajor;
1311
1312	h = (struct aoe_hdr *) skb->data;
1313	aoemajor = be16_to_cpu(get_unaligned(&h->major));
1314	d = aoedev_by_aoeaddr(aoemajor, h->minor, 0);
1315	if (d == NULL) {
1316		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
1317			"for unknown device %d.%d\n",
1318			aoemajor, h->minor);
1319		aoechr_error(ebuf);
1320		return skb;
1321	}
1322
1323	spin_lock_irqsave(&d->lock, flags);
1324
1325	n = be32_to_cpu(get_unaligned(&h->tag));
1326	f = getframe(d, n);
1327	if (f) {
1328		calc_rttavg(d, f->t, tsince_hr(f));
1329		f->t->nout--;
1330		if (f->flags & FFL_PROBE)
1331			f->t->nout_probes--;
1332	} else {
1333		f = getframe_deferred(d, n);
1334		if (f) {
1335			calc_rttavg(d, NULL, tsince_hr(f));
1336		} else {
1337			calc_rttavg(d, NULL, tsince(n));
1338			spin_unlock_irqrestore(&d->lock, flags);
1339			aoedev_put(d);
1340			snprintf(ebuf, sizeof(ebuf),
1341				 "%15s e%d.%d    tag=%08x@%08lx s=%pm d=%pm\n",
1342				 "unexpected rsp",
1343				 get_unaligned_be16(&h->major),
1344				 h->minor,
1345				 get_unaligned_be32(&h->tag),
1346				 jiffies,
1347				 h->src,
1348				 h->dst);
1349			aoechr_error(ebuf);
1350			return skb;
1351		}
1352	}
1353	aoecmd_work(d);
1354
1355	spin_unlock_irqrestore(&d->lock, flags);
1356
1357	ktcomplete(f, skb);
1358
1359	/*
1360	 * Note here that we do not perform an aoedev_put, as we are
1361	 * leaving this reference for the ktio to release.
1362	 */
1363	return NULL;
1364}
1365
1366void
1367aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
1368{
1369	struct sk_buff_head queue;
1370
1371	__skb_queue_head_init(&queue);
1372	aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
1373	aoenet_xmit(&queue);
1374}
1375
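/*
 * Build an ATA IDENTIFY DEVICE frame for the current target; the
 * 512-byte result lands in ataid_complete.  The RTT estimate is
 * reinitialized here, apparently to give a (re)discovered device a
 * clean slate, and a clone of the skb is returned for transmission so
 * the original stays with the frame for retransmits.
 */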
1376struct sk_buff *
1377aoecmd_ata_id(struct aoedev *d)
1378{
1379	struct aoe_hdr *h;
1380	struct aoe_atahdr *ah;
1381	struct frame *f;
1382	struct sk_buff *skb;
1383	struct aoetgt *t;
1384
1385	f = newframe(d);
1386	if (f == NULL)
1387		return NULL;
1388
1389	t = *d->tgt;
1390
1391	/* initialize the headers & frame */
1392	skb = f->skb;
1393	h = (struct aoe_hdr *) skb_mac_header(skb);
1394	ah = (struct aoe_atahdr *) (h+1);
1395	skb_put(skb, sizeof *h + sizeof *ah);
1396	memset(h, 0, skb->len);
1397	f->tag = aoehdr_atainit(d, t, h);
1398	fhash(f);
1399	t->nout++;
1400	f->waited = 0;
1401	f->waited_total = 0;
1402
1403	/* set up ata header */
1404	ah->scnt = 1;
1405	ah->cmdstat = ATA_CMD_ID_ATA;
1406	ah->lba3 = 0xa0;
1407
1408	skb->dev = t->ifp->nd;
1409
1410	d->rttavg = RTTAVG_INIT;
1411	d->rttdev = RTTDEV_INIT;
1412	d->timer.function = rexmit_timer;
1413
1414	skb = skb_clone(skb, GFP_ATOMIC);
1415	if (skb)
1416		f->sent = ktime_get();
1417
1418	return skb;
1419}
1420
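/*
 * Double the target pointer array; GFP_ATOMIC because the caller holds
 * d->lock.  d->tgt is rebased into the new array so the current target
 * pointer stays valid across the move.
 */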
1421static struct aoetgt **
1422grow_targets(struct aoedev *d)
1423{
1424	ulong oldn, newn;
1425	struct aoetgt **tt;
1426
1427	oldn = d->ntargets;
1428	newn = oldn * 2;
1429	tt = kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC);
1430	if (!tt)
1431		return NULL;
1432	memmove(tt, d->targets, sizeof(*d->targets) * oldn);
1433	d->tgt = tt + (d->tgt - d->targets);
1434	kfree(d->targets);
1435	d->targets = tt;
1436	d->ntargets = newn;
1437
1438	return &d->targets[oldn];
1439}
1440
1441static struct aoetgt *
1442addtgt(struct aoedev *d, char *addr, ulong nframes)
1443{
1444	struct aoetgt *t, **tt, **te;
1445
1446	tt = d->targets;
1447	te = tt + d->ntargets;
1448	for (; tt < te && *tt; tt++)
1449		;
1450
1451	if (tt == te) {
1452		tt = grow_targets(d);
1453		if (!tt)
1454			goto nomem;
1455	}
1456	t = kzalloc(sizeof(*t), GFP_ATOMIC);
1457	if (!t)
1458		goto nomem;
1459	t->nframes = nframes;
1460	t->d = d;
1461	memcpy(t->addr, addr, sizeof t->addr);
1462	t->ifp = t->ifs;
1463	aoecmd_wreset(t);
1464	t->maxout = t->nframes / 2;
1465	INIT_LIST_HEAD(&t->ffree);
1466	return *tt = t;
1467
1468 nomem:
1469	pr_info("aoe: cannot allocate memory to add target\n");
1470	return NULL;
1471}
1472
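/*
 * The device-wide payload size is the smallest of every target's own
 * minimum interface byte count, so one constrained path bounds what we
 * send everywhere.
 */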
1473static void
1474setdbcnt(struct aoedev *d)
1475{
1476	struct aoetgt **t, **e;
1477	int bcnt = 0;
1478
1479	t = d->targets;
1480	e = t + d->ntargets;
1481	for (; t < e && *t; t++)
1482		if (bcnt == 0 || bcnt > (*t)->minbcnt)
1483			bcnt = (*t)->minbcnt;
1484	if (bcnt != d->maxbcnt) {
1485		d->maxbcnt = bcnt;
1486		pr_info("aoe: e%ld.%d: setting %d byte data frames\n",
1487			d->aoemajor, d->aoeminor, bcnt);
1488	}
1489}
1490
1491static void
1492setifbcnt(struct aoetgt *t, struct net_device *nd, int bcnt)
1493{
1494	struct aoedev *d;
1495	struct aoeif *p, *e;
1496	int minbcnt;
1497
1498	d = t->d;
1499	minbcnt = bcnt;
1500	p = t->ifs;
1501	e = p + NAOEIFS;
1502	for (; p < e; p++) {
1503		if (p->nd == NULL)
1504			break;		/* end of the valid interfaces */
1505		if (p->nd == nd) {
1506			p->bcnt = bcnt;	/* we're updating */
1507			nd = NULL;
1508		} else if (minbcnt > p->bcnt)
1509			minbcnt = p->bcnt; /* find the min interface */
1510	}
1511	if (nd) {
1512		if (p == e) {
1513			pr_err("aoe: device setifbcnt failure; too many interfaces.\n");
1514			return;
1515		}
1516		dev_hold(nd);
1517		p->nd = nd;
1518		p->bcnt = bcnt;
1519	}
1520	t->minbcnt = minbcnt;
1521	setdbcnt(d);
1522}
1523
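/*
 * Handle an AoE config/query response: sanity-check the shelf and slot
 * addresses, create or update the target for the sending MAC, derive
 * each interface's usable payload from the MTU, and issue an IDENTIFY
 * when nobody has the device open yet.
 */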
1524void
1525aoecmd_cfg_rsp(struct sk_buff *skb)
1526{
1527	struct aoedev *d;
1528	struct aoe_hdr *h;
1529	struct aoe_cfghdr *ch;
1530	struct aoetgt *t;
1531	ulong flags, aoemajor;
1532	struct sk_buff *sl;
1533	struct sk_buff_head queue;
1534	u16 n;
1535
1536	sl = NULL;
1537	h = (struct aoe_hdr *) skb_mac_header(skb);
1538	ch = (struct aoe_cfghdr *) (h+1);
1539
1540	/*
1541	 * Enough people have their dip switches set backwards to
1542	 * warrant a loud message for this special case.
1543	 */
1544	aoemajor = get_unaligned_be16(&h->major);
1545	if (aoemajor == 0xfff) {
1546		printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
1547			"Check shelf dip switches.\n");
1548		return;
1549	}
1550	if (aoemajor == 0xffff) {
1551		pr_info("aoe: e%ld.%d: broadcast shelf number invalid\n",
1552			aoemajor, (int) h->minor);
1553		return;
1554	}
1555	if (h->minor == 0xff) {
1556		pr_info("aoe: e%ld.%d: broadcast slot number invalid\n",
1557			aoemajor, (int) h->minor);
1558		return;
1559	}
1560
1561	n = be16_to_cpu(ch->bufcnt);
1562	if (n > aoe_maxout)	/* keep it reasonable */
1563		n = aoe_maxout;
1564
1565	d = aoedev_by_aoeaddr(aoemajor, h->minor, 1);
1566	if (d == NULL) {
1567		pr_info("aoe: device allocation failure\n");
1568		return;
1569	}
1570
1571	spin_lock_irqsave(&d->lock, flags);
1572
1573	t = gettgt(d, h->src);
1574	if (t) {
1575		t->nframes = n;
1576		if (n < t->maxout)
1577			aoecmd_wreset(t);
1578	} else {
1579		t = addtgt(d, h->src, n);
1580		if (!t)
1581			goto bail;
1582	}
1583	n = skb->dev->mtu;
1584	n -= sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
1585	n /= 512;
1586	if (n > ch->scnt)
1587		n = ch->scnt;
1588	n = n ? n * 512 : DEFAULTBCNT;
1589	setifbcnt(t, skb->dev, n);
1590
1591	/* don't change users' perspective */
1592	if (d->nopen == 0) {
1593		d->fw_ver = be16_to_cpu(ch->fwver);
1594		sl = aoecmd_ata_id(d);
1595	}
1596bail:
1597	spin_unlock_irqrestore(&d->lock, flags);
1598	aoedev_put(d);
1599	if (sl) {
1600		__skb_queue_head_init(&queue);
1601		__skb_queue_tail(&queue, sl);
1602		aoenet_xmit(&queue);
1603	}
1604}
1605
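/*
 * Window reset for a new or troubled target: back to one outstanding
 * frame, with ssthresh at half the frame count, much as TCP backs off
 * after loss.
 */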
1606void
1607aoecmd_wreset(struct aoetgt *t)
1608{
1609	t->maxout = 1;
1610	t->ssthresh = t->nframes / 2;
1611	t->next_cwnd = t->nframes;
1612}
1613
1614void
1615aoecmd_cleanslate(struct aoedev *d)
1616{
1617	struct aoetgt **t, **te;
1618
1619	d->rttavg = RTTAVG_INIT;
1620	d->rttdev = RTTDEV_INIT;
1621	d->maxbcnt = 0;
1622
1623	t = d->targets;
1624	te = t + d->ntargets;
1625	for (; t < te && *t; t++)
1626		aoecmd_wreset(*t);
1627}
1628
1629void
1630aoe_failbuf(struct aoedev *d, struct buf *buf)
1631{
1632	if (buf == NULL)
1633		return;
1634	buf->iter.bi_size = 0;
1635	buf->bio->bi_status = BLK_STS_IOERR;
1636	if (buf->nframesout == 0)
1637		aoe_end_buf(d, buf);
1638}
1639
1640void
1641aoe_flush_iocq(void)
1642{
1643	int i;
1644
1645	for (i = 0; i < ncpus; i++) {
1646		if (kts[i].active)
1647			aoe_flush_iocq_by_index(i);
1648	}
1649}
1650
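/*
 * Drain thread id's response queue, failing each frame's buf and
 * releasing the frame, the skb, and the device reference the response
 * path was holding.
 */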
1651void
1652aoe_flush_iocq_by_index(int id)
1653{
1654	struct frame *f;
1655	struct aoedev *d;
1656	LIST_HEAD(flist);
1657	struct list_head *pos;
1658	struct sk_buff *skb;
1659	ulong flags;
1660
1661	spin_lock_irqsave(&iocq[id].lock, flags);
1662	list_splice_init(&iocq[id].head, &flist);
1663	spin_unlock_irqrestore(&iocq[id].lock, flags);
1664	while (!list_empty(&flist)) {
1665		pos = flist.next;
1666		list_del(pos);
1667		f = list_entry(pos, struct frame, head);
1668		d = f->t->d;
1669		skb = f->r_skb;
1670		spin_lock_irqsave(&d->lock, flags);
1671		if (f->buf) {
1672			f->buf->nframesout--;
1673			aoe_failbuf(d, f->buf);
1674		}
1675		aoe_freetframe(f);
1676		spin_unlock_irqrestore(&d->lock, flags);
1677		dev_kfree_skb(skb);
1678		aoedev_put(d);
1679	}
1680}
1681
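/*
 * Allocate the per-CPU response queues and thread state.  Only thread 0
 * is started here; the rest are spawned on demand the first time work
 * arrives for their CPU (see ktio and ktcomplete).
 */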
1682int __init
1683aoecmd_init(void)
1684{
1685	void *p;
1686	int i;
1687	int ret;
1688
1689	/* get_zeroed_page returns page with ref count 1 */
1690	p = (void *) get_zeroed_page(GFP_KERNEL);
1691	if (!p)
1692		return -ENOMEM;
1693	empty_page = virt_to_page(p);
1694
1695	ncpus = num_online_cpus();
1696
1697	iocq = kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL);
1698	if (!iocq)
1699		return -ENOMEM;
1700
1701	kts = kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL);
1702	if (!kts) {
1703		ret = -ENOMEM;
1704		goto kts_fail;
1705	}
1706
1707	ktiowq = kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL);
1708	if (!ktiowq) {
1709		ret = -ENOMEM;
1710		goto ktiowq_fail;
1711	}
1712
1713	mutex_init(&ktio_spawn_lock);
1714
1715	for (i = 0; i < ncpus; i++) {
1716		INIT_LIST_HEAD(&iocq[i].head);
1717		spin_lock_init(&iocq[i].lock);
1718		init_waitqueue_head(&ktiowq[i]);
1719		snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i);
1720		kts[i].fn = ktio;
1721		kts[i].waitq = &ktiowq[i];
1722		kts[i].lock = &iocq[i].lock;
1723		kts[i].id = i;
1724		kts[i].active = 0;
1725	}
1726	kts[0].active = 1;
1727	if (aoe_ktstart(&kts[0])) {
1728		ret = -ENOMEM;
1729		goto ktstart_fail;
1730	}
1731	return 0;
1732
1733ktstart_fail:
1734	kfree(ktiowq);
1735ktiowq_fail:
1736	kfree(kts);
1737kts_fail:
1738	kfree(iocq);
1739
1740	return ret;
1741}
1742
1743void
1744aoecmd_exit(void)
1745{
1746	int i;
1747
1748	for (i = 0; i < ncpus; i++)
1749		if (kts[i].active)
1750			aoe_ktstop(&kts[i]);
1751
1752	aoe_flush_iocq();
1753
1754	/* Free up the iocq and thread-specific configuration
1755	 * allocated during startup.
1756	 */
1757	kfree(iocq);
1758	kfree(kts);
1759	kfree(ktiowq);
1760
1761	free_page((unsigned long) page_address(empty_page));
1762	empty_page = NULL;
1763}