v3.1
 
   1/*
   2 * SN Platform GRU Driver
   3 *
   4 *              KERNEL SERVICES THAT USE THE GRU
   5 *
   6 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
   7 *
   8 *  This program is free software; you can redistribute it and/or modify
   9 *  it under the terms of the GNU General Public License as published by
  10 *  the Free Software Foundation; either version 2 of the License, or
  11 *  (at your option) any later version.
  12 *
  13 *  This program is distributed in the hope that it will be useful,
  14 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 *  GNU General Public License for more details.
  17 *
  18 *  You should have received a copy of the GNU General Public License
  19 *  along with this program; if not, write to the Free Software
  20 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  21 */
  22
  23#include <linux/kernel.h>
  24#include <linux/errno.h>
  25#include <linux/slab.h>
  26#include <linux/mm.h>
  27#include <linux/spinlock.h>
  28#include <linux/device.h>
  29#include <linux/miscdevice.h>
  30#include <linux/proc_fs.h>
  31#include <linux/interrupt.h>
  32#include <linux/uaccess.h>
  33#include <linux/delay.h>
  34#include <asm/io_apic.h>
  35#include "gru.h"
  36#include "grulib.h"
  37#include "grutables.h"
  38#include "grukservices.h"
  39#include "gru_instructions.h"
  40#include <asm/uv/uv_hub.h>
  41
  42/*
  43 * Kernel GRU Usage
  44 *
  45 * The following is an interim algorithm for management of kernel GRU
  46 * resources. This will likely be replaced when we better understand the
  47 * kernel/user requirements.
  48 *
  49 * Blade percpu resources reserved for kernel use. These resources are
  50 * reserved whenever the kernel context for the blade is loaded. Note
  51 * that the kernel context is not guaranteed to be always available. It is
  52 * loaded on demand & can be stolen by a user if the user demand exceeds the
  53 * kernel demand. The kernel can always reload the kernel context but
  54 * a SLEEP may be required!!!.
  55 *
  56 * Async Overview:
  57 *
  58 * 	Each blade has one "kernel context" that owns GRU kernel resources
  59 * 	located on the blade. Kernel drivers use GRU resources in this context
  60 * 	for sending messages, zeroing memory, etc.
  61 *
  62 * 	The kernel context is dynamically loaded on demand. If it is not in
  63 * 	use by the kernel, the kernel context can be unloaded & given to a user.
  64 * 	The kernel context will be reloaded when needed. This may require that
  65 * 	a context be stolen from a user.
  66 * 		NOTE: frequent unloading/reloading of the kernel context is
  67 * 		expensive. We are depending on batch schedulers, cpusets, sane
  68 * 		drivers or some other mechanism to prevent the need for frequent
  69 *	 	stealing/reloading.
  70 *
  71 * 	The kernel context consists of two parts:
  72 * 		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
  73 * 		  Each cpu has its own private resources & does not share them
  74 * 		  with other cpus. These resources are used serially, ie,
  75 * 		  locked, used & unlocked  on each call to a function in
  76 * 		  grukservices.
  77 * 		  	(Now that we have dynamic loading of kernel contexts, I
  78 * 		  	 may rethink this & allow sharing between cpus....)
  79 *
  80 *		- Additional resources can be reserved long term & used directly
  81 *		  by UV drivers located in the kernel. Drivers using these GRU
  82 *		  resources can use asynchronous GRU instructions that send
  83 *		  interrupts on completion.
  84 *		  	- these resources must be explicitly locked/unlocked
  85 *		  	- locked resources prevent (obviously) the kernel
  86 *		  	  context from being unloaded.
  87 *			- drivers using these resources directly issue their own
  88 *			  GRU instruction and must wait/check completion.
  89 *
  90 * 		  When these resources are reserved, the caller can optionally
  91 * 		  associate a wait_queue with the resources and use asynchronous
  92 * 		  GRU instructions. When an async GRU instruction completes, the
  93 * 		  driver will do a wakeup on the event.
  94 *
  95 */
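/*
 * Illustrative sketch (not part of this file): how a kernel driver might
 * use the async reservation interface described above. Only the gru_*
 * calls defined later in this file are real; the function and completion
 * names here are hypothetical and error handling is omitted.
 */
#if 0	/* example only */
static DECLARE_COMPLETION(example_async_done);	/* hypothetical */

static void example_reserve_async(int blade_id)
{
	unsigned long han;

	/* Reserve 1 CBR and no DSR bytes; a handle of 0 means already reserved */
	han = gru_reserve_async_resources(blade_id, 1, 0, &example_async_done);
	if (!han)
		return;

	/* ... lock the resources, issue async GRU instructions, wait ... */

	gru_release_async_resources(han);
}
#endif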
  96
  97
  98#define ASYNC_HAN_TO_BID(h)	((h) - 1)
  99#define ASYNC_BID_TO_HAN(b)	((b) + 1)
 100#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]
 101
 102#define GRU_NUM_KERNEL_CBR	1
 103#define GRU_NUM_KERNEL_DSR_BYTES 256
 104#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
 105					GRU_CACHE_LINE_BYTES)
 106
 107/* GRU instruction attributes for all instructions */
 108#define IMA			IMA_CB_DELAY
 109
 110/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
 111#define __gru_cacheline_aligned__                               \
 112	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
 113
 114#define MAGIC	0x1234567887654321UL
 115
 116/* Default retry count for GRU errors on kernel instructions */
 117#define EXCEPTION_RETRY_LIMIT	3
 118
 119/* Status of message queue sections */
 120#define MQS_EMPTY		0
 121#define MQS_FULL		1
 122#define MQS_NOOP		2
 123
 124/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
 125/* optimized for x86_64 */
 126struct message_queue {
 127	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
 128	int			qlines;				/* DW 1 */
 129	long 			hstatus[2];
 130	void 			*next __gru_cacheline_aligned__;/* CL 1 */
 131	void 			*limit;
 132	void 			*start;
 133	void 			*start2;
 134	char			data ____cacheline_aligned;	/* CL 2 */
 135};
 136
 137/* First word in every message - used by mesq interface */
 138struct message_header {
 139	char	present;
 140	char	present2;
 141	char 	lines;
 142	char	fill;
 143};
 144
 145#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))
 146
 147/*
 148 * Reload the blade's kernel context into a GRU chiplet. Called holding
 149 * the bs_kgts_sema for READ. Will steal user contexts if necessary.
 150 */
 151static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
 152{
 153	struct gru_state *gru;
 154	struct gru_thread_state *kgts;
 155	void *vaddr;
 156	int ctxnum, ncpus;
 157
 158	up_read(&bs->bs_kgts_sema);
 159	down_write(&bs->bs_kgts_sema);
 160
 161	if (!bs->bs_kgts) {
 162		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
 163		bs->bs_kgts->ts_user_blade_id = blade_id;
 164	}
 165	kgts = bs->bs_kgts;
 166
 167	if (!kgts->ts_gru) {
 168		STAT(load_kernel_context);
 169		ncpus = uv_blade_nr_possible_cpus(blade_id);
 170		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
 171			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
 172		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
 173			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
 174				bs->bs_async_dsr_bytes);
 175		while (!gru_assign_gru_context(kgts)) {
 176			msleep(1);
 177			gru_steal_context(kgts);
 178		}
 179		gru_load_context(kgts);
 180		gru = bs->bs_kgts->ts_gru;
 181		vaddr = gru->gs_gru_base_vaddr;
 182		ctxnum = kgts->ts_ctxnum;
 183		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
 184		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
 185	}
 186	downgrade_write(&bs->bs_kgts_sema);
 187}
 188
 189/*
 190 * Free all kernel contexts that are not currently in use.
 191 *   Returns 0 if all freed, else the number of in-use contexts.
 192 */
 193static int gru_free_kernel_contexts(void)
 194{
 195	struct gru_blade_state *bs;
 196	struct gru_thread_state *kgts;
 197	int bid, ret = 0;
 198
 199	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
 200		bs = gru_base[bid];
 201		if (!bs)
 202			continue;
 203
 204		/* Ignore busy contexts. Don't want to block here.  */
 205		if (down_write_trylock(&bs->bs_kgts_sema)) {
 206			kgts = bs->bs_kgts;
 207			if (kgts && kgts->ts_gru)
 208				gru_unload_context(kgts, 0);
 209			bs->bs_kgts = NULL;
 210			up_write(&bs->bs_kgts_sema);
 211			kfree(kgts);
 212		} else {
 213			ret++;
 214		}
 215	}
 216	return ret;
 217}
 218
 219/*
 220 * Lock & load the kernel context for the specified blade.
 221 */
 222static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
 223{
 224	struct gru_blade_state *bs;
 225	int bid;
 226
 227	STAT(lock_kernel_context);
 228again:
 229	bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
 230	bs = gru_base[bid];
 231
 232	/* Handle the case where migration occurred while waiting for the sema */
 233	down_read(&bs->bs_kgts_sema);
 234	if (blade_id < 0 && bid != uv_numa_blade_id()) {
 235		up_read(&bs->bs_kgts_sema);
 236		goto again;
 237	}
 238	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
 239		gru_load_kernel_context(bs, bid);
 240	return bs;
 241
 242}
 243
 244/*
 245 * Unlock the kernel context for the specified blade. Context is not
 246 * unloaded but may be stolen before next use.
 247 */
 248static void gru_unlock_kernel_context(int blade_id)
 249{
 250	struct gru_blade_state *bs;
 251
 252	bs = gru_base[blade_id];
 253	up_read(&bs->bs_kgts_sema);
 254	STAT(unlock_kernel_context);
 255}
 256
 257/*
 258 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
 259 * 	- returns with preemption disabled
 260 */
 261static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
 262{
 263	struct gru_blade_state *bs;
 264	int lcpu;
 265
 266	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
 267	preempt_disable();
 268	bs = gru_lock_kernel_context(-1);
 269	lcpu = uv_blade_processor_id();
 270	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
 271	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
 272	return 0;
 273}
 274
 275/*
 276 * Free the current cpu's reserved DSR/CBR resources.
 277 */
 278static void gru_free_cpu_resources(void *cb, void *dsr)
 279{
 280	gru_unlock_kernel_context(uv_numa_blade_id());
 281	preempt_enable();
 282}
 283
 284/*
 285 * Reserve GRU resources to be used asynchronously.
 286 *   Note: currently supports only 1 reservation per blade.
 287 *
 288 * 	input:
 289 * 		blade_id  - blade on which resources should be reserved
 290 * 		cbrs	  - number of CBRs
 291 * 		dsr_bytes - number of DSR bytes needed
 292 *	output:
 293 *		handle to identify resource
 294 *		(0 = async resources already reserved)
 295 */
 296unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
 297			struct completion *cmp)
 298{
 299	struct gru_blade_state *bs;
 300	struct gru_thread_state *kgts;
 301	int ret = 0;
 302
 303	bs = gru_base[blade_id];
 304
 305	down_write(&bs->bs_kgts_sema);
 306
 307	/* Verify no resources already reserved */
 308	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
 309		goto done;
 310	bs->bs_async_dsr_bytes = dsr_bytes;
 311	bs->bs_async_cbrs = cbrs;
 312	bs->bs_async_wq = cmp;
 313	kgts = bs->bs_kgts;
 314
 315	/* Resources changed. Unload context if already loaded */
 316	if (kgts && kgts->ts_gru)
 317		gru_unload_context(kgts, 0);
 318	ret = ASYNC_BID_TO_HAN(blade_id);
 319
 320done:
 321	up_write(&bs->bs_kgts_sema);
 322	return ret;
 323}
 324
 325/*
 326 * Release async resources previously reserved.
 327 *
 328 *	input:
 329 *		han - handle to identify resources
 330 */
 331void gru_release_async_resources(unsigned long han)
 332{
 333	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
 334
 335	down_write(&bs->bs_kgts_sema);
 336	bs->bs_async_dsr_bytes = 0;
 337	bs->bs_async_cbrs = 0;
 338	bs->bs_async_wq = NULL;
 339	up_write(&bs->bs_kgts_sema);
 340}
 341
 342/*
 343 * Wait for async GRU instructions to complete.
 344 *
 345 *	input:
 346 *		han - handle to identify resources
 347 */
 348void gru_wait_async_cbr(unsigned long han)
 349{
 350	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
 351
 352	wait_for_completion(bs->bs_async_wq);
 353	mb();
 354}
 355
 356/*
 357 * Lock previously reserved async GRU resources
 358 *
 359 *	input:
 360 *		han - handle to identify resources
 361 *	output:
 362 *		cb  - pointer to first CBR
 363 *		dsr - pointer to first DSR
 364 */
 365void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr)
 366{
 367	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
 368	int blade_id = ASYNC_HAN_TO_BID(han);
 369	int ncpus;
 370
 371	gru_lock_kernel_context(blade_id);
 372	ncpus = uv_blade_nr_possible_cpus(blade_id);
 373	if (cb)
 374		*cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
 375	if (dsr)
 376		*dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
 377}
 378
 379/*
 380 * Unlock previously reserved async GRU resources
 381 *
 382 *	input:
 383 *		han - handle to identify resources
 384 */
 385void gru_unlock_async_resource(unsigned long han)
 386{
 387	int blade_id = ASYNC_HAN_TO_BID(han);
 388
 389	gru_unlock_kernel_context(blade_id);
 390}
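/*
 * Illustrative sketch (not part of this file): issuing an asynchronous GRU
 * instruction on previously reserved resources, patterned after quicktest2()
 * below. "han" is a handle from gru_reserve_async_resources(); "buf" is a
 * hypothetical kernel buffer of at least 4 doublewords.
 */
#if 0	/* example only */
static void example_async_zero(unsigned long han, unsigned long *buf)
{
	void *cb;

	gru_lock_async_resource(han, &cb, NULL);

	/* Zero 4 doublewords; IMA_INTERRUPT raises an interrupt on completion */
	gru_vset(cb, uv_gpa(buf), 0, XTYPE_DW, 4, 1, IMA_INTERRUPT);

	/* Sleep until the interrupt handler completes the async wait event */
	gru_wait_async_cbr(han);

	if (gru_check_status(cb) != CBS_IDLE)
		pr_debug("example: async CBR did not complete cleanly\n");

	gru_unlock_async_resource(han);
}
#endif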
 391
 392/*----------------------------------------------------------------------*/
 393int gru_get_cb_exception_detail(void *cb,
 394		struct control_block_extended_exc_detail *excdet)
 395{
 396	struct gru_control_block_extended *cbe;
 397	struct gru_thread_state *kgts = NULL;
 398	unsigned long off;
 399	int cbrnum, bid;
 400
 401	/*
 402	 * Locate kgts for cb. This algorithm is SLOW but
 403	 * this function is rarely called (i.e., almost never).
 404	 * Performance does not matter.
 405	 */
 406	for_each_possible_blade(bid) {
 407		if (!gru_base[bid])
 408			break;
 409		kgts = gru_base[bid]->bs_kgts;
 410		if (!kgts || !kgts->ts_gru)
 411			continue;
 412		off = cb - kgts->ts_gru->gs_gru_base_vaddr;
 413		if (off < GRU_SIZE)
 414			break;
 415		kgts = NULL;
 416	}
 417	BUG_ON(!kgts);
 418	cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
 419	cbe = get_cbe(GRUBASE(cb), cbrnum);
 420	gru_flush_cache(cbe);	/* CBE not coherent */
 421	sync_core();
 422	excdet->opc = cbe->opccpy;
 423	excdet->exopc = cbe->exopccpy;
 424	excdet->ecause = cbe->ecause;
 425	excdet->exceptdet0 = cbe->idef1upd;
 426	excdet->exceptdet1 = cbe->idef3upd;
 427	gru_flush_cache(cbe);
 428	return 0;
 429}
 430
 431char *gru_get_cb_exception_detail_str(int ret, void *cb,
 432				      char *buf, int size)
 433{
 434	struct gru_control_block_status *gen = (void *)cb;
 435	struct control_block_extended_exc_detail excdet;
 436
 437	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
 438		gru_get_cb_exception_detail(cb, &excdet);
 439		snprintf(buf, size,
 440			"GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
 441			"excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
 442			gen, excdet.opc, excdet.exopc, excdet.ecause,
 443			excdet.exceptdet0, excdet.exceptdet1);
 444	} else {
 445		snprintf(buf, size, "No exception");
 446	}
 447	return buf;
 448}
 449
 450static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
 451{
 452	while (gen->istatus >= CBS_ACTIVE) {
 453		cpu_relax();
 454		barrier();
 455	}
 456	return gen->istatus;
 457}
 458
 459static int gru_retry_exception(void *cb)
 460{
 461	struct gru_control_block_status *gen = (void *)cb;
 462	struct control_block_extended_exc_detail excdet;
 463	int retry = EXCEPTION_RETRY_LIMIT;
 464
 465	while (1)  {
 466		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
 467			return CBS_IDLE;
 468		if (gru_get_cb_message_queue_substatus(cb))
 469			return CBS_EXCEPTION;
 470		gru_get_cb_exception_detail(cb, &excdet);
 471		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
 472				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
 473			break;
 474		if (retry-- == 0)
 475			break;
 476		gen->icmd = 1;
 477		gru_flush_cache(gen);
 478	}
 479	return CBS_EXCEPTION;
 480}
 481
 482int gru_check_status_proc(void *cb)
 483{
 484	struct gru_control_block_status *gen = (void *)cb;
 485	int ret;
 486
 487	ret = gen->istatus;
 488	if (ret == CBS_EXCEPTION)
 489		ret = gru_retry_exception(cb);
 490	rmb();
 491	return ret;
 492
 493}
 494
 495int gru_wait_proc(void *cb)
 496{
 497	struct gru_control_block_status *gen = (void *)cb;
 498	int ret;
 499
 500	ret = gru_wait_idle_or_exception(gen);
 501	if (ret == CBS_EXCEPTION)
 502		ret = gru_retry_exception(cb);
 503	rmb();
 504	return ret;
 505}
 506
 507void gru_abort(int ret, void *cb, char *str)
 508{
 509	char buf[GRU_EXC_STR_SIZE];
 510
 511	panic("GRU FATAL ERROR: %s - %s\n", str,
 512	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
 513}
 514
 515void gru_wait_abort_proc(void *cb)
 516{
 517	int ret;
 518
 519	ret = gru_wait_proc(cb);
 520	if (ret)
 521		gru_abort(ret, cb, "gru_wait_abort");
 522}
 523
 524
 525/*------------------------------ MESSAGE QUEUES -----------------------------*/
 526
 527/* Internal status. These are NOT returned to the user. */
 528#define MQIE_AGAIN		-1	/* try again */
 529
 530
 531/*
 532 * Save/restore the "present" flag that is in the second line of 2-line
 533 * messages
 534 */
 535static inline int get_present2(void *p)
 536{
 537	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
 538	return mhdr->present;
 539}
 540
 541static inline void restore_present2(void *p, int val)
 542{
 543	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
 544	mhdr->present = val;
 545}
 546
 547/*
 548 * Create a message queue.
 549 * 	qlines - message queue size in cache lines. Includes 2-line header.
 550 */
 551int gru_create_message_queue(struct gru_message_queue_desc *mqd,
 552		void *p, unsigned int bytes, int nasid, int vector, int apicid)
 553{
 554	struct message_queue *mq = p;
 555	unsigned int qlines;
 556
 557	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
 558	memset(mq, 0, bytes);
 559	mq->start = &mq->data;
 560	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
 561	mq->next = &mq->data;
 562	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
 563	mq->qlines = qlines;
 564	mq->hstatus[0] = 0;
 565	mq->hstatus[1] = 1;
 566	mq->head = gru_mesq_head(2, qlines / 2 + 1);
 567	mqd->mq = mq;
 568	mqd->mq_gpa = uv_gpa(mq);
 569	mqd->qlines = qlines;
 570	mqd->interrupt_pnode = nasid >> 1;
 571	mqd->interrupt_vector = vector;
 572	mqd->interrupt_apicid = apicid;
 573	return 0;
 574}
 575EXPORT_SYMBOL_GPL(gru_create_message_queue);
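/*
 * Illustrative sketch (not part of this file): creating a message queue with
 * no interrupt delivery. A whole page is used so the queue memory is
 * cacheline aligned and cannot cross a page boundary; teardown (free_page)
 * and error handling are left out.
 */
#if 0	/* example only */
static int example_create_mq(struct gru_message_queue_desc *mqd)
{
	void *mq = (void *)get_zeroed_page(GFP_KERNEL);

	if (!mq)
		return -ENOMEM;

	/* nasid/vector/apicid of 0 => no interrupt when messages arrive */
	return gru_create_message_queue(mqd, mq, PAGE_SIZE, 0, 0, 0);
}
#endif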
 576
 577/*
 578 * Send a NOOP message to a message queue
 579 * 	Returns:
 580 * 		 0 - if queue is full after the send. This is the normal case
 581 * 		     but various races can change this.
 582 *		-1 - if mesq sent successfully but queue not full
 583 *		>0 - unexpected error. MQE_xxx returned
 584 */
 585static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
 586				void *mesg)
 587{
 588	const struct message_header noop_header = {
 589					.present = MQS_NOOP, .lines = 1};
 590	unsigned long m;
 591	int substatus, ret;
 592	struct message_header save_mhdr, *mhdr = mesg;
 593
 594	STAT(mesq_noop);
 595	save_mhdr = *mhdr;
 596	*mhdr = noop_header;
 597	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
 598	ret = gru_wait(cb);
 599
 600	if (ret) {
 601		substatus = gru_get_cb_message_queue_substatus(cb);
 602		switch (substatus) {
 603		case CBSS_NO_ERROR:
 604			STAT(mesq_noop_unexpected_error);
 605			ret = MQE_UNEXPECTED_CB_ERR;
 606			break;
 607		case CBSS_LB_OVERFLOWED:
 608			STAT(mesq_noop_lb_overflow);
 609			ret = MQE_CONGESTION;
 610			break;
 611		case CBSS_QLIMIT_REACHED:
 612			STAT(mesq_noop_qlimit_reached);
 613			ret = 0;
 614			break;
 615		case CBSS_AMO_NACKED:
 616			STAT(mesq_noop_amo_nacked);
 617			ret = MQE_CONGESTION;
 618			break;
 619		case CBSS_PUT_NACKED:
 620			STAT(mesq_noop_put_nacked);
 621			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
 622			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
 623						IMA);
 624			if (gru_wait(cb) == CBS_IDLE)
 625				ret = MQIE_AGAIN;
 626			else
 627				ret = MQE_UNEXPECTED_CB_ERR;
 628			break;
 629		case CBSS_PAGE_OVERFLOW:
 630			STAT(mesq_noop_page_overflow);
 631			/* fallthru */
 632		default:
 633			BUG();
 634		}
 635	}
 636	*mhdr = save_mhdr;
 637	return ret;
 638}
 639
 640/*
 641 * Handle a gru_mesq full.
 642 */
 643static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
 644				void *mesg, int lines)
 645{
 646	union gru_mesqhead mqh;
 647	unsigned int limit, head;
 648	unsigned long avalue;
 649	int half, qlines;
 650
 651	/* Determine if switching to first/second half of q */
 652	avalue = gru_get_amo_value(cb);
 653	head = gru_get_amo_value_head(cb);
 654	limit = gru_get_amo_value_limit(cb);
 655
 656	qlines = mqd->qlines;
 657	half = (limit != qlines);
 658
 659	if (half)
 660		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
 661	else
 662		mqh = gru_mesq_head(2, qlines / 2 + 1);
 663
 664	/* Try to get lock for switching head pointer */
 665	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
 666	if (gru_wait(cb) != CBS_IDLE)
 667		goto cberr;
 668	if (!gru_get_amo_value(cb)) {
 669		STAT(mesq_qf_locked);
 670		return MQE_QUEUE_FULL;
 671	}
 672
 673	/* Got the lock. Send optional NOP if queue not full, */
 674	if (head != limit) {
 675		if (send_noop_message(cb, mqd, mesg)) {
 676			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
 677					XTYPE_DW, IMA);
 678			if (gru_wait(cb) != CBS_IDLE)
 679				goto cberr;
 680			STAT(mesq_qf_noop_not_full);
 681			return MQIE_AGAIN;
 682		}
 683		avalue++;
 684	}
 685
 686	/* Then flip queuehead to other half of queue. */
 687	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
 688							IMA);
 689	if (gru_wait(cb) != CBS_IDLE)
 690		goto cberr;
 691
 692	/* If swapping the queue head was not successful, clear the hstatus lock */
 693	if (gru_get_amo_value(cb) != avalue) {
 694		STAT(mesq_qf_switch_head_failed);
 695		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
 696							IMA);
 697		if (gru_wait(cb) != CBS_IDLE)
 698			goto cberr;
 699	}
 700	return MQIE_AGAIN;
 701cberr:
 702	STAT(mesq_qf_unexpected_error);
 703	return MQE_UNEXPECTED_CB_ERR;
 704}
 705
 706/*
 707 * Handle a PUT failure. Note: if message was a 2-line message, one of the
 708 * lines might have been successfully written. Before sending the
 709 * message, "present" must be cleared in BOTH lines to prevent the receiver
 710 * from prematurely seeing the full message.
 711 */
 712static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
 713			void *mesg, int lines)
 714{
 715	unsigned long m, *val = mesg, gpa, save;
 716	int ret;
 717
 718	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
 719	if (lines == 2) {
 720		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
 721		if (gru_wait(cb) != CBS_IDLE)
 722			return MQE_UNEXPECTED_CB_ERR;
 723	}
 724	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
 725	if (gru_wait(cb) != CBS_IDLE)
 726		return MQE_UNEXPECTED_CB_ERR;
 727
 728	if (!mqd->interrupt_vector)
 729		return MQE_OK;
 730
 731	/*
 732	 * Send a cross-partition interrupt to the SSI that contains the target
 733	 * message queue. Normally, the interrupt is automatically delivered by
 734	 * hardware but some error conditions require explicit delivery.
 735	 * Use the GRU to deliver the interrupt. Otherwise partition failures
 736	 * could cause unrecovered errors.
 737	 */
 738	gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT);
 739	save = *val;
 740	*val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector,
 741				dest_Fixed);
 742	gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA);
 743	ret = gru_wait(cb);
 744	*val = save;
 745	if (ret != CBS_IDLE)
 746		return MQE_UNEXPECTED_CB_ERR;
 747	return MQE_OK;
 748}
 749
 750/*
 751 * Handle a gru_mesq failure. Some of these failures are software recoverable
 752 * or retryable.
 753 */
 754static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
 755				void *mesg, int lines)
 756{
 757	int substatus, ret = 0;
 758
 759	substatus = gru_get_cb_message_queue_substatus(cb);
 760	switch (substatus) {
 761	case CBSS_NO_ERROR:
 762		STAT(mesq_send_unexpected_error);
 763		ret = MQE_UNEXPECTED_CB_ERR;
 764		break;
 765	case CBSS_LB_OVERFLOWED:
 766		STAT(mesq_send_lb_overflow);
 767		ret = MQE_CONGESTION;
 768		break;
 769	case CBSS_QLIMIT_REACHED:
 770		STAT(mesq_send_qlimit_reached);
 771		ret = send_message_queue_full(cb, mqd, mesg, lines);
 772		break;
 773	case CBSS_AMO_NACKED:
 774		STAT(mesq_send_amo_nacked);
 775		ret = MQE_CONGESTION;
 776		break;
 777	case CBSS_PUT_NACKED:
 778		STAT(mesq_send_put_nacked);
 779		ret = send_message_put_nacked(cb, mqd, mesg, lines);
 780		break;
 781	case CBSS_PAGE_OVERFLOW:
 782		STAT(mesq_page_overflow);
 783		/* fallthru */
 784	default:
 785		BUG();
 786	}
 787	return ret;
 788}
 789
 790/*
 791 * Send a message to a message queue
 792 * 	mqd	message queue descriptor
 793 * 	mesg	message. Must be a vaddr within a GSEG
 794 * 	bytes	message size (<= 2 CL)
 795 */
 796int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
 797				unsigned int bytes)
 798{
 799	struct message_header *mhdr;
 800	void *cb;
 801	void *dsr;
 802	int istatus, clines, ret;
 803
 804	STAT(mesq_send);
 805	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
 806
 807	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
 808	if (gru_get_cpu_resources(bytes, &cb, &dsr))
 809		return MQE_BUG_NO_RESOURCES;
 810	memcpy(dsr, mesg, bytes);
 811	mhdr = dsr;
 812	mhdr->present = MQS_FULL;
 813	mhdr->lines = clines;
 814	if (clines == 2) {
 815		mhdr->present2 = get_present2(mhdr);
 816		restore_present2(mhdr, MQS_FULL);
 817	}
 818
 819	do {
 820		ret = MQE_OK;
 821		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
 822		istatus = gru_wait(cb);
 823		if (istatus != CBS_IDLE)
 824			ret = send_message_failure(cb, mqd, dsr, clines);
 825	} while (ret == MQIE_AGAIN);
 826	gru_free_cpu_resources(cb, dsr);
 827
 828	if (ret)
 829		STAT(mesq_send_failed);
 830	return ret;
 831}
 832EXPORT_SYMBOL_GPL(gru_send_message_gpa);
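/*
 * Illustrative sketch (not part of this file): sending a message and
 * retrying on transient congestion, as quicktest1() below does. The first
 * word of the message is used by the mesq interface for the message header
 * (struct message_header above), so the caller's payload should start after
 * it. "mqd" describes a queue built with gru_create_message_queue().
 */
#if 0	/* example only */
static int example_send(struct gru_message_queue_desc *mqd, void *mesg,
			unsigned int bytes)
{
	int ret;

	do {
		ret = gru_send_message_gpa(mqd, mesg, bytes);
	} while (ret == MQE_CONGESTION);

	return ret;	/* MQE_OK, MQE_QUEUE_FULL or another MQE_xxx code */
}
#endif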
 833
 834/*
 835 * Advance the receive pointer for the queue to the next message.
 836 */
 837void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
 838{
 839	struct message_queue *mq = mqd->mq;
 840	struct message_header *mhdr = mq->next;
 841	void *next, *pnext;
 842	int half = -1;
 843	int lines = mhdr->lines;
 844
 845	if (lines == 2)
 846		restore_present2(mhdr, MQS_EMPTY);
 847	mhdr->present = MQS_EMPTY;
 848
 849	pnext = mq->next;
 850	next = pnext + GRU_CACHE_LINE_BYTES * lines;
 851	if (next == mq->limit) {
 852		next = mq->start;
 853		half = 1;
 854	} else if (pnext < mq->start2 && next >= mq->start2) {
 855		half = 0;
 856	}
 857
 858	if (half >= 0)
 859		mq->hstatus[half] = 1;
 860	mq->next = next;
 861}
 862EXPORT_SYMBOL_GPL(gru_free_message);
 863
 864/*
 865 * Get next message from message queue. Return NULL if no message
 866 * present. User must call gru_free_message() to move to the next message.
 867 * 	rmq	message queue
 868 */
 869void *gru_get_next_message(struct gru_message_queue_desc *mqd)
 870{
 871	struct message_queue *mq = mqd->mq;
 872	struct message_header *mhdr = mq->next;
 873	int present = mhdr->present;
 874
 875	/* skip NOOP messages */
 876	while (present == MQS_NOOP) {
 877		gru_free_message(mqd, mhdr);
 878		mhdr = mq->next;
 879		present = mhdr->present;
 880	}
 881
 882	/* Wait for both halves of 2 line messages */
 883	if (present == MQS_FULL && mhdr->lines == 2 &&
 884				get_present2(mhdr) == MQS_EMPTY)
 885		present = MQS_EMPTY;
 886
 887	if (!present) {
 888		STAT(mesq_receive_none);
 889		return NULL;
 890	}
 891
 892	if (mhdr->lines == 2)
 893		restore_present2(mhdr, mhdr->present2);
 894
 895	STAT(mesq_receive);
 896	return mhdr;
 897}
 898EXPORT_SYMBOL_GPL(gru_get_next_message);
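/*
 * Illustrative sketch (not part of this file): draining a message queue on
 * the receive side. Each returned message begins with a struct
 * message_header; gru_free_message() advances the receive pointer.
 */
#if 0	/* example only */
static void example_receive(struct gru_message_queue_desc *mqd)
{
	void *mesg;

	while ((mesg = gru_get_next_message(mqd)) != NULL) {
		/* ... process the message payload here ... */
		gru_free_message(mqd, mesg);
	}
}
#endif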
 899
 900/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
 901
 902/*
 903 * Load a DW from a global GPA. The GPA can be a memory or MMR address.
 904 */
 905int gru_read_gpa(unsigned long *value, unsigned long gpa)
 906{
 907	void *cb;
 908	void *dsr;
 909	int ret, iaa;
 910
 911	STAT(read_gpa);
 912	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
 913		return MQE_BUG_NO_RESOURCES;
 914	iaa = gpa >> 62;
 915	gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
 916	ret = gru_wait(cb);
 917	if (ret == CBS_IDLE)
 918		*value = *(unsigned long *)dsr;
 919	gru_free_cpu_resources(cb, dsr);
 920	return ret;
 921}
 922EXPORT_SYMBOL_GPL(gru_read_gpa);
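/*
 * Illustrative sketch (not part of this file): reading a single doubleword
 * from a global physical address, e.g. an MMR on a remote blade. "gpa" is
 * assumed to already be a valid GRU global address.
 */
#if 0	/* example only */
static int example_read_dw(unsigned long gpa, unsigned long *out)
{
	if (gru_read_gpa(out, gpa) != CBS_IDLE)
		return -EIO;
	return 0;
}
#endif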
 923
 924
 925/*
 926 * Copy a block of data using the GRU resources
 927 */
 928int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
 929				unsigned int bytes)
 930{
 931	void *cb;
 932	void *dsr;
 933	int ret;
 934
 935	STAT(copy_gpa);
 936	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
 937		return MQE_BUG_NO_RESOURCES;
 938	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
 939		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
 940	ret = gru_wait(cb);
 941	gru_free_cpu_resources(cb, dsr);
 942	return ret;
 943}
 944EXPORT_SYMBOL_GPL(gru_copy_gpa);
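/*
 * Illustrative sketch (not part of this file): copying a kernel buffer with
 * the GRU block copy. uv_gpa() converts the kernel virtual addresses to the
 * global physical addresses that gru_copy_gpa() expects.
 */
#if 0	/* example only */
static int example_copy(void *dst, void *src, unsigned int bytes)
{
	if (gru_copy_gpa(uv_gpa(dst), uv_gpa(src), bytes) != CBS_IDLE)
		return -EIO;
	return 0;
}
#endif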
 945
 946/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
 947/* 	Temp - will delete after we gain confidence in the GRU		*/
 948
 949static int quicktest0(unsigned long arg)
 950{
 951	unsigned long word0;
 952	unsigned long word1;
 953	void *cb;
 954	void *dsr;
 955	unsigned long *p;
 956	int ret = -EIO;
 957
 958	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
 959		return MQE_BUG_NO_RESOURCES;
 960	p = dsr;
 961	word0 = MAGIC;
 962	word1 = 0;
 963
 964	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
 965	if (gru_wait(cb) != CBS_IDLE) {
 966		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
 967		goto done;
 968	}
 969
 970	if (*p != MAGIC) {
 971		printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
 972		goto done;
 973	}
 974	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
 975	if (gru_wait(cb) != CBS_IDLE) {
 976		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
 977		goto done;
 978	}
 979
 980	if (word0 != word1 || word1 != MAGIC) {
 981		printk(KERN_DEBUG
 982		       "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
 983		     smp_processor_id(), word1, MAGIC);
 984		goto done;
 985	}
 986	ret = 0;
 987
 988done:
 989	gru_free_cpu_resources(cb, dsr);
 990	return ret;
 991}
 992
 993#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))
 994
 995static int quicktest1(unsigned long arg)
 996{
 997	struct gru_message_queue_desc mqd;
 998	void *p, *mq;
 999	unsigned long *dw;
1000	int i, ret = -EIO;
1001	char mes[GRU_CACHE_LINE_BYTES], *m;
1002
1003	/* Need a 1K cacheline-aligned buffer that does not cross a page boundary */
1004	p = kmalloc(4096, GFP_KERNEL);
1005	if (p == NULL)
1006		return -ENOMEM;
1007	mq = ALIGNUP(p, 1024);
1008	memset(mes, 0xee, sizeof(mes));
1009	dw = mq;
1010
1011	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
1012	for (i = 0; i < 6; i++) {
1013		mes[8] = i;
1014		do {
1015			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
1016		} while (ret == MQE_CONGESTION);
1017		if (ret)
1018			break;
1019	}
1020	if (ret != MQE_QUEUE_FULL || i != 4) {
1021		printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n",
1022		       smp_processor_id(), ret, i);
1023		goto done;
1024	}
1025
1026	for (i = 0; i < 6; i++) {
1027		m = gru_get_next_message(&mqd);
1028		if (!m || m[8] != i)
1029			break;
1030		gru_free_message(&mqd, m);
1031	}
1032	if (i != 4) {
1033		printk(KERN_DEBUG "GRU:%d quicktest1: bad message, i %d, m %p, m8 %d\n",
1034			smp_processor_id(), i, m, m ? m[8] : -1);
1035		goto done;
1036	}
1037	ret = 0;
1038
1039done:
1040	kfree(p);
1041	return ret;
1042}
1043
1044static int quicktest2(unsigned long arg)
1045{
1046	static DECLARE_COMPLETION(cmp);
1047	unsigned long han;
1048	int blade_id = 0;
1049	int numcb = 4;
1050	int ret = 0;
1051	unsigned long *buf;
1052	void *cb0, *cb;
1053	struct gru_control_block_status *gen;
1054	int i, k, istatus, bytes;
1055
1056	bytes = numcb * 4 * 8;
1057	buf = kmalloc(bytes, GFP_KERNEL);
1058	if (!buf)
1059		return -ENOMEM;
1060
1061	ret = -EBUSY;
1062	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
1063	if (!han)
1064		goto done;
1065
1066	gru_lock_async_resource(han, &cb0, NULL);
1067	memset(buf, 0xee, bytes);
1068	for (i = 0; i < numcb; i++)
1069		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
1070				XTYPE_DW, 4, 1, IMA_INTERRUPT);
1071
1072	ret = 0;
1073	k = numcb;
1074	do {
1075		gru_wait_async_cbr(han);
1076		for (i = 0; i < numcb; i++) {
1077			cb = cb0 + i * GRU_HANDLE_STRIDE;
1078			istatus = gru_check_status(cb);
1079			if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
1080				break;
1081		}
1082		if (i == numcb)
1083			continue;
1084		if (istatus != CBS_IDLE) {
1085			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
1086			ret = -EFAULT;
1087		} else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
1088				buf[4 * i + 3]) {
1089			printk(KERN_DEBUG "GRU:%d quicktest2:cb %d,  buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
1090			       smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
1091			ret = -EIO;
1092		}
1093		k--;
1094		gen = cb;
1095		gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
1096	} while (k);
1097	BUG_ON(cmp.done);
1098
1099	gru_unlock_async_resource(han);
1100	gru_release_async_resources(han);
1101done:
1102	kfree(buf);
1103	return ret;
1104}
1105
1106#define BUFSIZE 200
1107static int quicktest3(unsigned long arg)
1108{
1109	char buf1[BUFSIZE], buf2[BUFSIZE];
1110	int ret = 0;
1111
1112	memset(buf2, 0, sizeof(buf2));
1113	memset(buf1, get_cycles() & 255, sizeof(buf1));
1114	gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
1115	if (memcmp(buf1, buf2, BUFSIZE)) {
1116		printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
1117		ret = -EIO;
1118	}
1119	return ret;
1120}
1121
1122/*
1123 * Debugging only. User hook for various kernel tests
1124 * of driver & gru.
1125 */
1126int gru_ktest(unsigned long arg)
1127{
1128	int ret = -EINVAL;
1129
1130	switch (arg & 0xff) {
1131	case 0:
1132		ret = quicktest0(arg);
1133		break;
1134	case 1:
1135		ret = quicktest1(arg);
1136		break;
1137	case 2:
1138		ret = quicktest2(arg);
1139		break;
1140	case 3:
1141		ret = quicktest3(arg);
1142		break;
1143	case 99:
1144		ret = gru_free_kernel_contexts();
1145		break;
1146	}
1147	return ret;
1148
1149}
1150
1151int gru_kservices_init(void)
1152{
1153	return 0;
1154}
1155
1156void gru_kservices_exit(void)
1157{
1158	if (gru_free_kernel_contexts())
1159		BUG();
1160}
1161
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * SN Platform GRU Driver
   4 *
   5 *              KERNEL SERVICES THAT USE THE GRU
   6 *
   7 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
   8 */
   9
  10#include <linux/kernel.h>
  11#include <linux/errno.h>
  12#include <linux/slab.h>
  13#include <linux/mm.h>
  14#include <linux/spinlock.h>
  15#include <linux/device.h>
  16#include <linux/miscdevice.h>
  17#include <linux/proc_fs.h>
  18#include <linux/interrupt.h>
  19#include <linux/sync_core.h>
  20#include <linux/uaccess.h>
  21#include <linux/delay.h>
  22#include <linux/export.h>
  23#include <asm/io_apic.h>
  24#include "gru.h"
  25#include "grulib.h"
  26#include "grutables.h"
  27#include "grukservices.h"
  28#include "gru_instructions.h"
  29#include <asm/uv/uv_hub.h>
  30
  31/*
  32 * Kernel GRU Usage
  33 *
  34 * The following is an interim algorithm for management of kernel GRU
  35 * resources. This will likely be replaced when we better understand the
  36 * kernel/user requirements.
  37 *
  38 * Blade percpu resources reserved for kernel use. These resources are
  39 * reserved whenever the kernel context for the blade is loaded. Note
  40 * that the kernel context is not guaranteed to be always available. It is
  41 * loaded on demand & can be stolen by a user if the user demand exceeds the
  42 * kernel demand. The kernel can always reload the kernel context but
  43 * a SLEEP may be required!!!.
  44 *
  45 * Async Overview:
  46 *
  47 * 	Each blade has one "kernel context" that owns GRU kernel resources
  48 * 	located on the blade. Kernel drivers use GRU resources in this context
  49 * 	for sending messages, zeroing memory, etc.
  50 *
  51 * 	The kernel context is dynamically loaded on demand. If it is not in
  52 * 	use by the kernel, the kernel context can be unloaded & given to a user.
  53 * 	The kernel context will be reloaded when needed. This may require that
  54 * 	a context be stolen from a user.
  55 * 		NOTE: frequent unloading/reloading of the kernel context is
  56 * 		expensive. We are depending on batch schedulers, cpusets, sane
  57 * 		drivers or some other mechanism to prevent the need for frequent
  58 *	 	stealing/reloading.
  59 *
  60 * 	The kernel context consists of two parts:
  61 * 		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
  62 * 		  Each cpu has its own private resources & does not share them
  63 * 		  with other cpus. These resources are used serially, ie,
  64 * 		  locked, used & unlocked  on each call to a function in
  65 * 		  grukservices.
  66 * 		  	(Now that we have dynamic loading of kernel contexts, I
  67 * 		  	 may rethink this & allow sharing between cpus....)
  68 *
  69 *		- Additional resources can be reserved long term & used directly
  70 *		  by UV drivers located in the kernel. Drivers using these GRU
  71 *		  resources can use asynchronous GRU instructions that send
  72 *		  interrupts on completion.
  73 *		  	- these resources must be explicitly locked/unlocked
  74 *		  	- locked resources prevent (obviously) the kernel
  75 *		  	  context from being unloaded.
  76 *			- drivers using these resources directly issue their own
  77 *			  GRU instruction and must wait/check completion.
  78 *
  79 * 		  When these resources are reserved, the caller can optionally
  80 * 		  associate a wait_queue with the resources and use asynchronous
  81 * 		  GRU instructions. When an async GRU instruction completes, the
  82 * 		  driver will do a wakeup on the event.
  83 *
  84 */
  85
  86
  87#define ASYNC_HAN_TO_BID(h)	((h) - 1)
  88#define ASYNC_BID_TO_HAN(b)	((b) + 1)
  89#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]
  90
  91#define GRU_NUM_KERNEL_CBR	1
  92#define GRU_NUM_KERNEL_DSR_BYTES 256
  93#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
  94					GRU_CACHE_LINE_BYTES)
  95
  96/* GRU instruction attributes for all instructions */
  97#define IMA			IMA_CB_DELAY
  98
  99/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
 100#define __gru_cacheline_aligned__                               \
 101	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
 102
 103#define MAGIC	0x1234567887654321UL
 104
 105/* Default retry count for GRU errors on kernel instructions */
 106#define EXCEPTION_RETRY_LIMIT	3
 107
 108/* Status of message queue sections */
 109#define MQS_EMPTY		0
 110#define MQS_FULL		1
 111#define MQS_NOOP		2
 112
 113/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
 114/* optimized for x86_64 */
 115struct message_queue {
 116	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
 117	int			qlines;				/* DW 1 */
 118	long 			hstatus[2];
 119	void 			*next __gru_cacheline_aligned__;/* CL 1 */
 120	void 			*limit;
 121	void 			*start;
 122	void 			*start2;
 123	char			data ____cacheline_aligned;	/* CL 2 */
 124};
 125
 126/* First word in every message - used by mesq interface */
 127struct message_header {
 128	char	present;
 129	char	present2;
 130	char 	lines;
 131	char	fill;
 132};
 133
 134#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))
 135
 136/*
 137 * Reload the blade's kernel context into a GRU chiplet. Called holding
 138 * the bs_kgts_sema for READ. Will steal user contexts if necessary.
 139 */
 140static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
 141{
 142	struct gru_state *gru;
 143	struct gru_thread_state *kgts;
 144	void *vaddr;
 145	int ctxnum, ncpus;
 146
 147	up_read(&bs->bs_kgts_sema);
 148	down_write(&bs->bs_kgts_sema);
 149
 150	if (!bs->bs_kgts) {
 151		do {
 152			bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
 153			if (!IS_ERR(bs->bs_kgts))
 154				break;
 155			msleep(1);
 156		} while (true);
 157		bs->bs_kgts->ts_user_blade_id = blade_id;
 158	}
 159	kgts = bs->bs_kgts;
 160
 161	if (!kgts->ts_gru) {
 162		STAT(load_kernel_context);
 163		ncpus = uv_blade_nr_possible_cpus(blade_id);
 164		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
 165			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
 166		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
 167			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
 168				bs->bs_async_dsr_bytes);
 169		while (!gru_assign_gru_context(kgts)) {
 170			msleep(1);
 171			gru_steal_context(kgts);
 172		}
 173		gru_load_context(kgts);
 174		gru = bs->bs_kgts->ts_gru;
 175		vaddr = gru->gs_gru_base_vaddr;
 176		ctxnum = kgts->ts_ctxnum;
 177		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
 178		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
 179	}
 180	downgrade_write(&bs->bs_kgts_sema);
 181}
 182
 183/*
 184 * Free all kernel contexts that are not currently in use.
 185 *   Returns 0 if all freed, else the number of in-use contexts.
 186 */
 187static int gru_free_kernel_contexts(void)
 188{
 189	struct gru_blade_state *bs;
 190	struct gru_thread_state *kgts;
 191	int bid, ret = 0;
 192
 193	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
 194		bs = gru_base[bid];
 195		if (!bs)
 196			continue;
 197
 198		/* Ignore busy contexts. Don't want to block here.  */
 199		if (down_write_trylock(&bs->bs_kgts_sema)) {
 200			kgts = bs->bs_kgts;
 201			if (kgts && kgts->ts_gru)
 202				gru_unload_context(kgts, 0);
 203			bs->bs_kgts = NULL;
 204			up_write(&bs->bs_kgts_sema);
 205			kfree(kgts);
 206		} else {
 207			ret++;
 208		}
 209	}
 210	return ret;
 211}
 212
 213/*
 214 * Lock & load the kernel context for the specified blade.
 215 */
 216static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
 217{
 218	struct gru_blade_state *bs;
 219	int bid;
 220
 221	STAT(lock_kernel_context);
 222again:
 223	bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
 224	bs = gru_base[bid];
 225
 226	/* Handle the case where migration occurred while waiting for the sema */
 227	down_read(&bs->bs_kgts_sema);
 228	if (blade_id < 0 && bid != uv_numa_blade_id()) {
 229		up_read(&bs->bs_kgts_sema);
 230		goto again;
 231	}
 232	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
 233		gru_load_kernel_context(bs, bid);
 234	return bs;
 235
 236}
 237
 238/*
 239 * Unlock the kernel context for the specified blade. Context is not
 240 * unloaded but may be stolen before next use.
 241 */
 242static void gru_unlock_kernel_context(int blade_id)
 243{
 244	struct gru_blade_state *bs;
 245
 246	bs = gru_base[blade_id];
 247	up_read(&bs->bs_kgts_sema);
 248	STAT(unlock_kernel_context);
 249}
 250
 251/*
 252 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
 253 * 	- returns with preemption disabled
 254 */
 255static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
 256{
 257	struct gru_blade_state *bs;
 258	int lcpu;
 259
 260	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
 261	preempt_disable();
 262	bs = gru_lock_kernel_context(-1);
 263	lcpu = uv_blade_processor_id();
 264	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
 265	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
 266	return 0;
 267}
 268
 269/*
 270 * Free the current cpu's reserved DSR/CBR resources.
 271 */
 272static void gru_free_cpu_resources(void *cb, void *dsr)
 273{
 274	gru_unlock_kernel_context(uv_numa_blade_id());
 275	preempt_enable();
 276}
 277
 278/*
 279 * Reserve GRU resources to be used asynchronously.
 280 *   Note: currently supports only 1 reservation per blade.
 281 *
 282 * 	input:
 283 * 		blade_id  - blade on which resources should be reserved
 284 * 		cbrs	  - number of CBRs
 285 * 		dsr_bytes - number of DSR bytes needed
 286 *	output:
 287 *		handle to identify resource
 288 *		(0 = async resources already reserved)
 289 */
 290unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
 291			struct completion *cmp)
 292{
 293	struct gru_blade_state *bs;
 294	struct gru_thread_state *kgts;
 295	int ret = 0;
 296
 297	bs = gru_base[blade_id];
 298
 299	down_write(&bs->bs_kgts_sema);
 300
 301	/* Verify no resources already reserved */
 302	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
 303		goto done;
 304	bs->bs_async_dsr_bytes = dsr_bytes;
 305	bs->bs_async_cbrs = cbrs;
 306	bs->bs_async_wq = cmp;
 307	kgts = bs->bs_kgts;
 308
 309	/* Resources changed. Unload context if already loaded */
 310	if (kgts && kgts->ts_gru)
 311		gru_unload_context(kgts, 0);
 312	ret = ASYNC_BID_TO_HAN(blade_id);
 313
 314done:
 315	up_write(&bs->bs_kgts_sema);
 316	return ret;
 317}
 318
 319/*
 320 * Release async resources previously reserved.
 321 *
 322 *	input:
 323 *		han - handle to identify resources
 324 */
 325void gru_release_async_resources(unsigned long han)
 326{
 327	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
 328
 329	down_write(&bs->bs_kgts_sema);
 330	bs->bs_async_dsr_bytes = 0;
 331	bs->bs_async_cbrs = 0;
 332	bs->bs_async_wq = NULL;
 333	up_write(&bs->bs_kgts_sema);
 334}
 335
 336/*
 337 * Wait for async GRU instructions to complete.
 338 *
 339 *	input:
 340 *		han - handle to identify resources
 341 */
 342void gru_wait_async_cbr(unsigned long han)
 343{
 344	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
 345
 346	wait_for_completion(bs->bs_async_wq);
 347	mb();
 348}
 349
 350/*
 351 * Lock previously reserved async GRU resources
 352 *
 353 *	input:
 354 *		han - handle to identify resources
 355 *	output:
 356 *		cb  - pointer to first CBR
 357 *		dsr - pointer to first DSR
 358 */
 359void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr)
 360{
 361	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
 362	int blade_id = ASYNC_HAN_TO_BID(han);
 363	int ncpus;
 364
 365	gru_lock_kernel_context(blade_id);
 366	ncpus = uv_blade_nr_possible_cpus(blade_id);
 367	if (cb)
 368		*cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
 369	if (dsr)
 370		*dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
 371}
 372
 373/*
 374 * Unlock previously reserved async GRU resources
 375 *
 376 *	input:
 377 *		han - handle to identify resources
 378 */
 379void gru_unlock_async_resource(unsigned long han)
 380{
 381	int blade_id = ASYNC_HAN_TO_BID(han);
 382
 383	gru_unlock_kernel_context(blade_id);
 384}
 385
 386/*----------------------------------------------------------------------*/
 387int gru_get_cb_exception_detail(void *cb,
 388		struct control_block_extended_exc_detail *excdet)
 389{
 390	struct gru_control_block_extended *cbe;
 391	struct gru_thread_state *kgts = NULL;
 392	unsigned long off;
 393	int cbrnum, bid;
 394
 395	/*
 396	 * Locate kgts for cb. This algorithm is SLOW but
 397	 * this function is rarely called (i.e., almost never).
 398	 * Performance does not matter.
 399	 */
 400	for_each_possible_blade(bid) {
 401		if (!gru_base[bid])
 402			break;
 403		kgts = gru_base[bid]->bs_kgts;
 404		if (!kgts || !kgts->ts_gru)
 405			continue;
 406		off = cb - kgts->ts_gru->gs_gru_base_vaddr;
 407		if (off < GRU_SIZE)
 408			break;
 409		kgts = NULL;
 410	}
 411	BUG_ON(!kgts);
 412	cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
 413	cbe = get_cbe(GRUBASE(cb), cbrnum);
 414	gru_flush_cache(cbe);	/* CBE not coherent */
 415	sync_core();
 416	excdet->opc = cbe->opccpy;
 417	excdet->exopc = cbe->exopccpy;
 418	excdet->ecause = cbe->ecause;
 419	excdet->exceptdet0 = cbe->idef1upd;
 420	excdet->exceptdet1 = cbe->idef3upd;
 421	gru_flush_cache(cbe);
 422	return 0;
 423}
 424
 425static char *gru_get_cb_exception_detail_str(int ret, void *cb,
 426					     char *buf, int size)
 427{
 428	struct gru_control_block_status *gen = cb;
 429	struct control_block_extended_exc_detail excdet;
 430
 431	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
 432		gru_get_cb_exception_detail(cb, &excdet);
 433		snprintf(buf, size,
 434			"GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
 435			"excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
 436			gen, excdet.opc, excdet.exopc, excdet.ecause,
 437			excdet.exceptdet0, excdet.exceptdet1);
 438	} else {
 439		snprintf(buf, size, "No exception");
 440	}
 441	return buf;
 442}
 443
 444static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
 445{
 446	while (gen->istatus >= CBS_ACTIVE) {
 447		cpu_relax();
 448		barrier();
 449	}
 450	return gen->istatus;
 451}
 452
 453static int gru_retry_exception(void *cb)
 454{
 455	struct gru_control_block_status *gen = cb;
 456	struct control_block_extended_exc_detail excdet;
 457	int retry = EXCEPTION_RETRY_LIMIT;
 458
 459	while (1)  {
 460		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
 461			return CBS_IDLE;
 462		if (gru_get_cb_message_queue_substatus(cb))
 463			return CBS_EXCEPTION;
 464		gru_get_cb_exception_detail(cb, &excdet);
 465		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
 466				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
 467			break;
 468		if (retry-- == 0)
 469			break;
 470		gen->icmd = 1;
 471		gru_flush_cache(gen);
 472	}
 473	return CBS_EXCEPTION;
 474}
 475
 476int gru_check_status_proc(void *cb)
 477{
 478	struct gru_control_block_status *gen = cb;
 479	int ret;
 480
 481	ret = gen->istatus;
 482	if (ret == CBS_EXCEPTION)
 483		ret = gru_retry_exception(cb);
 484	rmb();
 485	return ret;
 486
 487}
 488
 489int gru_wait_proc(void *cb)
 490{
 491	struct gru_control_block_status *gen = cb;
 492	int ret;
 493
 494	ret = gru_wait_idle_or_exception(gen);
 495	if (ret == CBS_EXCEPTION)
 496		ret = gru_retry_exception(cb);
 497	rmb();
 498	return ret;
 499}
 500
 501static void gru_abort(int ret, void *cb, char *str)
 502{
 503	char buf[GRU_EXC_STR_SIZE];
 504
 505	panic("GRU FATAL ERROR: %s - %s\n", str,
 506	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
 507}
 508
 509void gru_wait_abort_proc(void *cb)
 510{
 511	int ret;
 512
 513	ret = gru_wait_proc(cb);
 514	if (ret)
 515		gru_abort(ret, cb, "gru_wait_abort");
 516}
 517
 518
 519/*------------------------------ MESSAGE QUEUES -----------------------------*/
 520
 521/* Internal status. These are NOT returned to the user. */
 522#define MQIE_AGAIN		-1	/* try again */
 523
 524
 525/*
 526 * Save/restore the "present" flag that is in the second line of 2-line
 527 * messages
 528 */
 529static inline int get_present2(void *p)
 530{
 531	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
 532	return mhdr->present;
 533}
 534
 535static inline void restore_present2(void *p, int val)
 536{
 537	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
 538	mhdr->present = val;
 539}
 540
 541/*
 542 * Create a message queue.
 543 * 	qlines - message queue size in cache lines. Includes 2-line header.
 544 */
 545int gru_create_message_queue(struct gru_message_queue_desc *mqd,
 546		void *p, unsigned int bytes, int nasid, int vector, int apicid)
 547{
 548	struct message_queue *mq = p;
 549	unsigned int qlines;
 550
 551	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
 552	memset(mq, 0, bytes);
 553	mq->start = &mq->data;
 554	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
 555	mq->next = &mq->data;
 556	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
 557	mq->qlines = qlines;
 558	mq->hstatus[0] = 0;
 559	mq->hstatus[1] = 1;
 560	mq->head = gru_mesq_head(2, qlines / 2 + 1);
 561	mqd->mq = mq;
 562	mqd->mq_gpa = uv_gpa(mq);
 563	mqd->qlines = qlines;
 564	mqd->interrupt_pnode = nasid >> 1;
 565	mqd->interrupt_vector = vector;
 566	mqd->interrupt_apicid = apicid;
 567	return 0;
 568}
 569EXPORT_SYMBOL_GPL(gru_create_message_queue);
 570
 571/*
 572 * Send a NOOP message to a message queue
 573 * 	Returns:
 574 * 		 0 - if queue is full after the send. This is the normal case
 575 * 		     but various races can change this.
 576 *		-1 - if mesq sent successfully but queue not full
 577 *		>0 - unexpected error. MQE_xxx returned
 578 */
 579static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
 580				void *mesg)
 581{
 582	const struct message_header noop_header = {
 583					.present = MQS_NOOP, .lines = 1};
 584	unsigned long m;
 585	int substatus, ret;
 586	struct message_header save_mhdr, *mhdr = mesg;
 587
 588	STAT(mesq_noop);
 589	save_mhdr = *mhdr;
 590	*mhdr = noop_header;
 591	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
 592	ret = gru_wait(cb);
 593
 594	if (ret) {
 595		substatus = gru_get_cb_message_queue_substatus(cb);
 596		switch (substatus) {
 597		case CBSS_NO_ERROR:
 598			STAT(mesq_noop_unexpected_error);
 599			ret = MQE_UNEXPECTED_CB_ERR;
 600			break;
 601		case CBSS_LB_OVERFLOWED:
 602			STAT(mesq_noop_lb_overflow);
 603			ret = MQE_CONGESTION;
 604			break;
 605		case CBSS_QLIMIT_REACHED:
 606			STAT(mesq_noop_qlimit_reached);
 607			ret = 0;
 608			break;
 609		case CBSS_AMO_NACKED:
 610			STAT(mesq_noop_amo_nacked);
 611			ret = MQE_CONGESTION;
 612			break;
 613		case CBSS_PUT_NACKED:
 614			STAT(mesq_noop_put_nacked);
 615			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
 616			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
 617						IMA);
 618			if (gru_wait(cb) == CBS_IDLE)
 619				ret = MQIE_AGAIN;
 620			else
 621				ret = MQE_UNEXPECTED_CB_ERR;
 622			break;
 623		case CBSS_PAGE_OVERFLOW:
 624			STAT(mesq_noop_page_overflow);
 625			fallthrough;
 626		default:
 627			BUG();
 628		}
 629	}
 630	*mhdr = save_mhdr;
 631	return ret;
 632}
 633
 634/*
 635 * Handle a gru_mesq full.
 636 */
 637static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
 638				void *mesg, int lines)
 639{
 640	union gru_mesqhead mqh;
 641	unsigned int limit, head;
 642	unsigned long avalue;
 643	int half, qlines;
 644
 645	/* Determine if switching to first/second half of q */
 646	avalue = gru_get_amo_value(cb);
 647	head = gru_get_amo_value_head(cb);
 648	limit = gru_get_amo_value_limit(cb);
 649
 650	qlines = mqd->qlines;
 651	half = (limit != qlines);
 652
 653	if (half)
 654		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
 655	else
 656		mqh = gru_mesq_head(2, qlines / 2 + 1);
 657
 658	/* Try to get lock for switching head pointer */
 659	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
 660	if (gru_wait(cb) != CBS_IDLE)
 661		goto cberr;
 662	if (!gru_get_amo_value(cb)) {
 663		STAT(mesq_qf_locked);
 664		return MQE_QUEUE_FULL;
 665	}
 666
 667	/* Got the lock. Send optional NOP if queue not full, */
 668	if (head != limit) {
 669		if (send_noop_message(cb, mqd, mesg)) {
 670			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
 671					XTYPE_DW, IMA);
 672			if (gru_wait(cb) != CBS_IDLE)
 673				goto cberr;
 674			STAT(mesq_qf_noop_not_full);
 675			return MQIE_AGAIN;
 676		}
 677		avalue++;
 678	}
 679
 680	/* Then flip queuehead to other half of queue. */
 681	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
 682							IMA);
 683	if (gru_wait(cb) != CBS_IDLE)
 684		goto cberr;
 685
 686	/* If swapping the queue head was not successful, clear the hstatus lock */
 687	if (gru_get_amo_value(cb) != avalue) {
 688		STAT(mesq_qf_switch_head_failed);
 689		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
 690							IMA);
 691		if (gru_wait(cb) != CBS_IDLE)
 692			goto cberr;
 693	}
 694	return MQIE_AGAIN;
 695cberr:
 696	STAT(mesq_qf_unexpected_error);
 697	return MQE_UNEXPECTED_CB_ERR;
 698}
 699
 700/*
 701 * Handle a PUT failure. Note: if message was a 2-line message, one of the
 702 * lines might have been successfully written. Before sending the
 703 * message, "present" must be cleared in BOTH lines to prevent the receiver
 704 * from prematurely seeing the full message.
 705 */
 706static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
 707			void *mesg, int lines)
 708{
 709	unsigned long m;
 710	int ret, loops = 200;	/* experimentally determined */
 711
 712	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
 713	if (lines == 2) {
 714		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
 715		if (gru_wait(cb) != CBS_IDLE)
 716			return MQE_UNEXPECTED_CB_ERR;
 717	}
 718	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
 719	if (gru_wait(cb) != CBS_IDLE)
 720		return MQE_UNEXPECTED_CB_ERR;
 721
 722	if (!mqd->interrupt_vector)
 723		return MQE_OK;
 724
 725	/*
 726	 * Send a noop message in order to deliver a cross-partition interrupt
 727	 * to the SSI that contains the target message queue. Normally, the
 728	 * interrupt is automatically delivered by hardware following mesq
 729	 * operations, but some error conditions require explicit delivery.
 730	 * The noop message will trigger delivery. Otherwise partition failures
 731	 * could cause unrecovered errors.
 732	 */
 733	do {
 734		ret = send_noop_message(cb, mqd, mesg);
 735	} while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0));
 736
 737	if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) {
 738		/*
 739		 * Don't indicate to the app to resend the message, as it's
 740		 * already been successfully sent.  We simply send an OK
 741		 * (rather than fail the send with MQE_UNEXPECTED_CB_ERR),
 742		 * assuming that the other side is receiving enough
 743		 * interrupts to get this message processed anyway.
 744		 */
 745		ret = MQE_OK;
 746	}
 747	return ret;
 748}
 749
 750/*
 751 * Handle a gru_mesq failure. Some of these failures are software recoverable
 752 * or retryable.
 753 */
 754static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
 755				void *mesg, int lines)
 756{
 757	int substatus, ret = 0;
 758
 759	substatus = gru_get_cb_message_queue_substatus(cb);
 760	switch (substatus) {
 761	case CBSS_NO_ERROR:
 762		STAT(mesq_send_unexpected_error);
 763		ret = MQE_UNEXPECTED_CB_ERR;
 764		break;
 765	case CBSS_LB_OVERFLOWED:
 766		STAT(mesq_send_lb_overflow);
 767		ret = MQE_CONGESTION;
 768		break;
 769	case CBSS_QLIMIT_REACHED:
 770		STAT(mesq_send_qlimit_reached);
 771		ret = send_message_queue_full(cb, mqd, mesg, lines);
 772		break;
 773	case CBSS_AMO_NACKED:
 774		STAT(mesq_send_amo_nacked);
 775		ret = MQE_CONGESTION;
 776		break;
 777	case CBSS_PUT_NACKED:
 778		STAT(mesq_send_put_nacked);
 779		ret = send_message_put_nacked(cb, mqd, mesg, lines);
 780		break;
 781	case CBSS_PAGE_OVERFLOW:
 782		STAT(mesq_page_overflow);
 783		fallthrough;
 784	default:
 785		BUG();
 786	}
 787	return ret;
 788}
 789
 790/*
 791 * Send a message to a message queue
 792 * 	mqd	message queue descriptor
 793 * 	mesg	message. Must be a vaddr within a GSEG
 794 * 	bytes	message size (<= 2 CL)
 795 */
 796int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
 797				unsigned int bytes)
 798{
 799	struct message_header *mhdr;
 800	void *cb;
 801	void *dsr;
 802	int istatus, clines, ret;
 803
 804	STAT(mesq_send);
 805	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
 806
 807	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
 808	if (gru_get_cpu_resources(bytes, &cb, &dsr))
 809		return MQE_BUG_NO_RESOURCES;
 810	memcpy(dsr, mesg, bytes);
 811	mhdr = dsr;
 812	mhdr->present = MQS_FULL;
 813	mhdr->lines = clines;
 814	if (clines == 2) {
 815		mhdr->present2 = get_present2(mhdr);
 816		restore_present2(mhdr, MQS_FULL);
 817	}
 818
 819	do {
 820		ret = MQE_OK;
 821		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
 822		istatus = gru_wait(cb);
 823		if (istatus != CBS_IDLE)
 824			ret = send_message_failure(cb, mqd, dsr, clines);
 825	} while (ret == MQIE_AGAIN);
 826	gru_free_cpu_resources(cb, dsr);
 827
 828	if (ret)
 829		STAT(mesq_send_failed);
 830	return ret;
 831}
 832EXPORT_SYMBOL_GPL(gru_send_message_gpa);
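
/*
 * Illustrative usage sketch (not compiled into the driver): a minimal send
 * loop modeled on quicktest1() below. "qmem" is a hypothetical 1K-aligned
 * buffer that does not cross a page boundary; the payload byte is placed
 * past the message header, exactly as quicktest1() does with mes[8].
 * MQE_CONGESTION is transient and retried; other errors are passed back.
 *
 *	struct gru_message_queue_desc mqd;
 *	char mesg[GRU_CACHE_LINE_BYTES];
 *	int ret;
 *
 *	gru_create_message_queue(&mqd, qmem, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
 *	memset(mesg, 0, sizeof(mesg));
 *	mesg[8] = 42;
 *	do {
 *		ret = gru_send_message_gpa(&mqd, mesg, sizeof(mesg));
 *	} while (ret == MQE_CONGESTION);
 *	if (ret != MQE_OK)
 *		return ret;
 *
 * qmem above is a placeholder for queue memory allocated by the caller.
 */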
 833
 834/*
 835 * Advance the receive pointer for the queue to the next message.
 836 */
 837void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
 838{
 839	struct message_queue *mq = mqd->mq;
 840	struct message_header *mhdr = mq->next;
 841	void *next, *pnext;
 842	int half = -1;
 843	int lines = mhdr->lines;
 844
 845	if (lines == 2)
 846		restore_present2(mhdr, MQS_EMPTY);
 847	mhdr->present = MQS_EMPTY;
 848
 849	pnext = mq->next;
 850	next = pnext + GRU_CACHE_LINE_BYTES * lines;
 851	if (next == mq->limit) {
 852		next = mq->start;
 853		half = 1;
 854	} else if (pnext < mq->start2 && next >= mq->start2) {
 855		half = 0;
 856	}
 857
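	/* The receiver left a half of the queue; mark it drained for the sender */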
 858	if (half >= 0)
 859		mq->hstatus[half] = 1;
 860	mq->next = next;
 861}
 862EXPORT_SYMBOL_GPL(gru_free_message);
 863
 864/*
 865 * Get next message from message queue. Return NULL if no message
 866 * present. User must call gru_free_message() to move to the next message.
 867 * 	mqd	message queue descriptor
 868 */
 869void *gru_get_next_message(struct gru_message_queue_desc *mqd)
 870{
 871	struct message_queue *mq = mqd->mq;
 872	struct message_header *mhdr = mq->next;
 873	int present = mhdr->present;
 874
 875	/* skip NOOP messages */
 876	while (present == MQS_NOOP) {
 877		gru_free_message(mqd, mhdr);
 878		mhdr = mq->next;
 879		present = mhdr->present;
 880	}
 881
 882	/* Wait for both halves of 2 line messages */
 883	if (present == MQS_FULL && mhdr->lines == 2 &&
 884				get_present2(mhdr) == MQS_EMPTY)
 885		present = MQS_EMPTY;
 886
 887	if (!present) {
 888		STAT(mesq_receive_none);
 889		return NULL;
 890	}
 891
 892	if (mhdr->lines == 2)
 893		restore_present2(mhdr, mhdr->present2);
 894
 895	STAT(mesq_receive);
 896	return mhdr;
 897}
 898EXPORT_SYMBOL_GPL(gru_get_next_message);
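
/*
 * Illustrative usage sketch (not compiled into the driver): a polling
 * receive loop for a queue created with gru_create_message_queue(), as in
 * quicktest1() below. Every returned message must be handed back via
 * gru_free_message() so the receive pointer advances.
 *
 *	char *m;
 *
 *	while ((m = gru_get_next_message(&mqd)) != NULL) {
 *		process_payload(m[8]);
 *		gru_free_message(&mqd, m);
 *	}
 *
 * process_payload() above is a placeholder for the caller's handler.
 */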
 899
 900/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
 901
 902/*
 903 * Load a DW from a global GPA. The GPA can be a memory or MMR address.
 904 */
 905int gru_read_gpa(unsigned long *value, unsigned long gpa)
 906{
 907	void *cb;
 908	void *dsr;
 909	int ret, iaa;
 910
 911	STAT(read_gpa);
 912	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
 913		return MQE_BUG_NO_RESOURCES;
 914	iaa = gpa >> 62;
 915	gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
 916	ret = gru_wait(cb);
 917	if (ret == CBS_IDLE)
 918		*value = *(unsigned long *)dsr;
 919	gru_free_cpu_resources(cb, dsr);
 920	return ret;
 921}
 922EXPORT_SYMBOL_GPL(gru_read_gpa);
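
/*
 * Illustrative usage sketch (not compiled into the driver): fetch one DW
 * through the GRU from the global address of a local variable, much as
 * quicktest0() does with gru_vload(). "some_dw" and "val" are hypothetical
 * locals; a CBS_IDLE return means the value was stored into "val".
 *
 *	unsigned long val, some_dw = MAGIC;
 *
 *	if (gru_read_gpa(&val, uv_gpa(&some_dw)) == CBS_IDLE)
 *		pr_debug("gru_read_gpa: 0x%lx\n", val);
 */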
 923
 924
 925/*
 926 * Copy a block of data using the GRU resources
 927 */
 928int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
 929				unsigned int bytes)
 930{
 931	void *cb;
 932	void *dsr;
 933	int ret;
 934
 935	STAT(copy_gpa);
 936	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
 937		return MQE_BUG_NO_RESOURCES;
 938	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
 939		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
 940	ret = gru_wait(cb);
 941	gru_free_cpu_resources(cb, dsr);
 942	return ret;
 943}
 944EXPORT_SYMBOL_GPL(gru_copy_gpa);
 945
 946/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
 947/* 	Temp - will delete after we gain confidence in the GRU		*/
 948
 949static int quicktest0(unsigned long arg)
 950{
 951	unsigned long word0;
 952	unsigned long word1;
 953	void *cb;
 954	void *dsr;
 955	unsigned long *p;
 956	int ret = -EIO;
 957
 958	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
 959		return MQE_BUG_NO_RESOURCES;
 960	p = dsr;
 961	word0 = MAGIC;
 962	word1 = 0;
 963
 964	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
 965	if (gru_wait(cb) != CBS_IDLE) {
 966		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
 967		goto done;
 968	}
 969
 970	if (*p != MAGIC) {
 971		printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
 972		goto done;
 973	}
 974	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
 975	if (gru_wait(cb) != CBS_IDLE) {
 976		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
 977		goto done;
 978	}
 979
 980	if (word0 != word1 || word1 != MAGIC) {
 981		printk(KERN_DEBUG
 982		       "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
 983		     smp_processor_id(), word1, MAGIC);
 984		goto done;
 985	}
 986	ret = 0;
 987
 988done:
 989	gru_free_cpu_resources(cb, dsr);
 990	return ret;
 991}
 992
 993#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~((q) - 1)))
 994
 995static int quicktest1(unsigned long arg)
 996{
 997	struct gru_message_queue_desc mqd;
 998	void *p, *mq;
 999	int i, ret = -EIO;
1000	char mes[GRU_CACHE_LINE_BYTES], *m;
1001
1002	/* Need a 1K, cacheline-aligned buffer that does not cross a page boundary */
1003	p = kmalloc(4096, GFP_KERNEL);
1004	if (p == NULL)
1005		return -ENOMEM;
1006	mq = ALIGNUP(p, 1024);
1007	memset(mes, 0xee, sizeof(mes));
1008
1009	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
1010	for (i = 0; i < 6; i++) {
1011		mes[8] = i;
1012		do {
1013			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
1014		} while (ret == MQE_CONGESTION);
1015		if (ret)
1016			break;
1017	}
1018	if (ret != MQE_QUEUE_FULL || i != 4) {
1019		printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n",
1020		       smp_processor_id(), ret, i);
1021		goto done;
1022	}
1023
1024	for (i = 0; i < 6; i++) {
1025		m = gru_get_next_message(&mqd);
1026		if (!m || m[8] != i)
1027			break;
1028		gru_free_message(&mqd, m);
1029	}
1030	if (i != 4) {
1031		printk(KERN_DEBUG "GRU:%d quicktest1: bad message, i %d, m %p, m8 %d\n",
1032			smp_processor_id(), i, m, m ? m[8] : -1);
1033		goto done;
1034	}
1035	ret = 0;
1036
1037done:
1038	kfree(p);
1039	return ret;
1040}
1041
1042static int quicktest2(unsigned long arg)
1043{
1044	static DECLARE_COMPLETION(cmp);
1045	unsigned long han;
1046	int blade_id = 0;
1047	int numcb = 4;
1048	int ret = 0;
1049	unsigned long *buf;
1050	void *cb0, *cb;
1051	struct gru_control_block_status *gen;
1052	int i, k, istatus, bytes;
1053
1054	bytes = numcb * 4 * 8;
1055	buf = kmalloc(bytes, GFP_KERNEL);
1056	if (!buf)
1057		return -ENOMEM;
1058
1059	ret = -EBUSY;
1060	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
1061	if (!han)
1062		goto done;
1063
1064	gru_lock_async_resource(han, &cb0, NULL);
1065	memset(buf, 0xee, bytes);
1066	for (i = 0; i < numcb; i++)
1067		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
1068				XTYPE_DW, 4, 1, IMA_INTERRUPT);
1069
1070	ret = 0;
1071	k = numcb;
1072	do {
1073		gru_wait_async_cbr(han);
1074		for (i = 0; i < numcb; i++) {
1075			cb = cb0 + i * GRU_HANDLE_STRIDE;
1076			istatus = gru_check_status(cb);
1077			if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
1078				break;
1079		}
1080		if (i == numcb)
1081			continue;
1082		if (istatus != CBS_IDLE) {
1083			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
1084			ret = -EFAULT;
1085		} else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
1086				buf[4 * i + 3]) {
1087			printk(KERN_DEBUG "GRU:%d quicktest2:cb %d,  buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
1088			       smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
1089			ret = -EIO;
1090		}
1091		k--;
1092		gen = cb;
1093		gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
1094	} while (k);
1095	BUG_ON(cmp.done);
1096
1097	gru_unlock_async_resource(han);
1098	gru_release_async_resources(han);
1099done:
1100	kfree(buf);
1101	return ret;
1102}
1103
1104#define BUFSIZE 200
1105static int quicktest3(unsigned long arg)
1106{
1107	char buf1[BUFSIZE], buf2[BUFSIZE];
1108	int ret = 0;
1109
1110	memset(buf2, 0, sizeof(buf2));
1111	memset(buf1, get_cycles() & 255, sizeof(buf1));
1112	gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
1113	if (memcmp(buf1, buf2, BUFSIZE)) {
1114		printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
1115		ret = -EIO;
1116	}
1117	return ret;
1118}
1119
1120/*
1121 * Debugging only. User hook for various kernel tests
1122 * of driver & gru.
1123 */
1124int gru_ktest(unsigned long arg)
1125{
1126	int ret = -EINVAL;
1127
1128	switch (arg & 0xff) {
1129	case 0:
1130		ret = quicktest0(arg);
1131		break;
1132	case 1:
1133		ret = quicktest1(arg);
1134		break;
1135	case 2:
1136		ret = quicktest2(arg);
1137		break;
1138	case 3:
1139		ret = quicktest3(arg);
1140		break;
1141	case 99:
1142		ret = gru_free_kernel_contexts();
1143		break;
1144	}
1145	return ret;
1146
1147}
1148
1149int gru_kservices_init(void)
1150{
1151	return 0;
1152}
1153
1154void gru_kservices_exit(void)
1155{
1156	if (gru_free_kernel_contexts())
1157		BUG();
1158}
1159