/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock protecting the list of memory controllers (mc_devices) */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
	debugf4("\tchannel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm)
{
	int i;

	debugf4("\tdimm = %p\n", dimm);
	debugf4("\tdimm->label = '%s'\n", dimm->label);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
	debugf4("\tdimm location ");
	for (i = 0; i < dimm->mci->n_layers; i++) {
		printk(KERN_CONT "%d", dimm->location[i]);
		if (i < dimm->mci->n_layers - 1)
			printk(KERN_CONT ".");
	}
	printk(KERN_CONT "\n");
	debugf4("\tdimm->grain = %d\n", dimm->grain);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
		mci->tot_dimms, mci->dimms);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * Keep these in sync with enum mem_type.
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. On
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed to keep advancing to the proper offsets in
 * memory when allocating a struct together with its embedded structs, as
 * edac_device_alloc_ctl_info() does, for example.
 *
 * On return, the pointer 'p' will have been incremented, ready to be used
 * on the next call to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}

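/*
 * Example (illustrative sketch, not part of the original file): computing
 * aligned offsets for a single-shot allocation.  'struct foo', 'struct bar'
 * and 'n_bars' are hypothetical names.
 *
 *	void *ptr = NULL, *mem;
 *	struct foo *foo;
 *	struct bar *bar;
 *	unsigned long size;
 *
 *	foo = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *	bar = edac_align_ptr(&ptr, sizeof(*bar), n_bars);
 *	size = (unsigned long)ptr;
 *
 *	mem = kzalloc(size, GFP_KERNEL);
 *	foo = (struct foo *)((char *)mem + (unsigned long)foo);
 *	bar = (struct bar *)((char *)mem + (unsigned long)bar);
 *
 * After the two edac_align_ptr() calls, 'foo' and 'bar' hold aligned
 * offsets from zero and 'ptr' holds the total size; adding the base of the
 * single kzalloc'ed chunk to each offset yields the real pointers.
 * edac_mc_alloc() below uses exactly this pattern.
 */
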
/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
 * @sz_pvt:		size of private storage needed
 *
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function allocates multiple struct dimm_info
 * in such scenarios, as grouping the multiple ranks would require changes
 * in the drivers.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csi, *csr;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, err, row, chn, n, len;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		__func__, size,
		tot_dimms,
		per_rank ? "ranks" : "dimms",
		tot_csrows * tot_channels);
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->csrows = csi;
	mci->dimms  = dimm;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;

	/*
	 * Fill the csrow struct
	 */
	for (row = 0; row < tot_csrows; row++) {
		csr = &csi[row];
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		chp = &chi[row * tot_channels];
		csr->channels = chp;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Fill the dimm struct
	 */
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
		per_rank ? "ranks" : "dimms");
	for (i = 0; i < tot_dimms; i++) {
		chan = &csi[row].channels[chn];
		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
			       pos[0], pos[1], pos[2]);
		dimm->mci = mci;

		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
			pos[0], pos[1], pos[2], row, chn);

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *      edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

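/*
 * Example (hypothetical driver, illustrative sketch only): describing a
 * controller with 2 channels and 4 DIMM slots per channel, plus a driver
 * private struct.  'struct my_priv' is an assumed name.
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[0].size = 2;
 *	layers[0].is_virt_csrow = false;
 *	layers[1].type = EDAC_MC_LAYER_SLOT;
 *	layers[1].size = 4;
 *	layers[1].is_virt_csrow = true;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_priv));
 *	if (!mci)
 *		return -ENOMEM;
 *
 * With this description the calculation above yields tot_dimms = 8,
 * tot_csrows = 4 and tot_channels = 2.
 */
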
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);


/**
 * find_mci_by_dev
 *
 *	scan the list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related to the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to the offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

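/*
 * Example (illustrative sketch, not part of the original file): the polling
 * machinery above ends up calling a driver-supplied mci->edac_check every
 * edac_mc_get_poll_msec() milliseconds.  A hypothetical callback would look
 * like:
 *
 *	static void my_edac_check(struct mem_ctl_info *mci)
 *	{
 *		// read the controller's error registers and, on error,
 *		// call edac_mc_handle_error() with the decoded location
 *	}
 *
 * Drivers whose hardware raises an interrupt on error instead leave
 * mci->edac_check NULL and run in OP_RUNNING_INTERRUPT mode (see
 * edac_mc_add_mc() below).
 */
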
/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);


	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}


/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

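/*
 * Note (sketch, not part of the original file): the early 'break' above
 * relies on add_mc_to_global_list() keeping mc_devices sorted by mc_idx, so
 * the walk can stop at the first entry whose index is >= 'idx'.  A lookup
 * under the documented locking rule would be:
 *
 *	mutex_lock(&mem_ctls_mutex);
 *	mci = edac_mc_find(0);
 *	mutex_unlock(&mem_ctls_mutex);
 *
 * (mem_ctls_mutex is static to this file, so callers outside it cannot
 * literally take it; the snippet only illustrates the stated rule.)
 */
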
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			edac_mc_dump_dimm(&mci->dimms[i]);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

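/*
 * Example (hypothetical driver probe, illustrative sketch only): the usual
 * registration sequence is alloc -> fill -> add.  'struct my_priv',
 * 'my_edac_check' and the particular flag values are assumptions, not part
 * of this file.
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_priv));
 *	if (!mci)
 *		return -ENOMEM;
 *
 *	mci->dev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR3;
 *	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *	mci->mod_name = "my_edac";
 *	mci->ctl_name = "my_ctl";
 *	mci->edac_check = my_edac_check;	(non-NULL => polled mode)
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */
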
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i, j, n;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j].dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

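/*
 * Worked example for the match above (numbers are made up): a csrow with
 * first_page = 0x100000, last_page = 0x10ffff and page_mask = 0xfff00000
 * matches page 0x105000, because 0x100000 <= 0x105000 <= 0x10ffff and
 * (0x105000 & 0xfff00000) == (0x100000 & 0xfff00000) == 0x100000.  The
 * page_mask lets interleaved controllers claim only the address bits they
 * actually decode.
 */
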
const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
				    bool enable_per_layer_report,
				    const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ce_mc++;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

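/*
 * Worked example for the index arithmetic above (and in edac_inc_ue_error()
 * below): with two layers of sizes {2, 4} and pos = {1, 2}, the loop bumps
 * ce_per_layer[0][1] first, then computes index = 1 * 4 + 2 = 6 and bumps
 * ce_per_layer[1][6] - a Horner-style flattening of the per-layer
 * coordinates into each layer's linear counter array.
 */
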
static void edac_inc_ue_error(struct mem_ctl_info *mci,
				    bool enable_per_layer_report,
				    const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ue_mc++;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_ce_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  u32 grain)
{
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s - %s)\n",
				       msg, label, location,
				       detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s)\n",
				       msg, label, location,
				       detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MCs that can't do this lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
					offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s - %s)\n",
				       msg, label, location, detail,
				       other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s)\n",
				       msg, label, location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s on %s (%s%s - %s)\n",
			      msg, label, location, detail, other_detail);
		else
			panic("UE %s on %s (%s%s)\n",
			      msg, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos);
}

#define OTHER_LABEL " or "
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int layer0,
			  const int layer1,
			  const int layer2,
			  const char *msg,
			  const char *other_detail,
			  const void *mcelog)
{
	/* FIXME: too much for stack: move it to some pre-allocated area */
	char detail[80], location[80];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
	int i;
	u32 grain;
	bool enable_per_layer_report = false;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {
			if (type == HW_EVENT_ERR_CORRECTED)
				p = "CE";
			else
				p = "UE";

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	grain = 0;
	p = label;
	*p = '\0';
	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = &mci->dimms[i];

		if (layer0 >= 0 && layer0 != dimm->location[0])
			continue;
		if (layer1 >= 0 && layer1 != dimm->location[1])
			continue;
		if (layer2 >= 0 && layer2 != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > grain)
			grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/...  may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (enable_per_layer_report && dimm->nr_pages) {
			if (p != label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			debugf4("%s: %s csrows map: (%d,%d)\n",
				__func__,
				mci->mem_is_per_rank ? "rank" : "dimm",
				dimm->csrow, dimm->cschannel);

			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!enable_per_layer_report) {
		strcpy(label, "any memory");
	} else {
		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
			__func__, row, chan);
		if (p == label)
			strcpy(label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row].ce_count++;
				if (chan >= 0)
					mci->csrows[row].channels[chan].ce_count++;
			}
		} else
			if (row >= 0)
				mci->csrows[row].ue_count++;
	}

	/* Fill the RAM location data */
	p = location;
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
			page_frame_number, offset_in_page,
			grain, syndrome);
		edac_ce_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report,
			      page_frame_number, offset_in_page, grain);
	} else {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%d",
			page_frame_number, offset_in_page, grain);

		edac_ue_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
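
/*
 * Example (hypothetical driver, illustrative sketch only): reporting a
 * corrected error found at channel 0, slot 2 of a two-layer controller.
 * The page/offset/syndrome values are made up.
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     0x12345, 0x40, 0xbeef,
 *			     0, 2, -1,
 *			     "single-bit ECC", "", NULL);
 *
 * Passing -1 for an unused or unknown layer makes the DIMM filter above
 * match every position in that layer, so the log lists all possibly
 * affected DIMM labels joined by OTHER_LABEL.
 */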