Linux Audio

Check our new training course

Loading...
v6.8
  1/*
  2 * Intel e7xxx Memory Controller kernel module
  3 * (C) 2003 Linux Networx (http://lnxi.com)
  4 * This file may be distributed under the terms of the
  5 * GNU General Public License.
  6 *
  7 * See "enum e7xxx_chips" below for supported chipsets
  8 *
  9 * Written by Thayne Harbaugh
 10 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 11 *	http://www.anime.net/~goemon/linux-ecc/
 12 *
 13 * Datasheet:
 14 *	http://www.intel.com/content/www/us/en/chipsets/e7501-chipset-memory-controller-hub-datasheet.html
 15 *
 16 * Contributors:
 17 *	Eric Biederman (Linux Networx)
 18 *	Tom Zimmerman (Linux Networx)
 19 *	Jim Garlick (Lawrence Livermore National Labs)
 20 *	Dave Peterson (Lawrence Livermore National Labs)
 21 *	That One Guy (Some other place)
 22 *	Wang Zhenyu (intel.com)
 23 *
 24 * $Id: edac_e7xxx.c,v 1.5.2.9 2005/10/05 00:43:44 dsp_llnl Exp $
 25 *
 26 */
 27
 28#include <linux/module.h>
 29#include <linux/init.h>
 30#include <linux/pci.h>
 31#include <linux/pci_ids.h>
 32#include <linux/edac.h>
 33#include "edac_module.h"
 34
 
 35#define	EDAC_MOD_STR	"e7xxx_edac"
 36
 37#define e7xxx_printk(level, fmt, arg...) \
 38	edac_printk(level, "e7xxx", fmt, ##arg)
 39
 40#define e7xxx_mc_printk(mci, level, fmt, arg...) \
 41	edac_mc_chipset_printk(mci, level, "e7xxx", fmt, ##arg)
 42
 43#ifndef PCI_DEVICE_ID_INTEL_7205_0
 44#define PCI_DEVICE_ID_INTEL_7205_0	0x255d
 45#endif				/* PCI_DEVICE_ID_INTEL_7205_0 */
 46
 47#ifndef PCI_DEVICE_ID_INTEL_7205_1_ERR
 48#define PCI_DEVICE_ID_INTEL_7205_1_ERR	0x2551
 49#endif				/* PCI_DEVICE_ID_INTEL_7205_1_ERR */
 50
 51#ifndef PCI_DEVICE_ID_INTEL_7500_0
 52#define PCI_DEVICE_ID_INTEL_7500_0	0x2540
 53#endif				/* PCI_DEVICE_ID_INTEL_7500_0 */
 54
 55#ifndef PCI_DEVICE_ID_INTEL_7500_1_ERR
 56#define PCI_DEVICE_ID_INTEL_7500_1_ERR	0x2541
 57#endif				/* PCI_DEVICE_ID_INTEL_7500_1_ERR */
 58
 59#ifndef PCI_DEVICE_ID_INTEL_7501_0
 60#define PCI_DEVICE_ID_INTEL_7501_0	0x254c
 61#endif				/* PCI_DEVICE_ID_INTEL_7501_0 */
 62
 63#ifndef PCI_DEVICE_ID_INTEL_7501_1_ERR
 64#define PCI_DEVICE_ID_INTEL_7501_1_ERR	0x2541
 65#endif				/* PCI_DEVICE_ID_INTEL_7501_1_ERR */
 66
 67#ifndef PCI_DEVICE_ID_INTEL_7505_0
 68#define PCI_DEVICE_ID_INTEL_7505_0	0x2550
 69#endif				/* PCI_DEVICE_ID_INTEL_7505_0 */
 70
 71#ifndef PCI_DEVICE_ID_INTEL_7505_1_ERR
 72#define PCI_DEVICE_ID_INTEL_7505_1_ERR	0x2551
 73#endif				/* PCI_DEVICE_ID_INTEL_7505_1_ERR */
 74
 75#define E7XXX_NR_CSROWS		8	/* number of csrows */
 76#define E7XXX_NR_DIMMS		8	/* 2 channels, 4 dimms/channel */
 77
 78/* E7XXX register addresses - device 0 function 0 */
 79#define E7XXX_DRB		0x60	/* DRAM row boundary register (8b) */
 80#define E7XXX_DRA		0x70	/* DRAM row attribute register (8b) */
 81					/*
 82					 * 31   Device width row 7 0=x8 1=x4
 83					 * 27   Device width row 6
 84					 * 23   Device width row 5
 85					 * 19   Device width row 4
 86					 * 15   Device width row 3
 87					 * 11   Device width row 2
 88					 *  7   Device width row 1
 89					 *  3   Device width row 0
 90					 */
 91#define E7XXX_DRC		0x7C	/* DRAM controller mode reg (32b) */
 92					/*
 93					 * 22    Number channels 0=1,1=2
 94					 * 19:18 DRB Granularity 32/64MB
 95					 */
 96#define E7XXX_TOLM		0xC4	/* DRAM top of low memory reg (16b) */
 97#define E7XXX_REMAPBASE		0xC6	/* DRAM remap base address reg (16b) */
 98#define E7XXX_REMAPLIMIT	0xC8	/* DRAM remap limit address reg (16b) */
 99
100/* E7XXX register addresses - device 0 function 1 */
101#define E7XXX_DRAM_FERR		0x80	/* DRAM first error register (8b) */
102#define E7XXX_DRAM_NERR		0x82	/* DRAM next error register (8b) */
103#define E7XXX_DRAM_CELOG_ADD	0xA0	/* DRAM first correctable memory */
104					/*     error address register (32b) */
105					/*
106					 * 31:28 Reserved
107					 * 27:6  CE address (4k block 33:12)
108					 *  5:0  Reserved
109					 */
110#define E7XXX_DRAM_UELOG_ADD	0xB0	/* DRAM first uncorrectable memory */
111					/*     error address register (32b) */
112					/*
113					 * 31:28 Reserved
					 * 27:6  UE address (4k block 33:12)
115					 *  5:0  Reserved
116					 */
117#define E7XXX_DRAM_CELOG_SYNDROME 0xD0	/* DRAM first correctable memory */
118					/*     error syndrome register (16b) */
119
/*
 * Supported chipsets.  The enumerators are used as indices into
 * e7xxx_devs[] and as driver_data values in e7xxx_pci_tbl[].
 */
enum e7xxx_chips {
	E7500 = 0,
	E7501 = 1,
	E7505 = 2,
	E7205 = 3,
};
126
127struct e7xxx_pvt {
128	struct pci_dev *bridge_ck;
129	u32 tolm;
130	u32 remapbase;
131	u32 remaplimit;
132	const struct e7xxx_dev_info *dev_info;
133};
134
135struct e7xxx_dev_info {
136	u16 err_dev;
137	const char *ctl_name;
138};
139
140struct e7xxx_error_info {
141	u8 dram_ferr;
142	u8 dram_nerr;
143	u32 dram_celog_add;
144	u16 dram_celog_syndrome;
145	u32 dram_uelog_add;
146};
147
148static struct edac_pci_ctl_info *e7xxx_pci;
149
150static const struct e7xxx_dev_info e7xxx_devs[] = {
151	[E7500] = {
152		.err_dev = PCI_DEVICE_ID_INTEL_7500_1_ERR,
153		.ctl_name = "E7500"},
154	[E7501] = {
155		.err_dev = PCI_DEVICE_ID_INTEL_7501_1_ERR,
156		.ctl_name = "E7501"},
157	[E7505] = {
158		.err_dev = PCI_DEVICE_ID_INTEL_7505_1_ERR,
159		.ctl_name = "E7505"},
160	[E7205] = {
161		.err_dev = PCI_DEVICE_ID_INTEL_7205_1_ERR,
162		.ctl_name = "E7205"},
163};
164
165/* FIXME - is this valid for both SECDED and S4ECD4ED? */
166static inline int e7xxx_find_channel(u16 syndrome)
167{
168	edac_dbg(3, "\n");
169
170	if ((syndrome & 0xff00) == 0)
171		return 0;
172
173	if ((syndrome & 0x00ff) == 0)
174		return 1;
175
176	if ((syndrome & 0xf000) == 0 || (syndrome & 0x0f00) == 0)
177		return 0;
178
179	return 1;
180}
181
182static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci,
183				unsigned long page)
184{
185	u32 remap;
186	struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info;
187
188	edac_dbg(3, "\n");
189
190	if ((page < pvt->tolm) ||
191		((page >= 0x100000) && (page < pvt->remapbase)))
192		return page;
193
194	remap = (page - pvt->tolm) + pvt->remapbase;
195
196	if (remap < pvt->remaplimit)
197		return remap;
198
199	e7xxx_printk(KERN_ERR, "Invalid page %lx - out of range\n", page);
200	return pvt->tolm - 1;
201}
202
203static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
204{
205	u32 error_1b, page;
206	u16 syndrome;
207	int row;
208	int channel;
209
210	edac_dbg(3, "\n");
211	/* read the error address */
212	error_1b = info->dram_celog_add;
213	/* FIXME - should use PAGE_SHIFT */
214	page = error_1b >> 6;	/* convert the address to 4k page */
215	/* read the syndrome */
216	syndrome = info->dram_celog_syndrome;
217	/* FIXME - check for -1 */
218	row = edac_mc_find_csrow_by_page(mci, page);
219	/* convert syndrome to channel */
220	channel = e7xxx_find_channel(syndrome);
221	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, page, 0, syndrome,
222			     row, channel, -1, "e7xxx CE", "");
223}
224
225static void process_ce_no_info(struct mem_ctl_info *mci)
226{
227	edac_dbg(3, "\n");
228	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
229			     "e7xxx CE log register overflow", "");
230}
231
232static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
233{
234	u32 error_2b, block_page;
235	int row;
236
237	edac_dbg(3, "\n");
238	/* read the error address */
239	error_2b = info->dram_uelog_add;
240	/* FIXME - should use PAGE_SHIFT */
241	block_page = error_2b >> 6;	/* convert to 4k address */
242	row = edac_mc_find_csrow_by_page(mci, block_page);
243
244	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, block_page, 0, 0,
245			     row, -1, -1, "e7xxx UE", "");
246}
247
248static void process_ue_no_info(struct mem_ctl_info *mci)
249{
250	edac_dbg(3, "\n");
251
252	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
253			     "e7xxx UE log register overflow", "");
254}
255
256static void e7xxx_get_error_info(struct mem_ctl_info *mci,
257				 struct e7xxx_error_info *info)
258{
259	struct e7xxx_pvt *pvt;
260
261	pvt = (struct e7xxx_pvt *)mci->pvt_info;
262	pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_FERR, &info->dram_ferr);
263	pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_NERR, &info->dram_nerr);
264
265	if ((info->dram_ferr & 1) || (info->dram_nerr & 1)) {
266		pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_CELOG_ADD,
267				&info->dram_celog_add);
268		pci_read_config_word(pvt->bridge_ck,
269				E7XXX_DRAM_CELOG_SYNDROME,
270				&info->dram_celog_syndrome);
271	}
272
273	if ((info->dram_ferr & 2) || (info->dram_nerr & 2))
274		pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_UELOG_ADD,
275				&info->dram_uelog_add);
276
277	if (info->dram_ferr & 3)
278		pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_FERR, 0x03, 0x03);
279
280	if (info->dram_nerr & 3)
281		pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03);
282}
283
284static int e7xxx_process_error_info(struct mem_ctl_info *mci,
285				struct e7xxx_error_info *info,
286				int handle_errors)
287{
288	int error_found;
289
290	error_found = 0;
291
292	/* decode and report errors */
293	if (info->dram_ferr & 1) {	/* check first error correctable */
294		error_found = 1;
295
296		if (handle_errors)
297			process_ce(mci, info);
298	}
299
300	if (info->dram_ferr & 2) {	/* check first error uncorrectable */
301		error_found = 1;
302
303		if (handle_errors)
304			process_ue(mci, info);
305	}
306
307	if (info->dram_nerr & 1) {	/* check next error correctable */
308		error_found = 1;
309
310		if (handle_errors) {
311			if (info->dram_ferr & 1)
312				process_ce_no_info(mci);
313			else
314				process_ce(mci, info);
315		}
316	}
317
318	if (info->dram_nerr & 2) {	/* check next error uncorrectable */
319		error_found = 1;
320
321		if (handle_errors) {
322			if (info->dram_ferr & 2)
323				process_ue_no_info(mci);
324			else
325				process_ue(mci, info);
326		}
327	}
328
329	return error_found;
330}
331
332static void e7xxx_check(struct mem_ctl_info *mci)
333{
334	struct e7xxx_error_info info;
335
 
336	e7xxx_get_error_info(mci, &info);
337	e7xxx_process_error_info(mci, &info, 1);
338}
339
340/* Return 1 if dual channel mode is active.  Else return 0. */
341static inline int dual_channel_active(u32 drc, int dev_idx)
342{
343	return (dev_idx == E7501) ? ((drc >> 22) & 0x1) : 1;
344}
345
346/* Return DRB granularity (0=32mb, 1=64mb). */
347static inline int drb_granularity(u32 drc, int dev_idx)
348{
349	/* only e7501 can be single channel */
350	return (dev_idx == E7501) ? ((drc >> 18) & 0x3) : 1;
351}
352
353static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
354			int dev_idx, u32 drc)
355{
356	unsigned long last_cumul_size;
357	int index, j;
358	u8 value;
359	u32 dra, cumul_size, nr_pages;
360	int drc_chan, drc_drbg, drc_ddim, mem_dev;
361	struct csrow_info *csrow;
362	struct dimm_info *dimm;
363	enum edac_type edac_mode;
364
365	pci_read_config_dword(pdev, E7XXX_DRA, &dra);
366	drc_chan = dual_channel_active(drc, dev_idx);
367	drc_drbg = drb_granularity(drc, dev_idx);
368	drc_ddim = (drc >> 20) & 0x3;
369	last_cumul_size = 0;
370
371	/* The dram row boundary (DRB) reg values are boundary address
372	 * for each DRAM row with a granularity of 32 or 64MB (single/dual
373	 * channel operation).  DRB regs are cumulative; therefore DRB7 will
374	 * contain the total memory contained in all eight rows.
375	 */
376	for (index = 0; index < mci->nr_csrows; index++) {
377		/* mem_dev 0=x8, 1=x4 */
378		mem_dev = (dra >> (index * 4 + 3)) & 0x1;
379		csrow = mci->csrows[index];
380
381		pci_read_config_byte(pdev, E7XXX_DRB + index, &value);
382		/* convert a 64 or 32 MiB DRB to a page size. */
383		cumul_size = value << (25 + drc_drbg - PAGE_SHIFT);
384		edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
 
385		if (cumul_size == last_cumul_size)
386			continue;	/* not populated */
387
388		csrow->first_page = last_cumul_size;
389		csrow->last_page = cumul_size - 1;
390		nr_pages = cumul_size - last_cumul_size;
391		last_cumul_size = cumul_size;
 
 
 
392
393		/*
394		* if single channel or x8 devices then SECDED
395		* if dual channel and x4 then S4ECD4ED
396		*/
397		if (drc_ddim) {
398			if (drc_chan && mem_dev) {
399				edac_mode = EDAC_S4ECD4ED;
400				mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
401			} else {
402				edac_mode = EDAC_SECDED;
403				mci->edac_cap |= EDAC_FLAG_SECDED;
404			}
405		} else
406			edac_mode = EDAC_NONE;
407
408		for (j = 0; j < drc_chan + 1; j++) {
409			dimm = csrow->channels[j]->dimm;
410
411			dimm->nr_pages = nr_pages / (drc_chan + 1);
412			dimm->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
413			dimm->mtype = MEM_RDDR;	/* only one type supported */
414			dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
415			dimm->edac_mode = edac_mode;
416		}
417	}
418}
419
420static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
421{
422	u16 pci_data;
423	struct mem_ctl_info *mci = NULL;
424	struct edac_mc_layer layers[2];
425	struct e7xxx_pvt *pvt = NULL;
426	u32 drc;
427	int drc_chan;
428	struct e7xxx_error_info discard;
429
430	edac_dbg(0, "mci\n");
431
432	pci_read_config_dword(pdev, E7XXX_DRC, &drc);
433
434	drc_chan = dual_channel_active(drc, dev_idx);
435	/*
436	 * According with the datasheet, this device has a maximum of
437	 * 4 DIMMS per channel, either single-rank or dual-rank. So, the
438	 * total amount of dimms is 8 (E7XXX_NR_DIMMS).
439	 * That means that the DIMM is mapped as CSROWs, and the channel
440	 * will map the rank. So, an error to either channel should be
441	 * attributed to the same dimm.
442	 */
443	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
444	layers[0].size = E7XXX_NR_CSROWS;
445	layers[0].is_virt_csrow = true;
446	layers[1].type = EDAC_MC_LAYER_CHANNEL;
447	layers[1].size = drc_chan + 1;
448	layers[1].is_virt_csrow = false;
449	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
450	if (mci == NULL)
451		return -ENOMEM;
452
453	edac_dbg(3, "init mci\n");
454	mci->mtype_cap = MEM_FLAG_RDDR;
455	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED |
456		EDAC_FLAG_S4ECD4ED;
457	/* FIXME - what if different memory types are in different csrows? */
458	mci->mod_name = EDAC_MOD_STR;
459	mci->pdev = &pdev->dev;
460	edac_dbg(3, "init pvt\n");
 
461	pvt = (struct e7xxx_pvt *)mci->pvt_info;
462	pvt->dev_info = &e7xxx_devs[dev_idx];
463	pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
464					pvt->dev_info->err_dev, pvt->bridge_ck);
465
466	if (!pvt->bridge_ck) {
467		e7xxx_printk(KERN_ERR, "error reporting device not found:"
468			"vendor %x device 0x%x (broken BIOS?)\n",
469			PCI_VENDOR_ID_INTEL, e7xxx_devs[dev_idx].err_dev);
470		goto fail0;
471	}
472
473	edac_dbg(3, "more mci init\n");
474	mci->ctl_name = pvt->dev_info->ctl_name;
475	mci->dev_name = pci_name(pdev);
476	mci->edac_check = e7xxx_check;
477	mci->ctl_page_to_phys = ctl_page_to_phys;
478	e7xxx_init_csrows(mci, pdev, dev_idx, drc);
479	mci->edac_cap |= EDAC_FLAG_NONE;
480	edac_dbg(3, "tolm, remapbase, remaplimit\n");
481	/* load the top of low memory, remap base, and remap limit vars */
482	pci_read_config_word(pdev, E7XXX_TOLM, &pci_data);
483	pvt->tolm = ((u32) pci_data) << 4;
484	pci_read_config_word(pdev, E7XXX_REMAPBASE, &pci_data);
485	pvt->remapbase = ((u32) pci_data) << 14;
486	pci_read_config_word(pdev, E7XXX_REMAPLIMIT, &pci_data);
487	pvt->remaplimit = ((u32) pci_data) << 14;
488	e7xxx_printk(KERN_INFO,
489		"tolm = %x, remapbase = %x, remaplimit = %x\n", pvt->tolm,
490		pvt->remapbase, pvt->remaplimit);
491
492	/* clear any pending errors, or initial state bits */
493	e7xxx_get_error_info(mci, &discard);
494
495	/* Here we assume that we will never see multiple instances of this
496	 * type of memory controller.  The ID is therefore hardcoded to 0.
497	 */
498	if (edac_mc_add_mc(mci)) {
499		edac_dbg(3, "failed edac_mc_add_mc()\n");
500		goto fail1;
501	}
502
503	/* allocating generic PCI control info */
504	e7xxx_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
505	if (!e7xxx_pci) {
506		printk(KERN_WARNING
507			"%s(): Unable to create PCI control\n",
508			__func__);
509		printk(KERN_WARNING
510			"%s(): PCI error report via EDAC not setup\n",
511			__func__);
512	}
513
514	/* get this far and it's successful */
515	edac_dbg(3, "success\n");
516	return 0;
517
518fail1:
519	pci_dev_put(pvt->bridge_ck);
520
521fail0:
522	edac_mc_free(mci);
523
524	return -ENODEV;
525}
526
527/* returns count (>= 0), or negative on error */
528static int e7xxx_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
529{
530	edac_dbg(0, "\n");
531
532	/* wake up and enable device */
533	return pci_enable_device(pdev) ?
534		-EIO : e7xxx_probe1(pdev, ent->driver_data);
535}
536
537static void e7xxx_remove_one(struct pci_dev *pdev)
538{
539	struct mem_ctl_info *mci;
540	struct e7xxx_pvt *pvt;
541
542	edac_dbg(0, "\n");
543
544	if (e7xxx_pci)
545		edac_pci_release_generic_ctl(e7xxx_pci);
546
547	if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
548		return;
549
550	pvt = (struct e7xxx_pvt *)mci->pvt_info;
551	pci_dev_put(pvt->bridge_ck);
552	edac_mc_free(mci);
553}
554
555static const struct pci_device_id e7xxx_pci_tbl[] = {
556	{
557	 PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
558	 E7205},
559	{
560	 PCI_VEND_DEV(INTEL, 7500_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
561	 E7500},
562	{
563	 PCI_VEND_DEV(INTEL, 7501_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
564	 E7501},
565	{
566	 PCI_VEND_DEV(INTEL, 7505_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
567	 E7505},
568	{
569	 0,
570	 }			/* 0 terminated list. */
571};
572
573MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl);
574
575static struct pci_driver e7xxx_driver = {
576	.name = EDAC_MOD_STR,
577	.probe = e7xxx_init_one,
578	.remove = e7xxx_remove_one,
579	.id_table = e7xxx_pci_tbl,
580};
581
582static int __init e7xxx_init(void)
583{
584       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
585       opstate_init();
586
587	return pci_register_driver(&e7xxx_driver);
588}
589
590static void __exit e7xxx_exit(void)
591{
592	pci_unregister_driver(&e7xxx_driver);
593}
594
595module_init(e7xxx_init);
596module_exit(e7xxx_exit);
597
598MODULE_LICENSE("GPL");
599MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al");
 
600MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers");
601module_param(edac_op_state, int, 0444);
602MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
v3.1
  1/*
  2 * Intel e7xxx Memory Controller kernel module
  3 * (C) 2003 Linux Networx (http://lnxi.com)
  4 * This file may be distributed under the terms of the
  5 * GNU General Public License.
  6 *
  7 * See "enum e7xxx_chips" below for supported chipsets
  8 *
  9 * Written by Thayne Harbaugh
 10 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 11 *	http://www.anime.net/~goemon/linux-ecc/
 12 *
 
 
 
 13 * Contributors:
 14 *	Eric Biederman (Linux Networx)
 15 *	Tom Zimmerman (Linux Networx)
 16 *	Jim Garlick (Lawrence Livermore National Labs)
 17 *	Dave Peterson (Lawrence Livermore National Labs)
 18 *	That One Guy (Some other place)
 19 *	Wang Zhenyu (intel.com)
 20 *
 21 * $Id: edac_e7xxx.c,v 1.5.2.9 2005/10/05 00:43:44 dsp_llnl Exp $
 22 *
 23 */
 24
 25#include <linux/module.h>
 26#include <linux/init.h>
 27#include <linux/pci.h>
 28#include <linux/pci_ids.h>
 29#include <linux/edac.h>
 30#include "edac_core.h"
 31
 32#define	E7XXX_REVISION " Ver: 2.0.2"
 33#define	EDAC_MOD_STR	"e7xxx_edac"
 34
 35#define e7xxx_printk(level, fmt, arg...) \
 36	edac_printk(level, "e7xxx", fmt, ##arg)
 37
 38#define e7xxx_mc_printk(mci, level, fmt, arg...) \
 39	edac_mc_chipset_printk(mci, level, "e7xxx", fmt, ##arg)
 40
 41#ifndef PCI_DEVICE_ID_INTEL_7205_0
 42#define PCI_DEVICE_ID_INTEL_7205_0	0x255d
 43#endif				/* PCI_DEVICE_ID_INTEL_7205_0 */
 44
 45#ifndef PCI_DEVICE_ID_INTEL_7205_1_ERR
 46#define PCI_DEVICE_ID_INTEL_7205_1_ERR	0x2551
 47#endif				/* PCI_DEVICE_ID_INTEL_7205_1_ERR */
 48
 49#ifndef PCI_DEVICE_ID_INTEL_7500_0
 50#define PCI_DEVICE_ID_INTEL_7500_0	0x2540
 51#endif				/* PCI_DEVICE_ID_INTEL_7500_0 */
 52
 53#ifndef PCI_DEVICE_ID_INTEL_7500_1_ERR
 54#define PCI_DEVICE_ID_INTEL_7500_1_ERR	0x2541
 55#endif				/* PCI_DEVICE_ID_INTEL_7500_1_ERR */
 56
 57#ifndef PCI_DEVICE_ID_INTEL_7501_0
 58#define PCI_DEVICE_ID_INTEL_7501_0	0x254c
 59#endif				/* PCI_DEVICE_ID_INTEL_7501_0 */
 60
 61#ifndef PCI_DEVICE_ID_INTEL_7501_1_ERR
 62#define PCI_DEVICE_ID_INTEL_7501_1_ERR	0x2541
 63#endif				/* PCI_DEVICE_ID_INTEL_7501_1_ERR */
 64
 65#ifndef PCI_DEVICE_ID_INTEL_7505_0
 66#define PCI_DEVICE_ID_INTEL_7505_0	0x2550
 67#endif				/* PCI_DEVICE_ID_INTEL_7505_0 */
 68
 69#ifndef PCI_DEVICE_ID_INTEL_7505_1_ERR
 70#define PCI_DEVICE_ID_INTEL_7505_1_ERR	0x2551
 71#endif				/* PCI_DEVICE_ID_INTEL_7505_1_ERR */
 72
 73#define E7XXX_NR_CSROWS		8	/* number of csrows */
 74#define E7XXX_NR_DIMMS		8	/* FIXME - is this correct? */
 75
 76/* E7XXX register addresses - device 0 function 0 */
 77#define E7XXX_DRB		0x60	/* DRAM row boundary register (8b) */
 78#define E7XXX_DRA		0x70	/* DRAM row attribute register (8b) */
 79					/*
 80					 * 31   Device width row 7 0=x8 1=x4
 81					 * 27   Device width row 6
 82					 * 23   Device width row 5
 83					 * 19   Device width row 4
 84					 * 15   Device width row 3
 85					 * 11   Device width row 2
 86					 *  7   Device width row 1
 87					 *  3   Device width row 0
 88					 */
 89#define E7XXX_DRC		0x7C	/* DRAM controller mode reg (32b) */
 90					/*
 91					 * 22    Number channels 0=1,1=2
 92					 * 19:18 DRB Granularity 32/64MB
 93					 */
 94#define E7XXX_TOLM		0xC4	/* DRAM top of low memory reg (16b) */
 95#define E7XXX_REMAPBASE		0xC6	/* DRAM remap base address reg (16b) */
 96#define E7XXX_REMAPLIMIT	0xC8	/* DRAM remap limit address reg (16b) */
 97
 98/* E7XXX register addresses - device 0 function 1 */
 99#define E7XXX_DRAM_FERR		0x80	/* DRAM first error register (8b) */
100#define E7XXX_DRAM_NERR		0x82	/* DRAM next error register (8b) */
101#define E7XXX_DRAM_CELOG_ADD	0xA0	/* DRAM first correctable memory */
102					/*     error address register (32b) */
103					/*
104					 * 31:28 Reserved
105					 * 27:6  CE address (4k block 33:12)
106					 *  5:0  Reserved
107					 */
108#define E7XXX_DRAM_UELOG_ADD	0xB0	/* DRAM first uncorrectable memory */
109					/*     error address register (32b) */
110					/*
111					 * 31:28 Reserved
					 * 27:6  UE address (4k block 33:12)
113					 *  5:0  Reserved
114					 */
115#define E7XXX_DRAM_CELOG_SYNDROME 0xD0	/* DRAM first correctable memory */
116					/*     error syndrome register (16b) */
117
/*
 * Supported chipsets.  The enumerators are used as indices into
 * e7xxx_devs[] and as driver_data values in e7xxx_pci_tbl[].
 */
enum e7xxx_chips {
	E7500 = 0,
	E7501 = 1,
	E7505 = 2,
	E7205 = 3,
};
124
125struct e7xxx_pvt {
126	struct pci_dev *bridge_ck;
127	u32 tolm;
128	u32 remapbase;
129	u32 remaplimit;
130	const struct e7xxx_dev_info *dev_info;
131};
132
133struct e7xxx_dev_info {
134	u16 err_dev;
135	const char *ctl_name;
136};
137
138struct e7xxx_error_info {
139	u8 dram_ferr;
140	u8 dram_nerr;
141	u32 dram_celog_add;
142	u16 dram_celog_syndrome;
143	u32 dram_uelog_add;
144};
145
146static struct edac_pci_ctl_info *e7xxx_pci;
147
148static const struct e7xxx_dev_info e7xxx_devs[] = {
149	[E7500] = {
150		.err_dev = PCI_DEVICE_ID_INTEL_7500_1_ERR,
151		.ctl_name = "E7500"},
152	[E7501] = {
153		.err_dev = PCI_DEVICE_ID_INTEL_7501_1_ERR,
154		.ctl_name = "E7501"},
155	[E7505] = {
156		.err_dev = PCI_DEVICE_ID_INTEL_7505_1_ERR,
157		.ctl_name = "E7505"},
158	[E7205] = {
159		.err_dev = PCI_DEVICE_ID_INTEL_7205_1_ERR,
160		.ctl_name = "E7205"},
161};
162
163/* FIXME - is this valid for both SECDED and S4ECD4ED? */
164static inline int e7xxx_find_channel(u16 syndrome)
165{
166	debugf3("%s()\n", __func__);
167
168	if ((syndrome & 0xff00) == 0)
169		return 0;
170
171	if ((syndrome & 0x00ff) == 0)
172		return 1;
173
174	if ((syndrome & 0xf000) == 0 || (syndrome & 0x0f00) == 0)
175		return 0;
176
177	return 1;
178}
179
180static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci,
181				unsigned long page)
182{
183	u32 remap;
184	struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info;
185
186	debugf3("%s()\n", __func__);
187
188	if ((page < pvt->tolm) ||
189		((page >= 0x100000) && (page < pvt->remapbase)))
190		return page;
191
192	remap = (page - pvt->tolm) + pvt->remapbase;
193
194	if (remap < pvt->remaplimit)
195		return remap;
196
197	e7xxx_printk(KERN_ERR, "Invalid page %lx - out of range\n", page);
198	return pvt->tolm - 1;
199}
200
201static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
202{
203	u32 error_1b, page;
204	u16 syndrome;
205	int row;
206	int channel;
207
208	debugf3("%s()\n", __func__);
209	/* read the error address */
210	error_1b = info->dram_celog_add;
211	/* FIXME - should use PAGE_SHIFT */
212	page = error_1b >> 6;	/* convert the address to 4k page */
213	/* read the syndrome */
214	syndrome = info->dram_celog_syndrome;
215	/* FIXME - check for -1 */
216	row = edac_mc_find_csrow_by_page(mci, page);
217	/* convert syndrome to channel */
218	channel = e7xxx_find_channel(syndrome);
219	edac_mc_handle_ce(mci, page, 0, syndrome, row, channel, "e7xxx CE");
 
220}
221
/*
 * Report a correctable error for which no address/syndrome is available
 * (used when a "next" CE is flagged while the log holds the first one).
 */
static void process_ce_no_info(struct mem_ctl_info *mci)
{
	debugf3("%s()\n", __func__);

	edac_mc_handle_ce_no_info(mci, "e7xxx CE log register overflow");
}
227
228static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
229{
230	u32 error_2b, block_page;
231	int row;
232
233	debugf3("%s()\n", __func__);
234	/* read the error address */
235	error_2b = info->dram_uelog_add;
236	/* FIXME - should use PAGE_SHIFT */
237	block_page = error_2b >> 6;	/* convert to 4k address */
238	row = edac_mc_find_csrow_by_page(mci, block_page);
239	edac_mc_handle_ue(mci, block_page, 0, row, "e7xxx UE");
 
 
240}
241
/*
 * Report an uncorrectable error for which no address is available
 * (used when a "next" UE is flagged while the log holds the first one).
 */
static void process_ue_no_info(struct mem_ctl_info *mci)
{
	debugf3("%s()\n", __func__);

	edac_mc_handle_ue_no_info(mci, "e7xxx UE log register overflow");
}
247
248static void e7xxx_get_error_info(struct mem_ctl_info *mci,
249				 struct e7xxx_error_info *info)
250{
251	struct e7xxx_pvt *pvt;
252
253	pvt = (struct e7xxx_pvt *)mci->pvt_info;
254	pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_FERR, &info->dram_ferr);
255	pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_NERR, &info->dram_nerr);
256
257	if ((info->dram_ferr & 1) || (info->dram_nerr & 1)) {
258		pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_CELOG_ADD,
259				&info->dram_celog_add);
260		pci_read_config_word(pvt->bridge_ck,
261				E7XXX_DRAM_CELOG_SYNDROME,
262				&info->dram_celog_syndrome);
263	}
264
265	if ((info->dram_ferr & 2) || (info->dram_nerr & 2))
266		pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_UELOG_ADD,
267				&info->dram_uelog_add);
268
269	if (info->dram_ferr & 3)
270		pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_FERR, 0x03, 0x03);
271
272	if (info->dram_nerr & 3)
273		pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03);
274}
275
276static int e7xxx_process_error_info(struct mem_ctl_info *mci,
277				struct e7xxx_error_info *info,
278				int handle_errors)
279{
280	int error_found;
281
282	error_found = 0;
283
284	/* decode and report errors */
285	if (info->dram_ferr & 1) {	/* check first error correctable */
286		error_found = 1;
287
288		if (handle_errors)
289			process_ce(mci, info);
290	}
291
292	if (info->dram_ferr & 2) {	/* check first error uncorrectable */
293		error_found = 1;
294
295		if (handle_errors)
296			process_ue(mci, info);
297	}
298
299	if (info->dram_nerr & 1) {	/* check next error correctable */
300		error_found = 1;
301
302		if (handle_errors) {
303			if (info->dram_ferr & 1)
304				process_ce_no_info(mci);
305			else
306				process_ce(mci, info);
307		}
308	}
309
310	if (info->dram_nerr & 2) {	/* check next error uncorrectable */
311		error_found = 1;
312
313		if (handle_errors) {
314			if (info->dram_ferr & 2)
315				process_ue_no_info(mci);
316			else
317				process_ue(mci, info);
318		}
319	}
320
321	return error_found;
322}
323
324static void e7xxx_check(struct mem_ctl_info *mci)
325{
326	struct e7xxx_error_info info;
327
328	debugf3("%s()\n", __func__);
329	e7xxx_get_error_info(mci, &info);
330	e7xxx_process_error_info(mci, &info, 1);
331}
332
333/* Return 1 if dual channel mode is active.  Else return 0. */
334static inline int dual_channel_active(u32 drc, int dev_idx)
335{
336	return (dev_idx == E7501) ? ((drc >> 22) & 0x1) : 1;
337}
338
339/* Return DRB granularity (0=32mb, 1=64mb). */
340static inline int drb_granularity(u32 drc, int dev_idx)
341{
342	/* only e7501 can be single channel */
343	return (dev_idx == E7501) ? ((drc >> 18) & 0x3) : 1;
344}
345
346static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
347			int dev_idx, u32 drc)
348{
349	unsigned long last_cumul_size;
350	int index;
351	u8 value;
352	u32 dra, cumul_size;
353	int drc_chan, drc_drbg, drc_ddim, mem_dev;
354	struct csrow_info *csrow;
 
 
355
356	pci_read_config_dword(pdev, E7XXX_DRA, &dra);
357	drc_chan = dual_channel_active(drc, dev_idx);
358	drc_drbg = drb_granularity(drc, dev_idx);
359	drc_ddim = (drc >> 20) & 0x3;
360	last_cumul_size = 0;
361
362	/* The dram row boundary (DRB) reg values are boundary address
363	 * for each DRAM row with a granularity of 32 or 64MB (single/dual
364	 * channel operation).  DRB regs are cumulative; therefore DRB7 will
365	 * contain the total memory contained in all eight rows.
366	 */
367	for (index = 0; index < mci->nr_csrows; index++) {
368		/* mem_dev 0=x8, 1=x4 */
369		mem_dev = (dra >> (index * 4 + 3)) & 0x1;
370		csrow = &mci->csrows[index];
371
372		pci_read_config_byte(pdev, E7XXX_DRB + index, &value);
373		/* convert a 64 or 32 MiB DRB to a page size. */
374		cumul_size = value << (25 + drc_drbg - PAGE_SHIFT);
375		debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
376			cumul_size);
377		if (cumul_size == last_cumul_size)
378			continue;	/* not populated */
379
380		csrow->first_page = last_cumul_size;
381		csrow->last_page = cumul_size - 1;
382		csrow->nr_pages = cumul_size - last_cumul_size;
383		last_cumul_size = cumul_size;
384		csrow->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
385		csrow->mtype = MEM_RDDR;	/* only one type supported */
386		csrow->dtype = mem_dev ? DEV_X4 : DEV_X8;
387
388		/*
389		 * if single channel or x8 devices then SECDED
390		 * if dual channel and x4 then S4ECD4ED
391		 */
392		if (drc_ddim) {
393			if (drc_chan && mem_dev) {
394				csrow->edac_mode = EDAC_S4ECD4ED;
395				mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
396			} else {
397				csrow->edac_mode = EDAC_SECDED;
398				mci->edac_cap |= EDAC_FLAG_SECDED;
399			}
400		} else
401			csrow->edac_mode = EDAC_NONE;
 
 
 
 
 
 
 
 
 
 
402	}
403}
404
405static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
406{
407	u16 pci_data;
408	struct mem_ctl_info *mci = NULL;
 
409	struct e7xxx_pvt *pvt = NULL;
410	u32 drc;
411	int drc_chan;
412	struct e7xxx_error_info discard;
413
414	debugf0("%s(): mci\n", __func__);
415
416	pci_read_config_dword(pdev, E7XXX_DRC, &drc);
417
418	drc_chan = dual_channel_active(drc, dev_idx);
419	mci = edac_mc_alloc(sizeof(*pvt), E7XXX_NR_CSROWS, drc_chan + 1, 0);
420
 
 
 
 
 
 
 
 
 
 
 
 
 
421	if (mci == NULL)
422		return -ENOMEM;
423
424	debugf3("%s(): init mci\n", __func__);
425	mci->mtype_cap = MEM_FLAG_RDDR;
426	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED |
427		EDAC_FLAG_S4ECD4ED;
428	/* FIXME - what if different memory types are in different csrows? */
429	mci->mod_name = EDAC_MOD_STR;
430	mci->mod_ver = E7XXX_REVISION;
431	mci->dev = &pdev->dev;
432	debugf3("%s(): init pvt\n", __func__);
433	pvt = (struct e7xxx_pvt *)mci->pvt_info;
434	pvt->dev_info = &e7xxx_devs[dev_idx];
435	pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
436					pvt->dev_info->err_dev, pvt->bridge_ck);
437
438	if (!pvt->bridge_ck) {
439		e7xxx_printk(KERN_ERR, "error reporting device not found:"
440			"vendor %x device 0x%x (broken BIOS?)\n",
441			PCI_VENDOR_ID_INTEL, e7xxx_devs[dev_idx].err_dev);
442		goto fail0;
443	}
444
445	debugf3("%s(): more mci init\n", __func__);
446	mci->ctl_name = pvt->dev_info->ctl_name;
447	mci->dev_name = pci_name(pdev);
448	mci->edac_check = e7xxx_check;
449	mci->ctl_page_to_phys = ctl_page_to_phys;
450	e7xxx_init_csrows(mci, pdev, dev_idx, drc);
451	mci->edac_cap |= EDAC_FLAG_NONE;
452	debugf3("%s(): tolm, remapbase, remaplimit\n", __func__);
453	/* load the top of low memory, remap base, and remap limit vars */
454	pci_read_config_word(pdev, E7XXX_TOLM, &pci_data);
455	pvt->tolm = ((u32) pci_data) << 4;
456	pci_read_config_word(pdev, E7XXX_REMAPBASE, &pci_data);
457	pvt->remapbase = ((u32) pci_data) << 14;
458	pci_read_config_word(pdev, E7XXX_REMAPLIMIT, &pci_data);
459	pvt->remaplimit = ((u32) pci_data) << 14;
460	e7xxx_printk(KERN_INFO,
461		"tolm = %x, remapbase = %x, remaplimit = %x\n", pvt->tolm,
462		pvt->remapbase, pvt->remaplimit);
463
464	/* clear any pending errors, or initial state bits */
465	e7xxx_get_error_info(mci, &discard);
466
467	/* Here we assume that we will never see multiple instances of this
468	 * type of memory controller.  The ID is therefore hardcoded to 0.
469	 */
470	if (edac_mc_add_mc(mci)) {
471		debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
472		goto fail1;
473	}
474
475	/* allocating generic PCI control info */
476	e7xxx_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
477	if (!e7xxx_pci) {
478		printk(KERN_WARNING
479			"%s(): Unable to create PCI control\n",
480			__func__);
481		printk(KERN_WARNING
482			"%s(): PCI error report via EDAC not setup\n",
483			__func__);
484	}
485
486	/* get this far and it's successful */
487	debugf3("%s(): success\n", __func__);
488	return 0;
489
490fail1:
491	pci_dev_put(pvt->bridge_ck);
492
493fail0:
494	edac_mc_free(mci);
495
496	return -ENODEV;
497}
498
499/* returns count (>= 0), or negative on error */
500static int __devinit e7xxx_init_one(struct pci_dev *pdev,
501				const struct pci_device_id *ent)
502{
503	debugf0("%s()\n", __func__);
504
505	/* wake up and enable device */
506	return pci_enable_device(pdev) ?
507		-EIO : e7xxx_probe1(pdev, ent->driver_data);
508}
509
510static void __devexit e7xxx_remove_one(struct pci_dev *pdev)
511{
512	struct mem_ctl_info *mci;
513	struct e7xxx_pvt *pvt;
514
515	debugf0("%s()\n", __func__);
516
517	if (e7xxx_pci)
518		edac_pci_release_generic_ctl(e7xxx_pci);
519
520	if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
521		return;
522
523	pvt = (struct e7xxx_pvt *)mci->pvt_info;
524	pci_dev_put(pvt->bridge_ck);
525	edac_mc_free(mci);
526}
527
528static const struct pci_device_id e7xxx_pci_tbl[] __devinitdata = {
529	{
530	 PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
531	 E7205},
532	{
533	 PCI_VEND_DEV(INTEL, 7500_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
534	 E7500},
535	{
536	 PCI_VEND_DEV(INTEL, 7501_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
537	 E7501},
538	{
539	 PCI_VEND_DEV(INTEL, 7505_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
540	 E7505},
541	{
542	 0,
543	 }			/* 0 terminated list. */
544};
545
546MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl);
547
548static struct pci_driver e7xxx_driver = {
549	.name = EDAC_MOD_STR,
550	.probe = e7xxx_init_one,
551	.remove = __devexit_p(e7xxx_remove_one),
552	.id_table = e7xxx_pci_tbl,
553};
554
555static int __init e7xxx_init(void)
556{
557       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
558       opstate_init();
559
560	return pci_register_driver(&e7xxx_driver);
561}
562
563static void __exit e7xxx_exit(void)
564{
565	pci_unregister_driver(&e7xxx_driver);
566}
567
568module_init(e7xxx_init);
569module_exit(e7xxx_exit);
570
571MODULE_LICENSE("GPL");
572MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n"
573		"Based on.work by Dan Hollis et al");
574MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers");
575module_param(edac_op_state, int, 0444);
576MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");