Loading...
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver.
4 * Originally split out from the skx_edac driver.
5 *
6 * Copyright (c) 2018, Intel Corporation.
7 */
8
9#ifndef _SKX_COMM_EDAC_H
10#define _SKX_COMM_EDAC_H
11
12#include <linux/bits.h>
13#include <asm/mce.h>
14
15#define MSG_SIZE 1024
16
/*
 * Debug macros
 *
 * Both helpers forward to the corresponding EDAC printk wrapper and always
 * pass the fixed string "skx" as the prefix argument.
 */
#define skx_printk(level, fmt, arg...)			\
	edac_printk(level, "skx", fmt, ##arg)

/* Variant that additionally passes the memory controller <mci> through. */
#define skx_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
25
/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>.
 * The value is masked with GENMASK_ULL(hi, lo) and then shifted right by
 * <lo>, so the field is returned starting at bit 0.
 */
#define GET_BITFIELD(v, lo, hi)	\
	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
31
/* Array-sizing limits for the skx (Skylake-style) topology. */
#define SKX_NUM_IMC		2	/* Memory controllers per socket */
#define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
#define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */

/* Array-sizing limits for i10nm DDR memory controllers. */
#define I10NM_NUM_DDR_IMC	12
#define I10NM_NUM_DDR_CHANNELS	2
#define I10NM_NUM_DDR_DIMMS	2

/* Array-sizing limits for i10nm HBM memory controllers. */
#define I10NM_NUM_HBM_IMC	16
#define I10NM_NUM_HBM_CHANNELS	2
#define I10NM_NUM_HBM_DIMMS	1

/*
 * Combined i10nm limits: DDR and HBM controllers are counted together,
 * channels and DIMMs take the larger of the two flavours.
 *
 * NOTE(review): MAX() is not defined in this header; it is presumably
 * provided by another kernel header (e.g. <linux/minmax.h>) that is
 * included before this one is used -- confirm.
 */
#define I10NM_NUM_IMC		(I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
#define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)

/* Worst case over both driver families; used to size the skx_dev arrays. */
#define NUM_IMC			MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
#define NUM_CHANNELS		MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS		MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
51
/* Presence flags: bit 15 of <r>, respectively bit <i> of <r>. */
#define IS_DIMM_PRESENT(r)		GET_BITFIELD(r, 15, 15)
#define IS_NVDIMM_PRESENT(r, i)		GET_BITFIELD(r, i, i)

/* 4-bit ECC mode field held in bits 62:59 of <m>. */
#define MCI_MISC_ECC_MODE(m)	(((m) >> 59) & 15)
#define MCI_MISC_ECC_DDRT	8	/* read from DDRT */
57
/*
 * According to Intel Architecture spec vol 3B,
 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
 * memory errors should fit one of these masks:
 *	000f 0000 1mmm cccc (binary)
 *	000f 0010 1mmm cccc (binary)	[RAM used as cache]
 * where:
 *	f = Correction Report Filtering Bit. If 1, subsequent errors
 *	    won't be shown
 *	mmm = error type
 *	cccc = channel
 *
 * The mask below therefore ignores the f, mmm and cccc bits and keeps
 * only the bits that distinguish the two encodings.
 */
#define MCACOD_MEM_ERR_MASK	0xef80
/*
 * Errors from either the memory of the 1-level memory system or the
 * 2nd level memory (the slow "far" memory) of the 2-level memory system.
 */
#define MCACOD_MEM_CTL_ERR	0x80
/*
 * Errors from the 1st level memory (the fast "near" memory as cache)
 * of the 2-level memory system.
 */
#define MCACOD_EXT_MEM_ERR	0x280
81
82/*
83 * Each cpu socket contains some pci devices that provide global
84 * information, and also some that are local to each of the two
85 * memory controllers on the die.
86 */
87struct skx_dev {
88 struct list_head list;
89 u8 bus[4];
90 int seg;
91 struct pci_dev *sad_all;
92 struct pci_dev *util_all;
93 struct pci_dev *uracu; /* for i10nm CPU */
94 struct pci_dev *pcu_cr3; /* for HBM memory detection */
95 u32 mcroute;
96 struct skx_imc {
97 struct mem_ctl_info *mci;
98 struct pci_dev *mdev; /* for i10nm CPU */
99 void __iomem *mbase; /* for i10nm CPU */
100 int chan_mmio_sz; /* for i10nm CPU */
101 int num_channels; /* channels per memory controller */
102 int num_dimms; /* dimms per channel */
103 bool hbm_mc;
104 u8 mc; /* system wide mc# */
105 u8 lmc; /* socket relative mc# */
106 u8 src_id, node_id;
107 struct skx_channel {
108 struct pci_dev *cdev;
109 struct pci_dev *edev;
110 u32 retry_rd_err_log_s;
111 u32 retry_rd_err_log_d;
112 u32 retry_rd_err_log_d2;
113 struct skx_dimm {
114 u8 close_pg;
115 u8 bank_xor_enable;
116 u8 fine_grain_bank;
117 u8 rowbits;
118 u8 colbits;
119 } dimms[NUM_DIMMS];
120 } chan[NUM_CHANNELS];
121 } imc[NUM_IMC];
122};
123
/*
 * Thin wrapper around a pointer to one memory controller descriptor.
 * NOTE(review): presumably used as the EDAC mci private data -- confirm
 * against the code that allocates the mci.
 */
struct skx_pvt {
	struct skx_imc *imc;
};
127
/* Platform family selector stored in struct res_config. */
enum type {
	SKX,
	I10NM,
	SPR,
	GNR
};
134
/*
 * Indices of the decoded address components. The INDEX_NM_* entries
 * start at INDEX_NM_FIRST (INDEX_NM_MEMCTRL is an alias for it);
 * INDEX_MAX is the total number of entries.
 */
enum {
	INDEX_SOCKET,
	INDEX_MEMCTRL,
	INDEX_CHANNEL,
	INDEX_DIMM,
	INDEX_CS,
	INDEX_NM_FIRST,
	INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
	INDEX_NM_CHANNEL,
	INDEX_NM_DIMM,
	INDEX_NM_CS,
	INDEX_MAX
};
148
/*
 * Classification of where a reported error came from.
 * NOTE(review): the names suggest 1-level memory, near/far memory of a
 * 2-level memory system, and "not a memory error" -- confirm with the
 * code that produces these values.
 */
enum error_source {
	ERR_SRC_1LM,
	ERR_SRC_2LM_NM,
	ERR_SRC_2LM_FM,
	ERR_SRC_NOT_MEMORY,
};
155
/* One-bit masks built from the matching INDEX_NM_* values. */
#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
#define BIT_NM_CS	BIT_ULL(INDEX_NM_CS)
160
161struct decoded_addr {
162 struct mce *mce;
163 struct skx_dev *dev;
164 u64 addr;
165 int socket;
166 int imc;
167 int channel;
168 u64 chan_addr;
169 int sktways;
170 int chanways;
171 int dimm;
172 int cs;
173 int rank;
174 int channel_rank;
175 u64 rank_address;
176 int row;
177 int column;
178 int bank_address;
179 int bank_group;
180 bool decoded_by_adxl;
181};
182
183struct pci_bdf {
184 u32 bus : 8;
185 u32 dev : 5;
186 u32 fun : 3;
187};
188
189struct res_config {
190 enum type type;
191 /* Configuration agent device ID */
192 unsigned int decs_did;
193 /* Default bus number configuration register offset */
194 int busno_cfg_offset;
195 /* DDR memory controllers per socket */
196 int ddr_imc_num;
197 /* DDR channels per DDR memory controller */
198 int ddr_chan_num;
199 /* DDR DIMMs per DDR memory channel */
200 int ddr_dimm_num;
201 /* Per DDR channel memory-mapped I/O size */
202 int ddr_chan_mmio_sz;
203 /* HBM memory controllers per socket */
204 int hbm_imc_num;
205 /* HBM channels per HBM memory controller */
206 int hbm_chan_num;
207 /* HBM DIMMs per HBM memory channel */
208 int hbm_dimm_num;
209 /* Per HBM channel memory-mapped I/O size */
210 int hbm_chan_mmio_sz;
211 bool support_ddr5;
212 /* SAD device BDF */
213 struct pci_bdf sad_all_bdf;
214 /* PCU device BDF */
215 struct pci_bdf pcu_cr3_bdf;
216 /* UTIL device BDF */
217 struct pci_bdf util_all_bdf;
218 /* URACU device BDF */
219 struct pci_bdf uracu_bdf;
220 /* DDR mdev device BDF */
221 struct pci_bdf ddr_mdev_bdf;
222 /* HBM mdev device BDF */
223 struct pci_bdf hbm_mdev_bdf;
224 int sad_all_offset;
225 /* Offsets of retry_rd_err_log registers */
226 u32 *offsets_scrub;
227 u32 *offsets_scrub_hbm0;
228 u32 *offsets_scrub_hbm1;
229 u32 *offsets_demand;
230 u32 *offsets_demand2;
231 u32 *offsets_demand_hbm0;
232 u32 *offsets_demand_hbm1;
233};
234
/*
 * Callback types supplied by the individual drivers to the common code:
 *  - get_dimm_config_f:    takes a memory controller and the platform
 *                          resource configuration, returns an int status.
 *  - skx_decode_f:         takes a struct decoded_addr, returns bool.
 *  - skx_show_retry_log_f: takes a struct decoded_addr, a message buffer
 *                          <msg> with its length <len>, and a flag telling
 *                          whether the error is a scrub error.
 */
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
				 struct res_config *cfg);
typedef bool (*skx_decode_f)(struct decoded_addr *res);
typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
239
240int skx_adxl_get(void);
241void skx_adxl_put(void);
242void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
243void skx_set_mem_cfg(bool mem_cfg_2lm);
244void skx_set_res_cfg(struct res_config *cfg);
245
246int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
247int skx_get_node_id(struct skx_dev *d, u8 *id);
248
249int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
250
251int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
252
253int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
254 struct skx_imc *imc, int chan, int dimmno,
255 struct res_config *cfg);
256
257int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
258 int chan, int dimmno, const char *mod_str);
259
260int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
261 const char *ctl_name, const char *mod_str,
262 get_dimm_config_f get_dimm_config,
263 struct res_config *cfg);
264
265int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
266 void *data);
267
268void skx_remove(void);
269
/*
 * Debug hooks. With CONFIG_EDAC_DEBUG they are real functions defined
 * elsewhere; without it they collapse to empty inline stubs so callers
 * need no #ifdef of their own.
 */
#ifdef CONFIG_EDAC_DEBUG
void skx_setup_debug(const char *name);
void skx_teardown_debug(void);
#else
static inline void skx_setup_debug(const char *name) {}
static inline void skx_teardown_debug(void) {}
#endif
277
278#endif /* _SKX_COMM_EDAC_H */
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver.
4 * Originally split out from the skx_edac driver.
5 *
6 * Copyright (c) 2018, Intel Corporation.
7 */
8
9#ifndef _SKX_COMM_EDAC_H
10#define _SKX_COMM_EDAC_H
11
12#include <linux/bits.h>
13#include <asm/mce.h>
14
15#define MSG_SIZE 1024
16
/*
 * Debug macros
 *
 * Both helpers forward to the corresponding EDAC printk wrapper and always
 * pass the fixed string "skx" as the prefix argument.
 */
#define skx_printk(level, fmt, arg...)			\
	edac_printk(level, "skx", fmt, ##arg)

/* Variant that additionally passes the memory controller <mci> through. */
#define skx_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
25
/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>.
 * The value is masked with GENMASK_ULL(hi, lo) and then shifted right by
 * <lo>, so the field is returned starting at bit 0.
 */
#define GET_BITFIELD(v, lo, hi)	\
	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
31
/* Larger of two values; both arguments are evaluated more than once. */
#define MAX(a, b)	((a) > (b) ? (a) : (b))

/* Array-sizing limits for the skx (Skylake-style) topology. */
#define SKX_NUM_IMC		2	/* Memory controllers per socket */
#define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
#define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */

/* Array-sizing limits for i10nm DDR memory controllers. */
#define I10NM_NUM_DDR_IMC	4
#define I10NM_NUM_DDR_CHANNELS	2
#define I10NM_NUM_DDR_DIMMS	2

/* Array-sizing limits for i10nm HBM memory controllers. */
#define I10NM_NUM_HBM_IMC	16
#define I10NM_NUM_HBM_CHANNELS	2
#define I10NM_NUM_HBM_DIMMS	1

/*
 * Combined i10nm limits: DDR and HBM controllers are counted together,
 * channels and DIMMs take the larger of the two flavours.
 */
#define I10NM_NUM_IMC		(I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
#define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)

/* Worst case over both driver families; used to size the skx_dev arrays. */
#define NUM_IMC			MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
#define NUM_CHANNELS		MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS		MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
52
/* Presence flags: bit 15 of <r>, respectively bit <i> of <r>. */
#define IS_DIMM_PRESENT(r)		GET_BITFIELD(r, 15, 15)
#define IS_NVDIMM_PRESENT(r, i)		GET_BITFIELD(r, i, i)

/* 4-bit ECC mode field held in bits 62:59 of <m>. */
#define MCI_MISC_ECC_MODE(m)	(((m) >> 59) & 15)
#define MCI_MISC_ECC_DDRT	8	/* read from DDRT */
58
59/*
60 * Each cpu socket contains some pci devices that provide global
61 * information, and also some that are local to each of the two
62 * memory controllers on the die.
63 */
64struct skx_dev {
65 struct list_head list;
66 u8 bus[4];
67 int seg;
68 struct pci_dev *sad_all;
69 struct pci_dev *util_all;
70 struct pci_dev *uracu; /* for i10nm CPU */
71 struct pci_dev *pcu_cr3; /* for HBM memory detection */
72 u32 mcroute;
73 struct skx_imc {
74 struct mem_ctl_info *mci;
75 struct pci_dev *mdev; /* for i10nm CPU */
76 void __iomem *mbase; /* for i10nm CPU */
77 int chan_mmio_sz; /* for i10nm CPU */
78 int num_channels; /* channels per memory controller */
79 int num_dimms; /* dimms per channel */
80 bool hbm_mc;
81 u8 mc; /* system wide mc# */
82 u8 lmc; /* socket relative mc# */
83 u8 src_id, node_id;
84 struct skx_channel {
85 struct pci_dev *cdev;
86 struct pci_dev *edev;
87 u32 retry_rd_err_log_s;
88 u32 retry_rd_err_log_d;
89 u32 retry_rd_err_log_d2;
90 struct skx_dimm {
91 u8 close_pg;
92 u8 bank_xor_enable;
93 u8 fine_grain_bank;
94 u8 rowbits;
95 u8 colbits;
96 } dimms[NUM_DIMMS];
97 } chan[NUM_CHANNELS];
98 } imc[NUM_IMC];
99};
100
/*
 * Thin wrapper around a pointer to one memory controller descriptor.
 * NOTE(review): presumably used as the EDAC mci private data -- confirm
 * against the code that allocates the mci.
 */
struct skx_pvt {
	struct skx_imc *imc;
};
104
/* Platform family selector stored in struct res_config. */
enum type {
	SKX,
	I10NM,
	SPR
};
110
/*
 * Indices of the decoded address components. The INDEX_NM_* entries
 * start at INDEX_NM_FIRST (INDEX_NM_MEMCTRL is an alias for it);
 * INDEX_MAX is the total number of entries.
 */
enum {
	INDEX_SOCKET,
	INDEX_MEMCTRL,
	INDEX_CHANNEL,
	INDEX_DIMM,
	INDEX_CS,
	INDEX_NM_FIRST,
	INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
	INDEX_NM_CHANNEL,
	INDEX_NM_DIMM,
	INDEX_NM_CS,
	INDEX_MAX
};
124
/* One-bit masks built from the matching INDEX_NM_* values. */
#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
#define BIT_NM_CS	BIT_ULL(INDEX_NM_CS)
129
130struct decoded_addr {
131 struct mce *mce;
132 struct skx_dev *dev;
133 u64 addr;
134 int socket;
135 int imc;
136 int channel;
137 u64 chan_addr;
138 int sktways;
139 int chanways;
140 int dimm;
141 int cs;
142 int rank;
143 int channel_rank;
144 u64 rank_address;
145 int row;
146 int column;
147 int bank_address;
148 int bank_group;
149 bool decoded_by_adxl;
150};
151
152struct res_config {
153 enum type type;
154 /* Configuration agent device ID */
155 unsigned int decs_did;
156 /* Default bus number configuration register offset */
157 int busno_cfg_offset;
158 /* Per DDR channel memory-mapped I/O size */
159 int ddr_chan_mmio_sz;
160 /* Per HBM channel memory-mapped I/O size */
161 int hbm_chan_mmio_sz;
162 bool support_ddr5;
163 /* SAD device number and function number */
164 unsigned int sad_all_devfn;
165 int sad_all_offset;
166 /* Offsets of retry_rd_err_log registers */
167 u32 *offsets_scrub;
168 u32 *offsets_scrub_hbm0;
169 u32 *offsets_scrub_hbm1;
170 u32 *offsets_demand;
171 u32 *offsets_demand2;
172 u32 *offsets_demand_hbm0;
173 u32 *offsets_demand_hbm1;
174};
175
/*
 * Callback types supplied by the individual drivers to the common code:
 *  - get_dimm_config_f:    takes a memory controller and the platform
 *                          resource configuration, returns an int status.
 *  - skx_decode_f:         takes a struct decoded_addr, returns bool.
 *  - skx_show_retry_log_f: takes a struct decoded_addr, a message buffer
 *                          <msg> with its length <len>, and a flag telling
 *                          whether the error is a scrub error.
 */
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
				 struct res_config *cfg);
typedef bool (*skx_decode_f)(struct decoded_addr *res);
typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
180
181int __init skx_adxl_get(void);
182void __exit skx_adxl_put(void);
183void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
184void skx_set_mem_cfg(bool mem_cfg_2lm);
185
186int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
187int skx_get_node_id(struct skx_dev *d, u8 *id);
188
189int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
190
191int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
192
193int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
194 struct skx_imc *imc, int chan, int dimmno,
195 struct res_config *cfg);
196
197int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
198 int chan, int dimmno, const char *mod_str);
199
200int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
201 const char *ctl_name, const char *mod_str,
202 get_dimm_config_f get_dimm_config,
203 struct res_config *cfg);
204
205int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
206 void *data);
207
208void skx_remove(void);
209
210#endif /* _SKX_COMM_EDAC_H */