Loading...
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright IBM Corp. 2012
4 *
5 * Author(s):
6 * Jan Glauber <jang@linux.vnet.ibm.com>
7 */
8
9#define KMSG_COMPONENT "zpci"
10#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
11
12#include <linux/kernel.h>
13#include <linux/pci.h>
14#include <asm/pci_debug.h>
15#include <asm/pci_dma.h>
16#include <asm/sclp.h>
17
18#include "pci_bus.h"
19
20/* Content Code Description for PCI Function Error */
21struct zpci_ccdf_err {
22 u32 reserved1;
23 u32 fh; /* function handle */
24 u32 fid; /* function id */
25 u32 ett : 4; /* expected table type */
26 u32 mvn : 12; /* MSI vector number */
27 u32 dmaas : 8; /* DMA address space */
28 u32 : 6;
29 u32 q : 1; /* event qualifier */
30 u32 rw : 1; /* read/write */
31 u64 faddr; /* failing address */
32 u32 reserved3;
33 u16 reserved4;
34 u16 pec; /* PCI event code */
35} __packed;
36
37/* Content Code Description for PCI Function Availability */
38struct zpci_ccdf_avail {
39 u32 reserved1;
40 u32 fh; /* function handle */
41 u32 fid; /* function id */
42 u32 reserved2;
43 u32 reserved3;
44 u32 reserved4;
45 u32 reserved5;
46 u16 reserved6;
47 u16 pec; /* PCI event code */
48} __packed;
49
50static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
51{
52 switch (ers_res) {
53 case PCI_ERS_RESULT_CAN_RECOVER:
54 case PCI_ERS_RESULT_RECOVERED:
55 case PCI_ERS_RESULT_NEED_RESET:
56 return false;
57 default:
58 return true;
59 }
60}
61
62static bool is_passed_through(struct zpci_dev *zdev)
63{
64 return zdev->s390_domain;
65}
66
67static bool is_driver_supported(struct pci_driver *driver)
68{
69 if (!driver || !driver->err_handler)
70 return false;
71 if (!driver->err_handler->error_detected)
72 return false;
73 if (!driver->err_handler->slot_reset)
74 return false;
75 if (!driver->err_handler->resume)
76 return false;
77 return true;
78}
79
80static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
81 struct pci_driver *driver)
82{
83 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
84
85 ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
86 if (ers_result_indicates_abort(ers_res))
87 pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
88 else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
89 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
90
91 return ers_res;
92}
93
94static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
95 struct pci_driver *driver)
96{
97 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
98 struct zpci_dev *zdev = to_zpci(pdev);
99 int rc;
100
101 pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
102 rc = zpci_reset_load_store_blocked(zdev);
103 if (rc) {
104 pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
105 /* Let's try a full reset instead */
106 return PCI_ERS_RESULT_NEED_RESET;
107 }
108
109 if (driver->err_handler->mmio_enabled) {
110 ers_res = driver->err_handler->mmio_enabled(pdev);
111 if (ers_result_indicates_abort(ers_res)) {
112 pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
113 pci_name(pdev));
114 return ers_res;
115 } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
116 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
117 return ers_res;
118 }
119 }
120
121 pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
122 rc = zpci_clear_error_state(zdev);
123 if (!rc) {
124 pdev->error_state = pci_channel_io_normal;
125 } else {
126 pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
127 /* Let's try a full reset instead */
128 return PCI_ERS_RESULT_NEED_RESET;
129 }
130
131 return ers_res;
132}
133
134static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
135 struct pci_driver *driver)
136{
137 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
138
139 pr_info("%s: Initiating reset\n", pci_name(pdev));
140 if (zpci_hot_reset_device(to_zpci(pdev))) {
141 pr_err("%s: The reset request failed\n", pci_name(pdev));
142 return ers_res;
143 }
144 pdev->error_state = pci_channel_io_normal;
145 ers_res = driver->err_handler->slot_reset(pdev);
146 if (ers_result_indicates_abort(ers_res)) {
147 pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
148 return ers_res;
149 }
150
151 return ers_res;
152}
153
154/* zpci_event_attempt_error_recovery - Try to recover the given PCI function
155 * @pdev: PCI function to recover currently in the error state
156 *
157 * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
158 * With the simplification that recovery always happens per function
159 * and the platform determines which functions are affected for
160 * multi-function devices.
161 */
162static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
163{
164 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
165 struct pci_driver *driver;
166
167 /*
168 * Ensure that the PCI function is not removed concurrently, no driver
169 * is unbound or probed and that userspace can't access its
170 * configuration space while we perform recovery.
171 */
172 pci_dev_lock(pdev);
173 if (pdev->error_state == pci_channel_io_perm_failure) {
174 ers_res = PCI_ERS_RESULT_DISCONNECT;
175 goto out_unlock;
176 }
177 pdev->error_state = pci_channel_io_frozen;
178
179 if (is_passed_through(to_zpci(pdev))) {
180 pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
181 pci_name(pdev));
182 goto out_unlock;
183 }
184
185 driver = to_pci_driver(pdev->dev.driver);
186 if (!is_driver_supported(driver)) {
187 if (!driver)
188 pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
189 pci_name(pdev));
190 else
191 pr_info("%s: The %s driver bound to the device does not support error recovery\n",
192 pci_name(pdev),
193 driver->name);
194 goto out_unlock;
195 }
196
197 ers_res = zpci_event_notify_error_detected(pdev, driver);
198 if (ers_result_indicates_abort(ers_res))
199 goto out_unlock;
200
201 if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
202 ers_res = zpci_event_do_error_state_clear(pdev, driver);
203 if (ers_result_indicates_abort(ers_res))
204 goto out_unlock;
205 }
206
207 if (ers_res == PCI_ERS_RESULT_NEED_RESET)
208 ers_res = zpci_event_do_reset(pdev, driver);
209
210 if (ers_res != PCI_ERS_RESULT_RECOVERED) {
211 pr_err("%s: Automatic recovery failed; operator intervention is required\n",
212 pci_name(pdev));
213 goto out_unlock;
214 }
215
216 pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
217 if (driver->err_handler->resume)
218 driver->err_handler->resume(pdev);
219out_unlock:
220 pci_dev_unlock(pdev);
221
222 return ers_res;
223}
224
225/* zpci_event_io_failure - Report PCI channel failure state to driver
226 * @pdev: PCI function for which to report
227 * @es: PCI channel failure state to report
228 */
229static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
230{
231 struct pci_driver *driver;
232
233 pci_dev_lock(pdev);
234 pdev->error_state = es;
235 /**
236 * While vfio-pci's error_detected callback notifies user-space QEMU
237 * reacts to this by freezing the guest. In an s390 environment PCI
238 * errors are rarely fatal so this is overkill. Instead in the future
239 * we will inject the error event and let the guest recover the device
240 * itself.
241 */
242 if (is_passed_through(to_zpci(pdev)))
243 goto out;
244 driver = to_pci_driver(pdev->dev.driver);
245 if (driver && driver->err_handler && driver->err_handler->error_detected)
246 driver->err_handler->error_detected(pdev, pdev->error_state);
247out:
248 pci_dev_unlock(pdev);
249}
250
251static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
252{
253 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
254 struct pci_dev *pdev = NULL;
255 pci_ers_result_t ers_res;
256
257 zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
258 ccdf->fid, ccdf->fh, ccdf->pec);
259 zpci_err("error CCDF:\n");
260 zpci_err_hex(ccdf, sizeof(*ccdf));
261
262 if (zdev) {
263 zpci_update_fh(zdev, ccdf->fh);
264 if (zdev->zbus->bus)
265 pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
266 }
267
268 pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
269 pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
270
271 if (!pdev)
272 goto no_pdev;
273
274 switch (ccdf->pec) {
275 case 0x003a: /* Service Action or Error Recovery Successful */
276 ers_res = zpci_event_attempt_error_recovery(pdev);
277 if (ers_res != PCI_ERS_RESULT_RECOVERED)
278 zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
279 break;
280 default:
281 /*
282 * Mark as frozen not permanently failed because the device
283 * could be subsequently recovered by the platform.
284 */
285 zpci_event_io_failure(pdev, pci_channel_io_frozen);
286 break;
287 }
288 pci_dev_put(pdev);
289no_pdev:
290 zpci_zdev_put(zdev);
291}
292
/*
 * zpci_event_error - Entry point for PCI function error events
 * @data: event payload, handed on as a struct zpci_ccdf_err
 *
 * The event is dropped when zpci_is_enabled() reports false.
 */
void zpci_event_error(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_error(data);
}
298
299static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
300{
301 zpci_update_fh(zdev, fh);
302 /* Give the driver a hint that the function is
303 * already unusable.
304 */
305 zpci_bus_remove_device(zdev, true);
306 /* Even though the device is already gone we still
307 * need to free zPCI resources as part of the disable.
308 */
309 if (zdev->dma_table)
310 zpci_dma_exit_device(zdev);
311 if (zdev_enabled(zdev))
312 zpci_disable_device(zdev);
313 zdev->state = ZPCI_FN_STATE_STANDBY;
314}
315
316static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
317{
318 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
319 bool existing_zdev = !!zdev;
320 enum zpci_state state;
321
322 zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
323 ccdf->fid, ccdf->fh, ccdf->pec);
324 switch (ccdf->pec) {
325 case 0x0301: /* Reserved|Standby -> Configured */
326 if (!zdev) {
327 zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
328 if (IS_ERR(zdev))
329 break;
330 } else {
331 /* the configuration request may be stale */
332 if (zdev->state != ZPCI_FN_STATE_STANDBY)
333 break;
334 zdev->state = ZPCI_FN_STATE_CONFIGURED;
335 }
336 zpci_scan_configured_device(zdev, ccdf->fh);
337 break;
338 case 0x0302: /* Reserved -> Standby */
339 if (!zdev)
340 zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
341 else
342 zpci_update_fh(zdev, ccdf->fh);
343 break;
344 case 0x0303: /* Deconfiguration requested */
345 if (zdev) {
346 /* The event may have been queued before we confirgured
347 * the device.
348 */
349 if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
350 break;
351 zpci_update_fh(zdev, ccdf->fh);
352 zpci_deconfigure_device(zdev);
353 }
354 break;
355 case 0x0304: /* Configured -> Standby|Reserved */
356 if (zdev) {
357 /* The event may have been queued before we confirgured
358 * the device.:
359 */
360 if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
361 zpci_event_hard_deconfigured(zdev, ccdf->fh);
362 /* The 0x0304 event may immediately reserve the device */
363 if (!clp_get_state(zdev->fid, &state) &&
364 state == ZPCI_FN_STATE_RESERVED) {
365 zpci_device_reserved(zdev);
366 }
367 }
368 break;
369 case 0x0306: /* 0x308 or 0x302 for multiple devices */
370 zpci_remove_reserved_devices();
371 clp_scan_pci_devices();
372 break;
373 case 0x0308: /* Standby -> Reserved */
374 if (!zdev)
375 break;
376 zpci_device_reserved(zdev);
377 break;
378 default:
379 break;
380 }
381 if (existing_zdev)
382 zpci_zdev_put(zdev);
383}
384
/*
 * zpci_event_availability - Entry point for PCI function availability events
 * @data: event payload, handed on as a struct zpci_ccdf_avail
 *
 * The event is dropped when zpci_is_enabled() reports false.
 */
void zpci_event_availability(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_availability(data);
}
1/*
2 * Copyright IBM Corp. 2012
3 *
4 * Author(s):
5 * Jan Glauber <jang@linux.vnet.ibm.com>
6 */
7
8#define KMSG_COMPONENT "zpci"
9#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
10
11#include <linux/kernel.h>
12#include <linux/pci.h>
13#include <asm/pci_debug.h>
14#include <asm/sclp.h>
15
16/* Content Code Description for PCI Function Error */
17struct zpci_ccdf_err {
18 u32 reserved1;
19 u32 fh; /* function handle */
20 u32 fid; /* function id */
21 u32 ett : 4; /* expected table type */
22 u32 mvn : 12; /* MSI vector number */
23 u32 dmaas : 8; /* DMA address space */
24 u32 : 6;
25 u32 q : 1; /* event qualifier */
26 u32 rw : 1; /* read/write */
27 u64 faddr; /* failing address */
28 u32 reserved3;
29 u16 reserved4;
30 u16 pec; /* PCI event code */
31} __packed;
32
33/* Content Code Description for PCI Function Availability */
34struct zpci_ccdf_avail {
35 u32 reserved1;
36 u32 fh; /* function handle */
37 u32 fid; /* function id */
38 u32 reserved2;
39 u32 reserved3;
40 u32 reserved4;
41 u32 reserved5;
42 u16 reserved6;
43 u16 pec; /* PCI event code */
44} __packed;
45
46static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
47{
48 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
49 struct pci_dev *pdev = NULL;
50
51 zpci_err("error CCDF:\n");
52 zpci_err_hex(ccdf, sizeof(*ccdf));
53
54 if (zdev)
55 pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
56
57 pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
58 pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
59
60 if (!pdev)
61 return;
62
63 pdev->error_state = pci_channel_io_perm_failure;
64 pci_dev_put(pdev);
65}
66
/*
 * zpci_event_error - Entry point for PCI function error events
 * @data: event payload, handed on as a struct zpci_ccdf_err
 *
 * Nothing is done unless zpci_is_enabled() reports true.
 */
void zpci_event_error(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_error(data);
}
72
73static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
74{
75 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
76 struct pci_dev *pdev = NULL;
77 int ret;
78
79 if (zdev)
80 pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
81
82 pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
83 pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
84 zpci_err("avail CCDF:\n");
85 zpci_err_hex(ccdf, sizeof(*ccdf));
86
87 switch (ccdf->pec) {
88 case 0x0301: /* Reserved|Standby -> Configured */
89 if (!zdev) {
90 ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
91 if (ret)
92 break;
93 zdev = get_zdev_by_fid(ccdf->fid);
94 }
95 if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
96 break;
97 zdev->state = ZPCI_FN_STATE_CONFIGURED;
98 zdev->fh = ccdf->fh;
99 ret = zpci_enable_device(zdev);
100 if (ret)
101 break;
102 pci_lock_rescan_remove();
103 pci_rescan_bus(zdev->bus);
104 pci_unlock_rescan_remove();
105 break;
106 case 0x0302: /* Reserved -> Standby */
107 if (!zdev)
108 clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
109 break;
110 case 0x0303: /* Deconfiguration requested */
111 if (pdev)
112 pci_stop_and_remove_bus_device_locked(pdev);
113
114 ret = zpci_disable_device(zdev);
115 if (ret)
116 break;
117
118 ret = sclp_pci_deconfigure(zdev->fid);
119 zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
120 if (!ret)
121 zdev->state = ZPCI_FN_STATE_STANDBY;
122
123 break;
124 case 0x0304: /* Configured -> Standby */
125 if (pdev) {
126 /* Give the driver a hint that the function is
127 * already unusable. */
128 pdev->error_state = pci_channel_io_perm_failure;
129 pci_stop_and_remove_bus_device_locked(pdev);
130 }
131
132 zdev->fh = ccdf->fh;
133 zpci_disable_device(zdev);
134 zdev->state = ZPCI_FN_STATE_STANDBY;
135 break;
136 case 0x0306: /* 0x308 or 0x302 for multiple devices */
137 clp_rescan_pci_devices();
138 break;
139 case 0x0308: /* Standby -> Reserved */
140 if (!zdev)
141 break;
142 pci_stop_root_bus(zdev->bus);
143 pci_remove_root_bus(zdev->bus);
144 break;
145 default:
146 break;
147 }
148 if (pdev)
149 pci_dev_put(pdev);
150}
151
/*
 * zpci_event_availability - Entry point for PCI function availability events
 * @data: event payload, handed on as a struct zpci_ccdf_avail
 *
 * Nothing is done unless zpci_is_enabled() reports true.
 */
void zpci_event_availability(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_availability(data);
}