Loading...
Note: File does not exist in v6.2.
1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2023-2024 Intel Corporation
4 */
5
6#include <drm/drm_managed.h>
7
8#include "abi/guc_actions_sriov_abi.h"
9
10#include "xe_device.h"
11#include "xe_gt.h"
12#include "xe_gt_sriov_pf.h"
13#include "xe_gt_sriov_pf_config.h"
14#include "xe_gt_sriov_pf_control.h"
15#include "xe_gt_sriov_pf_helpers.h"
16#include "xe_gt_sriov_pf_migration.h"
17#include "xe_gt_sriov_pf_monitor.h"
18#include "xe_gt_sriov_pf_service.h"
19#include "xe_gt_sriov_printk.h"
20#include "xe_guc_ct.h"
21#include "xe_sriov.h"
22
23static const char *control_cmd_to_string(u32 cmd)
24{
25 switch (cmd) {
26 case GUC_PF_TRIGGER_VF_PAUSE:
27 return "PAUSE";
28 case GUC_PF_TRIGGER_VF_RESUME:
29 return "RESUME";
30 case GUC_PF_TRIGGER_VF_STOP:
31 return "STOP";
32 case GUC_PF_TRIGGER_VF_FLR_START:
33 return "FLR_START";
34 case GUC_PF_TRIGGER_VF_FLR_FINISH:
35 return "FLR_FINISH";
36 default:
37 return "<unknown>";
38 }
39}
40
41static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
42{
43 u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
44 FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
45 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
46 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
47 FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
48 FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
49 };
50 int ret;
51
52 ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
53 return ret > 0 ? -EPROTO : ret;
54}
55
56static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
57{
58 int err;
59
60 xe_gt_assert(gt, vfid != PFID);
61 xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
62 vfid, control_cmd_to_string(cmd));
63
64 err = guc_action_vf_control_cmd(>->uc.guc, vfid, cmd);
65 if (unlikely(err))
66 xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
67 vfid, control_cmd_to_string(cmd), ERR_PTR(err));
68 return err;
69}
70
71static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
72{
73 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
74}
75
76static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
77{
78 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
79}
80
81static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
82{
83 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
84}
85
86static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
87{
88 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
89}
90
91static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
92{
93 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
94}
95
96/**
97 * DOC: The VF state machine
98 *
99 * The simplified VF state machine could be presented as::
100 *
 *	        pause--------------------------o
 *	       /                               |
 *	      /                                v
 *	(READY)<------------------resume-----(PAUSED)
 *	   ^   \                             /    /
 *	   |    \                           /    /
 *	   |     stop---->(STOPPED)<----stop    /
 *	   |                  /                /
 *	   |                 /                /
 *	   o--------<-----flr                /
 *	    \                               /
 *	     o------<--------------------flr
113 *
114 * Where:
115 *
116 * * READY - represents a state in which VF is fully operable
117 * * PAUSED - represents a state in which VF activity is temporarily suspended
118 * * STOPPED - represents a state in which VF activity is definitely halted
119 * * pause - represents a request to temporarily suspend VF activity
120 * * resume - represents a request to resume VF activity
121 * * stop - represents a request to definitely halt VF activity
122 * * flr - represents a request to perform VF FLR to restore VF activity
123 *
 * However, each state transition requires additional steps that involve
 * communication with GuC that might fail or be interrupted by other requests::
126 *
127 * .................................WIP....
128 * : :
129 * pause--------------------->PAUSE_WIP----------------------------o
130 * / : / \ : |
131 * / : o----<---stop flr--o : |
132 * / : | \ / | : V
133 * (READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
134 * ^ \ \ : | | : / /
135 * | \ \ : | | : / /
136 * | \ \ : | | : / /
137 * | \ \ : o----<----------------------+--<-------stop /
138 * | \ \ : | | : /
139 * | \ \ : V | : /
140 * | \ stop----->STOP_WIP---------flr--->-----o : /
141 * | \ : | | : /
142 * | \ : | V : /
143 * | flr--------+----->----------------->FLR_WIP<-----flr
144 * | : | / ^ :
145 * | : | / | :
146 * o--------<-------:----+-----<----------------o | :
147 * : | | :
148 * :....|...........................|.....:
149 * | |
150 * V |
151 * (STOPPED)--------------------flr
152 *
153 * For details about each internal WIP state machine see:
154 *
155 * * `The VF PAUSE state machine`_
156 * * `The VF RESUME state machine`_
157 * * `The VF STOP state machine`_
158 * * `The VF FLR state machine`_
159 */
160
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
/*
 * Return the name of a VF control state bit with the XE_GT_SRIOV_STATE_
 * prefix dropped, or "?" for a bit that has no case below.
 * Only built when CONFIG_DRM_XE_DEBUG_SRIOV is set.
 */
static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
{
#define STATE_NAME(_NAME) \
	case XE_GT_SRIOV_STATE_##_NAME: return #_NAME

	switch (bit) {
	/* top-level "work in progress" marker */
	STATE_NAME(WIP);
	/* FLR sequence */
	STATE_NAME(FLR_WIP);
	STATE_NAME(FLR_SEND_START);
	STATE_NAME(FLR_WAIT_GUC);
	STATE_NAME(FLR_GUC_DONE);
	STATE_NAME(FLR_RESET_CONFIG);
	STATE_NAME(FLR_RESET_DATA);
	STATE_NAME(FLR_RESET_MMIO);
	STATE_NAME(FLR_SEND_FINISH);
	STATE_NAME(FLR_FAILED);
	/* PAUSE sequence */
	STATE_NAME(PAUSE_WIP);
	STATE_NAME(PAUSE_SEND_PAUSE);
	STATE_NAME(PAUSE_WAIT_GUC);
	STATE_NAME(PAUSE_GUC_DONE);
	STATE_NAME(PAUSE_SAVE_GUC);
	STATE_NAME(PAUSE_FAILED);
	STATE_NAME(PAUSED);
	/* RESUME sequence */
	STATE_NAME(RESUME_WIP);
	STATE_NAME(RESUME_SEND_RESUME);
	STATE_NAME(RESUME_FAILED);
	STATE_NAME(RESUMED);
	/* STOP sequence */
	STATE_NAME(STOP_WIP);
	STATE_NAME(STOP_SEND_STOP);
	STATE_NAME(STOP_FAILED);
	STATE_NAME(STOPPED);
	/* inconsistency marker */
	STATE_NAME(MISMATCH);
	default: return "?";
	}

#undef STATE_NAME
}
#endif
198
199static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
200{
201 switch (bit) {
202 case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
203 case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
204 return HZ / 2;
205 case XE_GT_SRIOV_STATE_FLR_WIP:
206 case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
207 return 5 * HZ;
208 default:
209 return HZ;
210 }
211}
212
213static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
214{
215 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
216 xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
217
218 return >->sriov.pf.vfs[vfid].control;
219}
220
221static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
222{
223 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
224
225 return &cs->state;
226}
227
228static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
229 enum xe_gt_sriov_control_bits bit)
230{
231 return test_bit(bit, pf_peek_vf_state(gt, vfid));
232}
233
234static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
235{
236 unsigned long state = *pf_peek_vf_state(gt, vfid);
237 enum xe_gt_sriov_control_bits bit;
238
239 if (state) {
240 xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
241 vfid, state, state ? " bits " : "",
242 (int)BITS_PER_LONG, &state);
243 for_each_set_bit(bit, &state, BITS_PER_LONG)
244 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
245 vfid, control_bit_to_string(bit), bit);
246 } else {
247 xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
248 }
249}
250
251static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
252 enum xe_gt_sriov_control_bits bit)
253{
254 bool result = pf_check_vf_state(gt, vfid, bit);
255
256 if (unlikely(!result))
257 pf_dump_vf_state(gt, vfid);
258
259 return result;
260}
261
262static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
263 enum xe_gt_sriov_control_bits bit)
264{
265 bool result = !pf_check_vf_state(gt, vfid, bit);
266
267 if (unlikely(!result))
268 pf_dump_vf_state(gt, vfid);
269
270 return result;
271}
272
273static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
274 enum xe_gt_sriov_control_bits bit)
275{
276 if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
277 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n",
278 vfid, control_bit_to_string(bit), bit);
279 return true;
280 }
281 return false;
282}
283
284static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
285 enum xe_gt_sriov_control_bits bit)
286{
287 if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
288 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n",
289 vfid, control_bit_to_string(bit), bit);
290 return true;
291 }
292 return false;
293}
294
295static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
296 enum xe_gt_sriov_control_bits bit)
297{
298 if (pf_exit_vf_state(gt, vfid, bit))
299 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
300 vfid, control_bit_to_string(bit), bit,
301 __builtin_return_address(0));
302}
303
304static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
305{
306 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
307 xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
308 vfid, __builtin_return_address(0));
309 pf_dump_vf_state(gt, vfid);
310 }
311}
312
313static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
314{
315 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
316 xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
317 vfid, __builtin_return_address(0));
318
319 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
320 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
321 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
322 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
323}
324
/*
 * Used where a state transition found the state machine in an unexpected
 * condition; currently this just flags the VF state as MISMATCH.
 */
#define pf_enter_vf_state_machine_bug(gt, vfid) \
	({ pf_enter_vf_mismatch((gt), (vfid)); })
328
329static void pf_queue_control_worker(struct xe_gt *gt)
330{
331 struct xe_device *xe = gt_to_xe(gt);
332
333 xe_gt_assert(gt, IS_SRIOV_PF(xe));
334
335 queue_work(xe->sriov.wq, >->sriov.pf.control.worker);
336}
337
338static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
339{
340 struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control;
341
342 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
343
344 spin_lock(&pfc->lock);
345 list_move_tail(>->sriov.pf.vfs[vfid].control.link, &pfc->list);
346 spin_unlock(&pfc->lock);
347
348 pf_queue_control_worker(gt);
349}
350
/*
 * Forward declarations: pf_exit_vf_wip() tears down every per-operation WIP
 * state, but those helpers are defined further down next to their state
 * machines.
 */
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
355
356static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
357{
358 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
359 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
360
361 reinit_completion(&cs->done);
362 return true;
363 }
364 return false;
365}
366
367static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
368{
369 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
370 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
371
372 pf_exit_vf_flr_wip(gt, vfid);
373 pf_exit_vf_stop_wip(gt, vfid);
374 pf_exit_vf_pause_wip(gt, vfid);
375 pf_exit_vf_resume_wip(gt, vfid);
376
377 complete_all(&cs->done);
378 }
379}
380
381static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
382{
383 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
384
385 return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
386}
387
388static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
389{
390 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
391 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
392 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
393 pf_exit_vf_mismatch(gt, vfid);
394 pf_exit_vf_wip(gt, vfid);
395}
396
397/**
398 * DOC: The VF PAUSE state machine
399 *
400 * The VF PAUSE state machine looks like::
401 *
402 * (READY,RESUMED)<-------------<---------------------o---------o
403 * | \ \
404 * pause \ \
405 * | \ \
406 * ....V...........................PAUSE_WIP........ \ \
407 * : \ : o \
408 * : \ o------<-----busy : | \
409 * : \ / / : | |
410 * : PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED) |
411 * : | \ : | |
412 * : acked rejected---->----------o--->(MISMATCH) /
413 * : | : /
414 * : v : /
415 * : PAUSE_WAIT_GUC : /
416 * : | : /
417 * : done : /
418 * : | : /
419 * : v : /
420 * : PAUSE_GUC_DONE o-----restart
421 * : | :
422 * : | o---<--busy :
423 * : v / / :
424 * : PAUSE_SAVE_GUC :
425 * : / :
426 * : / :
427 * :....o..............o...............o...........:
428 * | | |
429 * completed flr stop
430 * | | |
431 * V .....V..... ......V.....
432 * (PAUSED) : FLR_WIP : : STOP_WIP :
433 * :.........: :..........:
434 *
435 * For the full state machine view, see `The VF state machine`_.
436 */
437
438static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
439{
440 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
441 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
442 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
443 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
444 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
445 }
446}
447
448static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
449{
450 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
451 pf_enter_vf_state_machine_bug(gt, vfid);
452
453 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
454 pf_exit_vf_mismatch(gt, vfid);
455 pf_exit_vf_wip(gt, vfid);
456}
457
/* PAUSE sequence finished: the VF moves to PAUSED. */
static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_paused(gt, vfid);
}
462
463static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
464{
465 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
466 pf_exit_vf_wip(gt, vfid);
467}
468
/* PAUSE request was rejected: flag a state mismatch, then fail the PAUSE. */
static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_pause_failed(gt, vfid);
}
474
475static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
476{
477 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
478 pf_enter_vf_state_machine_bug(gt, vfid);
479}
480
481static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
482{
483 int err;
484
485 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
486 return false;
487
488 err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
489 if (err) {
490 /* retry if busy */
491 if (err == -EBUSY) {
492 pf_enter_vf_pause_save_guc(gt, vfid);
493 return true;
494 }
495 /* give up on error */
496 if (err == -EIO)
497 pf_enter_vf_mismatch(gt, vfid);
498 }
499
500 pf_enter_vf_pause_completed(gt, vfid);
501 return true;
502}
503
504static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
505{
506 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
507 return false;
508
509 pf_enter_vf_pause_save_guc(gt, vfid);
510 return true;
511}
512
513static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
514{
515 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
516 pf_queue_vf(gt, vfid);
517}
518
519static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
520{
521 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
522 pf_enter_vf_state_machine_bug(gt, vfid);
523}
524
525static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
526{
527 return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
528}
529
530static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
531{
532 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
533 pf_enter_vf_state_machine_bug(gt, vfid);
534
535 pf_queue_vf(gt, vfid);
536}
537
538static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
539{
540 int err;
541
542 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
543 return false;
544
545 /* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
546 pf_enter_pause_wait_guc(gt, vfid);
547
548 err = pf_send_vf_pause(gt, vfid);
549 if (err) {
550 /* send failed, so we shouldn't expect PAUSE_DONE from GuC */
551 pf_exit_pause_wait_guc(gt, vfid);
552
553 if (err == -EBUSY)
554 pf_enter_vf_pause_send_pause(gt, vfid);
555 else if (err == -EIO)
556 pf_enter_vf_pause_rejected(gt, vfid);
557 else
558 pf_enter_vf_pause_failed(gt, vfid);
559 } else {
560 /*
561 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
562 * but since GuC didn't complain, we may clear MISMATCH
563 */
564 pf_exit_vf_mismatch(gt, vfid);
565 }
566
567 return true;
568}
569
570static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
571{
572 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
573 pf_enter_vf_wip(gt, vfid);
574 pf_enter_vf_pause_send_pause(gt, vfid);
575 return true;
576 }
577
578 return false;
579}
580
581/**
582 * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
583 * @gt: the &xe_gt
584 * @vfid: the VF identifier
585 *
586 * This function is for PF only.
587 *
588 * Return: 0 on success or a negative error code on failure.
589 */
590int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
591{
592 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
593 int err;
594
595 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
596 xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
597 return -EPERM;
598 }
599
600 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
601 xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
602 return -ESTALE;
603 }
604
605 if (!pf_enter_vf_pause_wip(gt, vfid)) {
606 xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
607 return -EALREADY;
608 }
609
610 err = pf_wait_vf_wip_done(gt, vfid, timeout);
611 if (err) {
612 xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
613 vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
614 return err;
615 }
616
617 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
618 xe_gt_sriov_info(gt, "VF%u paused!\n", vfid);
619 return 0;
620 }
621
622 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
623 xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
624 return -EIO;
625 }
626
627 xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
628 return -ECANCELED;
629}
630
631/**
632 * DOC: The VF RESUME state machine
633 *
634 * The VF RESUME state machine looks like::
635 *
636 * (PAUSED)<-----------------<------------------------o
637 * | \
638 * resume \
639 * | \
640 * ....V............................RESUME_WIP...... \
641 * : \ : o
642 * : \ o-------<-----busy : |
643 * : \ / / : |
644 * : RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
645 * : / \ : |
646 * : acked rejected---->---------o--->(MISMATCH)
647 * : / :
648 * :....o..............o...............o.....o.....:
649 * | | | \
650 * completed flr stop restart-->(READY)
651 * | | |
652 * V .....V..... ......V.....
653 * (RESUMED) : FLR_WIP : : STOP_WIP :
654 * :.........: :..........:
655 *
656 * For the full state machine view, see `The VF state machine`_.
657 */
658
659static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
660{
661 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
662 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
663}
664
665static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
666{
667 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
668 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
669 pf_exit_vf_mismatch(gt, vfid);
670 pf_exit_vf_wip(gt, vfid);
671}
672
/* RESUME sequence finished: the VF moves to RESUMED. */
static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_resumed(gt, vfid);
}
677
678static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
679{
680 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
681 pf_exit_vf_wip(gt, vfid);
682}
683
/* RESUME request was rejected: flag a state mismatch, then fail the RESUME. */
static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_resume_failed(gt, vfid);
}
689
690static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
691{
692 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
693 pf_enter_vf_state_machine_bug(gt, vfid);
694
695 pf_queue_vf(gt, vfid);
696}
697
698static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
699{
700 int err;
701
702 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
703 return false;
704
705 err = pf_send_vf_resume(gt, vfid);
706 if (err == -EBUSY)
707 pf_enter_vf_resume_send_resume(gt, vfid);
708 else if (err == -EIO)
709 pf_enter_vf_resume_rejected(gt, vfid);
710 else if (err)
711 pf_enter_vf_resume_failed(gt, vfid);
712 else
713 pf_enter_vf_resume_completed(gt, vfid);
714 return true;
715}
716
717static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
718{
719 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
720 pf_enter_vf_wip(gt, vfid);
721 pf_enter_vf_resume_send_resume(gt, vfid);
722 return true;
723 }
724
725 return false;
726}
727
728/**
729 * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
730 * @gt: the &xe_gt
731 * @vfid: the VF identifier
732 *
733 * This function is for PF only.
734 *
735 * Return: 0 on success or a negative error code on failure.
736 */
737int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
738{
739 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
740 int err;
741
742 if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
743 xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
744 return -EPERM;
745 }
746
747 if (!pf_enter_vf_resume_wip(gt, vfid)) {
748 xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
749 return -EALREADY;
750 }
751
752 err = pf_wait_vf_wip_done(gt, vfid, timeout);
753 if (err)
754 return err;
755
756 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
757 xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid);
758 return 0;
759 }
760
761 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
762 xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
763 return -EIO;
764 }
765
766 xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
767 return -ECANCELED;
768}
769
770/**
771 * DOC: The VF STOP state machine
772 *
773 * The VF STOP state machine looks like::
774 *
775 * (READY,PAUSED,RESUMED)<-------<--------------------o
776 * | \
777 * stop \
778 * | \
779 * ....V..............................STOP_WIP...... \
780 * : \ : o
781 * : \ o----<----busy : |
782 * : \ / / : |
783 * : STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
784 * : / \ : |
785 * : acked rejected-------->--------o--->(MISMATCH)
786 * : / :
787 * :....o..............o...............o...........:
788 * | | |
789 * completed flr restart
790 * | | |
791 * V .....V..... V
792 * (STOPPED) : FLR_WIP : (READY)
793 * :.........:
794 *
795 * For the full state machine view, see `The VF state machine`_.
796 */
797
798static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
799{
800 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
801 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
802}
803
804static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
805{
806 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
807 pf_enter_vf_state_machine_bug(gt, vfid);
808
809 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
810 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
811 pf_exit_vf_mismatch(gt, vfid);
812 pf_exit_vf_wip(gt, vfid);
813}
814
/* STOP sequence finished: the VF moves to STOPPED. */
static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_stopped(gt, vfid);
}
819
820static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
821{
822 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
823 pf_exit_vf_wip(gt, vfid);
824}
825
/* STOP request was rejected: flag a state mismatch, then fail the STOP. */
static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_stop_failed(gt, vfid);
}
831
832static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
833{
834 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
835 pf_enter_vf_state_machine_bug(gt, vfid);
836
837 pf_queue_vf(gt, vfid);
838}
839
840static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
841{
842 int err;
843
844 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
845 return false;
846
847 err = pf_send_vf_stop(gt, vfid);
848 if (err == -EBUSY)
849 pf_enter_vf_stop_send_stop(gt, vfid);
850 else if (err == -EIO)
851 pf_enter_vf_stop_rejected(gt, vfid);
852 else if (err)
853 pf_enter_vf_stop_failed(gt, vfid);
854 else
855 pf_enter_vf_stop_completed(gt, vfid);
856 return true;
857}
858
859static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
860{
861 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
862 pf_enter_vf_wip(gt, vfid);
863 pf_enter_vf_stop_send_stop(gt, vfid);
864 return true;
865 }
866 return false;
867}
868
869/**
870 * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
871 * @gt: the &xe_gt
872 * @vfid: the VF identifier
873 *
874 * This function is for PF only.
875 *
876 * Return: 0 on success or a negative error code on failure.
877 */
878int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
879{
880 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
881 int err;
882
883 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
884 xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
885 return -ESTALE;
886 }
887
888 if (!pf_enter_vf_stop_wip(gt, vfid)) {
889 xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
890 return -EALREADY;
891 }
892
893 err = pf_wait_vf_wip_done(gt, vfid, timeout);
894 if (err)
895 return err;
896
897 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
898 xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid);
899 return 0;
900 }
901
902 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
903 xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
904 return -EIO;
905 }
906
907 xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
908 return -ECANCELED;
909}
910
911/**
912 * DOC: The VF FLR state machine
913 *
914 * The VF FLR state machine looks like::
915 *
916 * (READY,PAUSED,STOPPED)<------------<--------------o
917 * | \
918 * flr \
919 * | \
920 * ....V..........................FLR_WIP........... \
921 * : \ : \
922 * : \ o----<----busy : |
923 * : \ / / : |
924 * : FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
925 * : | \ : | |
926 * : acked rejected----->-----------o--->(MISMATCH) |
927 * : | : ^ |
928 * : v : | |
929 * : FLR_WAIT_GUC : | |
930 * : | : | |
931 * : done : | |
932 * : | : | |
933 * : v : | |
934 * : FLR_GUC_DONE : | |
935 * : | : | |
936 * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
937 * : | : | |
938 * : FLR_RESET_DATA : | |
939 * : | : | |
940 * : FLR_RESET_MMIO : | |
941 * : | : | |
942 * : | o----<----busy : | |
943 * : |/ / : | |
944 * : FLR_SEND_FINISH----failed--->-----------o--------+-----------o
945 * : / \ : |
946 * : acked rejected----->-----------o--------o
947 * : / :
948 * :....o..............................o...........:
949 * | |
950 * completed restart
951 * | /
952 * V /
953 * (READY)<----------<------------o
954 *
955 * For the full state machine view, see `The VF state machine`_.
956 */
957
958static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
959{
960 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
961 pf_enter_vf_state_machine_bug(gt, vfid);
962
963 pf_queue_vf(gt, vfid);
964}
965
966static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
967{
968 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
969 xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
970 return;
971 }
972
973 pf_enter_vf_wip(gt, vfid);
974 pf_enter_vf_flr_send_start(gt, vfid);
975}
976
977static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
978{
979 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
980 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
981 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
982 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
983 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
984 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
985 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
986 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
987 }
988}
989
/* FLR sequence finished: the VF goes back to READY. */
static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_ready(gt, vfid);
}
994
995static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
996{
997 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
998 xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
999 pf_exit_vf_wip(gt, vfid);
1000}
1001
/* FLR request was rejected: flag a state mismatch, then fail the FLR. */
static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_flr_failed(gt, vfid);
}
1007
1008static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1009{
1010 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1011 pf_enter_vf_state_machine_bug(gt, vfid);
1012
1013 pf_queue_vf(gt, vfid);
1014}
1015
1016static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
1017{
1018 int err;
1019
1020 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
1021 return false;
1022
1023 err = pf_send_vf_flr_finish(gt, vfid);
1024 if (err == -EBUSY)
1025 pf_enter_vf_flr_send_finish(gt, vfid);
1026 else if (err == -EIO)
1027 pf_enter_vf_flr_rejected(gt, vfid);
1028 else if (err)
1029 pf_enter_vf_flr_failed(gt, vfid);
1030 else
1031 pf_enter_vf_flr_completed(gt, vfid);
1032 return true;
1033}
1034
1035static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1036{
1037 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1038 pf_enter_vf_state_machine_bug(gt, vfid);
1039
1040 pf_queue_vf(gt, vfid);
1041}
1042
1043static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1044{
1045 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1046 return false;
1047
1048 xe_gt_sriov_pf_sanitize_hw(gt, vfid);
1049
1050 pf_enter_vf_flr_send_finish(gt, vfid);
1051 return true;
1052}
1053
1054static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1055{
1056 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1057 pf_enter_vf_state_machine_bug(gt, vfid);
1058
1059 pf_queue_vf(gt, vfid);
1060}
1061
1062static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1063{
1064 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1065 return false;
1066
1067 xe_gt_sriov_pf_service_reset(gt, vfid);
1068 xe_gt_sriov_pf_monitor_flr(gt, vfid);
1069
1070 pf_enter_vf_flr_reset_mmio(gt, vfid);
1071 return true;
1072}
1073
1074static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1075{
1076 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1077 pf_enter_vf_state_machine_bug(gt, vfid);
1078
1079 pf_queue_vf(gt, vfid);
1080}
1081
1082static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1083{
1084 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
1085 int err;
1086
1087 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1088 return false;
1089
1090 err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
1091 if (err)
1092 pf_enter_vf_flr_failed(gt, vfid);
1093 else
1094 pf_enter_vf_flr_reset_data(gt, vfid);
1095 return true;
1096}
1097
1098static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1099{
1100 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
1101 pf_enter_vf_state_machine_bug(gt, vfid);
1102}
1103
1104static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1105{
1106 return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
1107}
1108
1109static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
1110{
1111 int err;
1112
1113 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
1114 return false;
1115
1116 /* GuC may actually send a FLR_DONE before we get a RESPONSE */
1117 pf_enter_vf_flr_wait_guc(gt, vfid);
1118
1119 err = pf_send_vf_flr_start(gt, vfid);
1120 if (err) {
1121 /* send failed, so we shouldn't expect FLR_DONE from GuC */
1122 pf_exit_vf_flr_wait_guc(gt, vfid);
1123
1124 if (err == -EBUSY)
1125 pf_enter_vf_flr_send_start(gt, vfid);
1126 else if (err == -EIO)
1127 pf_enter_vf_flr_rejected(gt, vfid);
1128 else
1129 pf_enter_vf_flr_failed(gt, vfid);
1130 } else {
1131 /*
1132 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
1133 * but since GuC didn't complain, we may clear MISMATCH
1134 */
1135 pf_exit_vf_mismatch(gt, vfid);
1136 }
1137
1138 return true;
1139}
1140
1141static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1142{
1143 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1144 return false;
1145
1146 pf_enter_vf_flr_reset_config(gt, vfid);
1147 return true;
1148}
1149
1150static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1151{
1152 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1153 pf_queue_vf(gt, vfid);
1154}
1155
1156/**
1157 * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
1158 * @gt: the &xe_gt
1159 * @vfid: the VF identifier
1160 *
1161 * This function is for PF only.
1162 *
1163 * Return: 0 on success or a negative error code on failure.
1164 */
1165int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
1166{
1167 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
1168 int err;
1169
1170 pf_enter_vf_flr_wip(gt, vfid);
1171
1172 err = pf_wait_vf_wip_done(gt, vfid, timeout);
1173 if (err) {
1174 xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
1175 vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
1176 return err;
1177 }
1178
1179 if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1180 return -EIO;
1181
1182 return 0;
1183}
1184
/**
 * DOC: The VF FLR Flow with GuC
 *
 * The VF FLR flow includes several steps::
 *
 *	         PF                        GUC             PCI
 *	========================================================
 *	         |                          |               |
 *	(1)      |                         [ ] <----- FLR --|
 *	         |                         [ ]              :
 *	(2)     [ ] <-------- NOTIFY FLR --[ ]
 *	        [ ]                         |
 *	(3)     [ ]                         |
 *	        [ ]                         |
 *	        [ ]-- START FLR ---------> [ ]
 *	         |                         [ ]
 *	(4)      |                         [ ]
 *	         |                         [ ]
 *	        [ ] <--------- FLR DONE -- [ ]
 *	        [ ]                         |
 *	(5)     [ ]                         |
 *	        [ ]                         |
 *	        [ ]-- FINISH FLR --------> [ ]
 *	         |                          |
 *
 * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
 * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
 * * Step 2a: on some platforms G2H is only received from root GuC
 * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
 * * Step 3a: on some platforms PF must send H2G to all other GuCs
 * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
 * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
 */
1218
1219static bool needs_dispatch_flr(struct xe_device *xe)
1220{
1221 return xe->info.platform == XE_PVC;
1222}
1223
1224static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
1225{
1226 struct xe_device *xe = gt_to_xe(gt);
1227 struct xe_gt *gtit;
1228 unsigned int gtid;
1229
1230 xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);
1231
1232 if (needs_dispatch_flr(xe)) {
1233 for_each_gt(gtit, xe, gtid)
1234 pf_enter_vf_flr_wip(gtit, vfid);
1235 } else {
1236 pf_enter_vf_flr_wip(gt, vfid);
1237 }
1238}
1239
1240static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
1241{
1242 if (!pf_exit_vf_flr_wait_guc(gt, vfid)) {
1243 xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
1244 pf_enter_vf_mismatch(gt, vfid);
1245 return;
1246 }
1247
1248 pf_enter_vf_flr_guc_done(gt, vfid);
1249}
1250
1251static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
1252{
1253 if (!pf_exit_pause_wait_guc(gt, vfid)) {
1254 xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
1255 pf_enter_vf_mismatch(gt, vfid);
1256 return;
1257 }
1258
1259 pf_enter_vf_pause_guc_done(gt, vfid);
1260}
1261
1262static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
1263{
1264 xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);
1265
1266 if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
1267 return -EPROTO;
1268
1269 switch (eventid) {
1270 case GUC_PF_NOTIFY_VF_FLR:
1271 pf_handle_vf_flr(gt, vfid);
1272 break;
1273 case GUC_PF_NOTIFY_VF_FLR_DONE:
1274 pf_handle_vf_flr_done(gt, vfid);
1275 break;
1276 case GUC_PF_NOTIFY_VF_PAUSE_DONE:
1277 pf_handle_vf_pause_done(gt, vfid);
1278 break;
1279 case GUC_PF_NOTIFY_VF_FIXUP_DONE:
1280 break;
1281 default:
1282 return -ENOPKG;
1283 }
1284 return 0;
1285}
1286
1287static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
1288{
1289 switch (eventid) {
1290 case GUC_PF_NOTIFY_VF_ENABLE:
1291 xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
1292 str_enabled_disabled(true),
1293 str_enabled_disabled(false));
1294 break;
1295 default:
1296 return -ENOPKG;
1297 }
1298 return 0;
1299}
1300
1301/**
1302 * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
1303 * @gt: the &xe_gt
1304 * @msg: the G2H message
1305 * @len: the length of the G2H message
1306 *
1307 * This function is for PF only.
1308 *
1309 * Return: 0 on success or a negative error code on failure.
1310 */
1311int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
1312{
1313 u32 vfid;
1314 u32 eventid;
1315
1316 xe_gt_assert(gt, len);
1317 xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
1318 xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
1319 xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
1320 GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);
1321
1322 if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
1323 return -EPROTO;
1324
1325 if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
1326 return -EPFNOSUPPORT;
1327
1328 if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
1329 return -EPROTO;
1330
1331 vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
1332 eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);
1333
1334 return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
1335}
1336
1337static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
1338{
1339 if (pf_exit_vf_flr_send_start(gt, vfid))
1340 return true;
1341
1342 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
1343 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1344 control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
1345 return false;
1346 }
1347
1348 if (pf_exit_vf_flr_guc_done(gt, vfid))
1349 return true;
1350
1351 if (pf_exit_vf_flr_reset_config(gt, vfid))
1352 return true;
1353
1354 if (pf_exit_vf_flr_reset_data(gt, vfid))
1355 return true;
1356
1357 if (pf_exit_vf_flr_reset_mmio(gt, vfid))
1358 return true;
1359
1360 if (pf_exit_vf_flr_send_finish(gt, vfid))
1361 return true;
1362
1363 if (pf_exit_vf_stop_send_stop(gt, vfid))
1364 return true;
1365
1366 if (pf_exit_vf_pause_send_pause(gt, vfid))
1367 return true;
1368
1369 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
1370 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1371 control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
1372 return true;
1373 }
1374
1375 if (pf_exit_vf_pause_guc_done(gt, vfid))
1376 return true;
1377
1378 if (pf_exit_vf_pause_save_guc(gt, vfid))
1379 return true;
1380
1381 if (pf_exit_vf_resume_send_resume(gt, vfid))
1382 return true;
1383
1384 return false;
1385}
1386
1387static unsigned int pf_control_state_index(struct xe_gt *gt,
1388 struct xe_gt_sriov_control_state *cs)
1389{
1390 return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
1391}
1392
1393static void pf_worker_find_work(struct xe_gt *gt)
1394{
1395 struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control;
1396 struct xe_gt_sriov_control_state *cs;
1397 unsigned int vfid;
1398 bool empty;
1399 bool more;
1400
1401 spin_lock(&pfc->lock);
1402 cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
1403 if (cs)
1404 list_del_init(&cs->link);
1405 empty = list_empty(&pfc->list);
1406 spin_unlock(&pfc->lock);
1407
1408 if (!cs)
1409 return;
1410
1411 /* VF metadata structures are indexed by the VFID */
1412 vfid = pf_control_state_index(gt, cs);
1413 xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
1414
1415 more = pf_process_vf_state_machine(gt, vfid);
1416 if (more)
1417 pf_queue_vf(gt, vfid);
1418 else if (!empty)
1419 pf_queue_control_worker(gt);
1420}
1421
1422static void control_worker_func(struct work_struct *w)
1423{
1424 struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);
1425
1426 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1427 pf_worker_find_work(gt);
1428}
1429
1430static void pf_stop_worker(struct xe_gt *gt)
1431{
1432 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1433 cancel_work_sync(>->sriov.pf.control.worker);
1434}
1435
/*
 * DRM managed release action registered by xe_gt_sriov_pf_control_init():
 * stops the control worker of the GT passed as @data. @dev is unused.
 */
static void control_fini_action(struct drm_device *dev, void *data)
{
	/* @data is the &xe_gt handed to drmm_add_action_or_reset() */
	pf_stop_worker(data);
}
1442
1443/**
1444 * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
1445 * @gt: the &xe_gt
1446 *
1447 * This function is for PF only.
1448 *
1449 * Return: 0 on success or a negative error code on failure.
1450 */
1451int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
1452{
1453 struct xe_device *xe = gt_to_xe(gt);
1454 unsigned int n, totalvfs;
1455
1456 xe_gt_assert(gt, IS_SRIOV_PF(xe));
1457
1458 totalvfs = xe_sriov_pf_get_totalvfs(xe);
1459 for (n = 0; n <= totalvfs; n++) {
1460 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);
1461
1462 init_completion(&cs->done);
1463 INIT_LIST_HEAD(&cs->link);
1464 }
1465
1466 spin_lock_init(>->sriov.pf.control.lock);
1467 INIT_LIST_HEAD(>->sriov.pf.control.list);
1468 INIT_WORK(>->sriov.pf.control.worker, control_worker_func);
1469
1470 return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
1471}
1472
/**
 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
 * @gt: the &xe_gt
 *
 * Any per-VF status maintained by the PF or any ongoing VF control activity
 * performed by the PF must be reset or cancelled when the GT is reset.
 *
 * The control worker is stopped first, then every VF (1..totalvfs) is put
 * back into the ready state.
 *
 * This function is for PF only.
 */
void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int totalvfs;
	unsigned int vfid;

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	pf_stop_worker(gt);

	totalvfs = xe_sriov_pf_get_totalvfs(xe);

	/* VFIDs start at 1; index 0 is not touched here */
	for (vfid = 1; vfid <= totalvfs; vfid++)
		pf_enter_vf_ready(gt, vfid);
}