rotate.c - fs/afs/rotate.c - Linux diff v6.13.7 - Bootlin Elixir Cross Referencer

  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/* Handle fileserver selection and rotation.
  3 *
  4 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  5 * Written by David Howells (dhowells@redhat.com)
  6 */
  7
  8#include <linux/kernel.h>
  9#include <linux/slab.h>
 10#include <linux/fs.h>
 11#include <linux/sched.h>
 12#include <linux/delay.h>
 13#include <linux/sched/signal.h>
 14#include "internal.h"
 15#include "afs_fs.h"
 16#include "protocol_uae.h"
 17
 18void afs_clear_server_states(struct afs_operation *op)
 
 
 
 
 
 
 
 19{
 20	unsigned int i;
 21
 22	if (op->server_states) {
 23		for (i = 0; i < op->server_list->nr_servers; i++)
 24			afs_put_endpoint_state(op->server_states[i].endpoint_state,
 25					       afs_estate_trace_put_server_state);
 26		kfree(op->server_states);
 
 
 
 
 
 
 
 
 27	}
 
 
 
 
 28}
 29
 30/*
 31 * Begin iteration through a server list, starting with the vnode's last used
 32 * server if possible, or the last recorded good server if not.
 33 */
 34static bool afs_start_fs_iteration(struct afs_operation *op,
 35				   struct afs_vnode *vnode)
 36{
 37	struct afs_server *server;
 38	void *cb_server;
 39	int i;
 40
 41	trace_afs_rotate(op, afs_rotate_trace_start, 0);
 42
 43	read_lock(&op->volume->servers_lock);
 44	op->server_list = afs_get_serverlist(
 45		rcu_dereference_protected(op->volume->servers,
 46					  lockdep_is_held(&op->volume->servers_lock)));
 47	read_unlock(&op->volume->servers_lock);
 48
 49	op->server_states = kcalloc(op->server_list->nr_servers, sizeof(op->server_states[0]),
 50				    GFP_KERNEL);
 51	if (!op->server_states) {
 52		afs_op_nomem(op);
 53		trace_afs_rotate(op, afs_rotate_trace_nomem, 0);
 54		return false;
 55	}
 56
 57	rcu_read_lock();
 58	for (i = 0; i < op->server_list->nr_servers; i++) {
 59		struct afs_endpoint_state *estate;
 60		struct afs_server_state *s = &op->server_states[i];
 61
 62		server = op->server_list->servers[i].server;
 63		estate = rcu_dereference(server->endpoint_state);
 64		s->endpoint_state = afs_get_endpoint_state(estate,
 65							   afs_estate_trace_get_server_state);
 66		s->probe_seq = estate->probe_seq;
 67		s->untried_addrs = (1UL << estate->addresses->nr_addrs) - 1;
 68		init_waitqueue_entry(&s->probe_waiter, current);
 69		afs_get_address_preferences(op->net, estate->addresses);
 70	}
 71	rcu_read_unlock();
 72
 73
 74	op->untried_servers = (1UL << op->server_list->nr_servers) - 1;
 75	op->server_index = -1;
 76
 77	cb_server = vnode->cb_server;
 78	if (cb_server) {
 79		/* See if the vnode's preferred record is still available */
 80		for (i = 0; i < op->server_list->nr_servers; i++) {
 81			server = op->server_list->servers[i].server;
 82			if (server == cb_server) {
 83				op->server_index = i;
 84				goto found_interest;
 85			}
 86		}
 87
 88		/* If we have a lock outstanding on a server that's no longer
 89		 * serving this vnode, then we can't switch to another server
 90		 * and have to return an error.
 91		 */
 92		if (op->flags & AFS_OPERATION_CUR_ONLY) {
 93			afs_op_set_error(op, -ESTALE);
 94			trace_afs_rotate(op, afs_rotate_trace_stale_lock, 0);
 95			return false;
 96		}
 97
 98		/* Note that the callback promise is effectively broken */
 99		write_seqlock(&vnode->cb_lock);
100		ASSERTCMP(cb_server, ==, vnode->cb_server);
101		vnode->cb_server = NULL;
102		if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE)
103			vnode->cb_break++;
104		write_sequnlock(&vnode->cb_lock);
 
 
 
105	}
106
107found_interest:
108	return true;
109}
110
111/*
112 * Post volume busy note.
113 */
114static void afs_busy(struct afs_operation *op, u32 abort_code)
115{
116	const char *m;
117
118	switch (abort_code) {
119	case VOFFLINE:		m = "offline";		break;
120	case VRESTARTING:	m = "restarting";	break;
121	case VSALVAGING:	m = "being salvaged";	break;
122	default:		m = "busy";		break;
123	}
124
125	pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n",
126		  op->volume->vid, op->volume->name, &op->server->uuid, m);
127}
128
129/*
130 * Sleep and retry the operation to the same fileserver.
131 */
132static bool afs_sleep_and_retry(struct afs_operation *op)
133{
134	trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0);
135	if (!(op->flags & AFS_OPERATION_UNINTR)) {
136		msleep_interruptible(1000);
137		if (signal_pending(current)) {
138			afs_op_set_error(op, -ERESTARTSYS);
139			return false;
140		}
141	} else {
142		msleep(1000);
143	}
144
145	return true;
146}
147
148/*
149 * Select the fileserver to use.  May be called multiple times to rotate
150 * through the fileservers.
151 */
152bool afs_select_fileserver(struct afs_operation *op)
153{
154	struct afs_addr_list *alist;
155	struct afs_server *server;
156	struct afs_vnode *vnode = op->file[0].vnode;
157	unsigned long set, failed;
158	s32 abort_code = op->call_abort_code;
159	int best_prio = 0;
160	int error = op->call_error, addr_index, i, j;
161
162	op->nr_iterations++;
163
164	_enter("OP=%x+%x,%llx,%u{%lx},%u{%lx},%d,%d",
165	       op->debug_id, op->nr_iterations, op->volume->vid,
166	       op->server_index, op->untried_servers,
167	       op->addr_index, op->addr_tried,
168	       error, abort_code);
169
170	if (op->flags & AFS_OPERATION_STOP) {
171		trace_afs_rotate(op, afs_rotate_trace_stopped, 0);
172		_leave(" = f [stopped]");
173		return false;
174	}
175
176	if (op->nr_iterations == 0)
177		goto start;
178
179	WRITE_ONCE(op->estate->addresses->addrs[op->addr_index].last_error, error);
180	trace_afs_rotate(op, afs_rotate_trace_iter, op->call_error);
181
182	/* Evaluate the result of the previous operation, if there was one. */
183	switch (op->call_error) {
 
 
 
184	case 0:
185		clear_bit(AFS_SE_VOLUME_OFFLINE,
186			  &op->server_list->servers[op->server_index].flags);
187		clear_bit(AFS_SE_VOLUME_BUSY,
188			  &op->server_list->servers[op->server_index].flags);
189		op->cumul_error.responded = true;
190
191		/* We succeeded, but we may need to redo the op from another
192		 * server if we're looking at a set of RO volumes where some of
193		 * the servers have not yet been brought up to date lest we
194		 * regress the data.  We only switch to the new version once
195		 * >=50% of the servers are updated.
196		 */
197		error = afs_update_volume_state(op);
198		if (error != 0) {
199			if (error == 1) {
200				afs_sleep_and_retry(op);
201				goto restart_from_beginning;
202			}
203			afs_op_set_error(op, error);
204			goto failed;
205		}
206		fallthrough;
207	default:
208		/* Success or local failure.  Stop. */
209		afs_op_set_error(op, error);
210		op->flags |= AFS_OPERATION_STOP;
211		trace_afs_rotate(op, afs_rotate_trace_stop, error);
212		_leave(" = f [okay/local %d]", error);
213		return false;
214
215	case -ECONNABORTED:
216		/* The far side rejected the operation on some grounds.  This
217		 * might involve the server being busy or the volume having been moved.
218		 *
219		 * Note that various V* errors should not be sent to a cache manager
220		 * by a fileserver as they should be translated to more modern UAE*
221		 * errors instead.  IBM AFS and OpenAFS fileservers, however, do leak
222		 * these abort codes.
223		 */
224		trace_afs_rotate(op, afs_rotate_trace_aborted, abort_code);
225		op->cumul_error.responded = true;
226		switch (abort_code) {
227		case VNOVOL:
228			/* This fileserver doesn't know about the volume.
229			 * - May indicate that the VL is wrong - retry once and compare
230			 *   the results.
231			 * - May indicate that the fileserver couldn't attach to the vol.
232			 * - The volume might have been temporarily removed so that it can
233			 *   be replaced by a volume restore.  "vos" might have ended one
234			 *   transaction and has yet to create the next.
235			 * - The volume might not be blessed or might not be in-service
236			 *   (administrative action).
237			 */
238			if (op->flags & AFS_OPERATION_VNOVOL) {
239				afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
240				goto next_server;
241			}
242
243			write_lock(&op->volume->servers_lock);
244			op->server_list->vnovol_mask |= 1 << op->server_index;
245			write_unlock(&op->volume->servers_lock);
246
247			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
248			error = afs_check_volume_status(op->volume, op);
249			if (error < 0) {
250				afs_op_set_error(op, error);
251				goto failed;
252			}
253
254			if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
255				afs_op_set_error(op, -ENOMEDIUM);
256				goto failed;
257			}
258
259			/* If the server list didn't change, then assume that
260			 * it's the fileserver having trouble.
261			 */
262			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
263				afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
264				goto next_server;
265			}
266
267			/* Try again */
268			op->flags |= AFS_OPERATION_VNOVOL;
269			_leave(" = t [vnovol]");
270			return true;
271
 
272		case VVOLEXISTS:
273		case VONLINE:
274			/* These should not be returned from the fileserver. */
275			pr_warn("Fileserver returned unexpected abort %d\n",
276				abort_code);
277			afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
278			goto next_server;
279
280		case VNOSERVICE:
281			/* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver
282			 * if the volume was neither in-service nor administratively
283			 * blessed.  All usage was replaced by VNOVOL because AFS 3.1 and
284			 * earlier cache managers did not handle VNOSERVICE and assumed
285			 * it was the client OSes errno 105.
286			 *
287			 * Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the
288			 * fileserver idle dead time error which was sent in place of
289			 * RX_CALL_TIMEOUT (-3).  The error was intended to be sent if the
290			 * fileserver took too long to send a reply to the client.
291			 * RX_CALL_TIMEOUT would have caused the cache manager to mark the
292			 * server down whereas VNOSERVICE since AFS 3.2 would cause cache
293			 * manager to temporarily (up to 15 minutes) mark the volume
294			 * instance as unusable.
295			 *
296			 * The idle dead logic resulted in cache inconsistency since a
297			 * state changing call that the cache manager assumed was dead
298			 * could still be processed to completion by the fileserver.  This
299			 * logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer
300			 * returned.  However, many 1.4.8 through 1.6.24 fileservers are
301			 * still in existence.
302			 *
303			 * AuriStorFS fileservers have never returned VNOSERVICE.
304			 *
305			 * VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT.
306			 */
307		case RX_CALL_TIMEOUT:
308			afs_op_accumulate_error(op, -ETIMEDOUT, abort_code);
309			goto next_server;
310
311		case VSALVAGING: /* This error should not be leaked to cache managers
312				  * but is from OpenAFS demand attach fileservers.
313				  * It should be treated as an alias for VOFFLINE.
314				  */
315		case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */
316		case VOFFLINE:
317			/* The volume is in use by the volserver or another volume utility
318			 * for an operation that might alter the contents.  The volume is
319			 * expected to come back but it might take a long time (could be
320			 * days).
321			 */
322			if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE,
323					      &op->server_list->servers[op->server_index].flags)) {
324				afs_busy(op, abort_code);
325				clear_bit(AFS_SE_VOLUME_BUSY,
326					  &op->server_list->servers[op->server_index].flags);
327			}
328			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
329				afs_op_set_error(op, -EADV);
 
 
 
 
330				goto failed;
331			}
332			goto busy;
333
334		case VRESTARTING: /* The fileserver is either shutting down or starting up. */
 
335		case VBUSY:
336			/* The volume is in use by the volserver or another volume
337			 * utility for an operation that is not expected to alter the
338			 * contents of the volume.  VBUSY does not need to be returned
339			 * for a ROVOL or BACKVOL bound to an ITBusy volserver
340			 * transaction.  The fileserver is permitted to continue serving
341			 * content from ROVOLs and BACKVOLs during an ITBusy transaction
342			 * because the content will not change.  However, many fileserver
343			 * releases do return VBUSY for ROVOL and BACKVOL instances under
344			 * many circumstances.
345			 *
346			 * Retry after going round all the servers unless we have a file
347			 * lock we need to maintain.
348			 */
349			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
350				afs_op_set_error(op, -EBUSY);
351				goto failed;
352			}
353			if (!test_and_set_bit(AFS_SE_VOLUME_BUSY,
354					      &op->server_list->servers[op->server_index].flags)) {
355				afs_busy(op, abort_code);
356				clear_bit(AFS_SE_VOLUME_OFFLINE,
357					  &op->server_list->servers[op->server_index].flags);
358			}
359		busy:
360			if (op->flags & AFS_OPERATION_CUR_ONLY) {
361				if (!afs_sleep_and_retry(op))
362					goto failed;
363
364				/* Retry with same server & address */
365				_leave(" = t [vbusy]");
366				return true;
367			}
368
369			op->flags |= AFS_OPERATION_VBUSY;
370			goto next_server;
371
372		case VMOVED:
373			/* The volume migrated to another server.  We consider
374			 * consider all locks and callbacks broken and request
375			 * an update from the VLDB.
376			 *
377			 * We also limit the number of VMOVED hops we will
378			 * honour, just in case someone sets up a loop.
379			 */
380			if (op->flags & AFS_OPERATION_VMOVED) {
381				afs_op_set_error(op, -EREMOTEIO);
382				goto failed;
383			}
384			op->flags |= AFS_OPERATION_VMOVED;
385
386			set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
387			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
388			error = afs_check_volume_status(op->volume, op);
389			if (error < 0) {
390				afs_op_set_error(op, error);
391				goto failed;
392			}
393
394			/* If the server list didn't change, then the VLDB is
395			 * out of sync with the fileservers.  This is hopefully
396			 * a temporary condition, however, so we don't want to
397			 * permanently block access to the file.
398			 *
399			 * TODO: Try other fileservers if we can.
400			 *
401			 * TODO: Retry a few times with sleeps.
402			 */
403			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
404				afs_op_accumulate_error(op, -ENOMEDIUM, abort_code);
405				goto failed;
406			}
407
408			goto restart_from_beginning;
409
410		case UAEIO:
411		case VIO:
412			afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
413			if (op->volume->type != AFSVL_RWVOL)
414				goto next_server;
415			goto failed;
416
417		case VDISKFULL:
418		case UAENOSPC:
419			/* The partition is full.  Only applies to RWVOLs.
420			 * Translate locally and return ENOSPC.
421			 * No replicas to failover to.
422			 */
423			afs_op_set_error(op, -ENOSPC);
424			goto failed_but_online;
425
426		case VOVERQUOTA:
427		case UAEDQUOT:
428			/* Volume is full.  Only applies to RWVOLs.
429			 * Translate locally and return EDQUOT.
430			 * No replicas to failover to.
431			 */
432			afs_op_set_error(op, -EDQUOT);
433			goto failed_but_online;
434
435		default:
436			afs_op_accumulate_error(op, error, abort_code);
437		failed_but_online:
438			clear_bit(AFS_SE_VOLUME_OFFLINE,
439				  &op->server_list->servers[op->server_index].flags);
440			clear_bit(AFS_SE_VOLUME_BUSY,
441				  &op->server_list->servers[op->server_index].flags);
442			goto failed;
443		}
444
445	case -ETIMEDOUT:
446	case -ETIME:
447		if (afs_op_error(op) != -EDESTADDRREQ)
448			goto iterate_address;
449		fallthrough;
450	case -ERFKILL:
451	case -EADDRNOTAVAIL:
452	case -ENETUNREACH:
453	case -EHOSTUNREACH:
454	case -EHOSTDOWN:
455	case -ECONNREFUSED:
456		_debug("no conn");
457		afs_op_accumulate_error(op, error, 0);
458		goto iterate_address;
459
460	case -ENETRESET:
461		pr_warn("kAFS: Peer reset %s (op=%x)\n",
462			op->type ? op->type->name : "???", op->debug_id);
463		fallthrough;
464	case -ECONNRESET:
465		_debug("call reset");
466		afs_op_set_error(op, error);
467		goto failed;
468	}
469
470restart_from_beginning:
471	trace_afs_rotate(op, afs_rotate_trace_restart, 0);
472	_debug("restart");
473	op->estate = NULL;
474	op->server = NULL;
475	afs_clear_server_states(op);
476	op->server_states = NULL;
477	afs_put_serverlist(op->net, op->server_list);
478	op->server_list = NULL;
479start:
480	_debug("start");
481	ASSERTCMP(op->estate, ==, NULL);
482	/* See if we need to do an update of the volume record.  Note that the
483	 * volume may have moved or even have been deleted.
484	 */
485	error = afs_check_volume_status(op->volume, op);
486	trace_afs_rotate(op, afs_rotate_trace_check_vol_status, error);
487	if (error < 0) {
488		afs_op_set_error(op, error);
489		goto failed;
490	}
491
492	if (!afs_start_fs_iteration(op, vnode))
493		goto failed;
494
495	_debug("__ VOL %llx __", op->volume->vid);
 
 
 
496
497pick_server:
498	_debug("pick [%lx]", op->untried_servers);
499	ASSERTCMP(op->estate, ==, NULL);
500
501	error = afs_wait_for_fs_probes(op, op->server_states,
502				       !(op->flags & AFS_OPERATION_UNINTR));
503	switch (error) {
504	case 0: /* No untried responsive servers and no outstanding probes */
505		trace_afs_rotate(op, afs_rotate_trace_probe_none, 0);
506		goto no_more_servers;
507	case 1: /* Got a response */
508		trace_afs_rotate(op, afs_rotate_trace_probe_response, 0);
509		break;
510	case 2: /* Probe data superseded */
511		trace_afs_rotate(op, afs_rotate_trace_probe_superseded, 0);
512		goto restart_from_beginning;
513	default:
514		trace_afs_rotate(op, afs_rotate_trace_probe_error, error);
515		afs_op_set_error(op, error);
516		goto failed;
517	}
518
519	/* Pick the untried server with the highest priority untried endpoint.
520	 * If we have outstanding callbacks, we stick with the server we're
521	 * already using if we can.
522	 */
523	if (op->server) {
524		_debug("server %u", op->server_index);
525		if (test_bit(op->server_index, &op->untried_servers))
526			goto selected_server;
527		op->server = NULL;
528		_debug("no server");
 
529	}
530
531	rcu_read_lock();
532	op->server_index = -1;
533	best_prio = -1;
534	for (i = 0; i < op->server_list->nr_servers; i++) {
535		struct afs_endpoint_state *es;
536		struct afs_server_entry *se = &op->server_list->servers[i];
537		struct afs_addr_list *sal;
538		struct afs_server *s = se->server;
539
540		if (!test_bit(i, &op->untried_servers) ||
541		    test_bit(AFS_SE_EXCLUDED, &se->flags) ||
542		    !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
543			continue;
544		es = op->server_states[i].endpoint_state;
545		sal = es->addresses;
546
547		afs_get_address_preferences_rcu(op->net, sal);
548		for (j = 0; j < sal->nr_addrs; j++) {
549			if (es->failed_set & (1 << j))
550				continue;
551			if (!sal->addrs[j].peer)
552				continue;
553			if (sal->addrs[j].prio > best_prio) {
554				op->server_index = i;
555				best_prio = sal->addrs[j].prio;
556			}
557		}
558	}
559	rcu_read_unlock();
560
561	if (op->server_index == -1)
562		goto no_more_servers;
563
564selected_server:
565	trace_afs_rotate(op, afs_rotate_trace_selected_server, best_prio);
566	_debug("use %d prio %u", op->server_index, best_prio);
567	__clear_bit(op->server_index, &op->untried_servers);
568
569	/* We're starting on a different fileserver from the list.  We need to
570	 * check it, create a callback intercept, find its address list and
571	 * probe its capabilities before we use it.
572	 */
573	ASSERTCMP(op->estate, ==, NULL);
574	server = op->server_list->servers[op->server_index].server;
575
576	if (!afs_check_server_record(op, server, op->key))
577		goto failed;
578
579	_debug("USING SERVER: %pU", &server->uuid);
580
581	op->flags |= AFS_OPERATION_RETRY_SERVER;
582	op->server = server;
583	if (vnode->cb_server != server) {
584		vnode->cb_server = server;
585		vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
586		atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
587	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
589retry_server:
590	op->addr_tried = 0;
591	op->addr_index = -1;
592
593iterate_address:
 
594	/* Iterate over the current server's address list to try and find an
595	 * address on which it will respond to us.
596	 */
597	op->estate = op->server_states[op->server_index].endpoint_state;
598	set = READ_ONCE(op->estate->responsive_set);
599	failed = READ_ONCE(op->estate->failed_set);
600	_debug("iterate ES=%x rs=%lx fs=%lx", op->estate->probe_seq, set, failed);
601	set &= ~(failed | op->addr_tried);
602	trace_afs_rotate(op, afs_rotate_trace_iterate_addr, set);
603	if (!set)
604		goto wait_for_more_probe_results;
605
606	alist = op->estate->addresses;
607	best_prio = -1;
608	addr_index = 0;
609	for (i = 0; i < alist->nr_addrs; i++) {
610		if (!(set & (1 << i)))
611			continue;
612		if (alist->addrs[i].prio > best_prio) {
613			addr_index = i;
614			best_prio = alist->addrs[i].prio;
615		}
616	}
617
618	alist->preferred = addr_index;
619
620	op->addr_index = addr_index;
621	set_bit(addr_index, &op->addr_tried);
622
623	op->volsync.creation = TIME64_MIN;
624	op->volsync.update = TIME64_MIN;
625	op->call_responded = false;
626	_debug("address [%u] %u/%u %pISp",
627	       op->server_index, addr_index, alist->nr_addrs,
628	       rxrpc_kernel_remote_addr(alist->addrs[op->addr_index].peer));
629	_leave(" = t");
630	return true;
631
632wait_for_more_probe_results:
633	error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
634					  !(op->flags & AFS_OPERATION_UNINTR));
635	if (error == 1)
636		goto iterate_address;
637	if (!error)
638		goto restart_from_beginning;
639
640	/* We've now had a failure to respond on all of a server's addresses -
641	 * immediately probe them again and consider retrying the server.
642	 */
643	trace_afs_rotate(op, afs_rotate_trace_probe_fileserver, 0);
644	afs_probe_fileserver(op->net, op->server);
645	if (op->flags & AFS_OPERATION_RETRY_SERVER) {
646		error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
647						  !(op->flags & AFS_OPERATION_UNINTR));
648		switch (error) {
649		case 1:
650			op->flags &= ~AFS_OPERATION_RETRY_SERVER;
651			trace_afs_rotate(op, afs_rotate_trace_retry_server, 1);
652			goto retry_server;
653		case 0:
654			trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
655			goto restart_from_beginning;
656		case -ERESTARTSYS:
657			afs_op_set_error(op, error);
658			goto failed;
659		case -ETIME:
660		case -EDESTADDRREQ:
661			goto next_server;
662		}
663	}
664
665next_server:
666	trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
667	_debug("next");
668	op->estate = NULL;
669	goto pick_server;
670
671no_more_servers:
672	/* That's all the servers poked to no good effect.  Try again if some
673	 * of them were busy.
674	 */
675	trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0);
676	if (op->flags & AFS_OPERATION_VBUSY) {
677		afs_sleep_and_retry(op);
678		op->flags &= ~AFS_OPERATION_VBUSY;
679		goto restart_from_beginning;
680	}
681
682	rcu_read_lock();
683	for (i = 0; i < op->server_list->nr_servers; i++) {
684		struct afs_endpoint_state *estate;
 
685
686		estate = op->server_states[i].endpoint_state;
687		error = READ_ONCE(estate->error);
688		if (error < 0)
689			afs_op_accumulate_error(op, error, estate->abort_code);
690	}
691	rcu_read_unlock();
692
 
 
 
 
693failed:
694	trace_afs_rotate(op, afs_rotate_trace_failed, 0);
695	op->flags |= AFS_OPERATION_STOP;
696	op->estate = NULL;
697	_leave(" = f [failed %d]", afs_op_error(op));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
698	return false;
699}
700
701/*
702 * Dump cursor state in the case of the error being EDESTADDRREQ.
703 */
704void afs_dump_edestaddrreq(const struct afs_operation *op)
705{
706	static int count;
707	int i;
708
709	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
710		return;
711	count++;
712
713	rcu_read_lock();
714
715	pr_notice("EDESTADDR occurred\n");
716	pr_notice("OP: cbb=%x cbb2=%x fl=%x err=%hd\n",
717		  op->file[0].cb_break_before,
718		  op->file[1].cb_break_before, op->flags, op->cumul_error.error);
719	pr_notice("OP: ut=%lx ix=%d ni=%u\n",
720		  op->untried_servers, op->server_index, op->nr_iterations);
721	pr_notice("OP: call  er=%d ac=%d r=%u\n",
722		  op->call_error, op->call_abort_code, op->call_responded);
723
724	if (op->server_list) {
725		const struct afs_server_list *sl = op->server_list;
726
727		pr_notice("FC: SL nr=%u vnov=%hx\n",
728			  sl->nr_servers, sl->vnovol_mask);
729		for (i = 0; i < sl->nr_servers; i++) {
730			const struct afs_server *s = sl->servers[i].server;
731			const struct afs_endpoint_state *e =
732				rcu_dereference(s->endpoint_state);
733			const struct afs_addr_list *a = e->addresses;
734
735			pr_notice("FC: server fl=%lx av=%u %pU\n",
736				  s->flags, s->addr_version, &s->uuid);
737			pr_notice("FC:  - pq=%x R=%lx F=%lx\n",
738				  e->probe_seq, e->responsive_set, e->failed_set);
739			if (a) {
740				pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
741					  a->version,
742					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
743					  a->preferred);
744				if (a == e->addresses)
 
 
745					pr_notice("FC:  - current\n");
746			}
747		}
748	}
749
750	pr_notice("AC: t=%lx ax=%d\n", op->addr_tried, op->addr_index);
 
 
751	rcu_read_unlock();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
752}

  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/* Handle fileserver selection and rotation.
  3 *
  4 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  5 * Written by David Howells (dhowells@redhat.com)
  6 */
  7
  8#include <linux/kernel.h>
  9#include <linux/slab.h>
 10#include <linux/fs.h>
 11#include <linux/sched.h>
 12#include <linux/delay.h>
 13#include <linux/sched/signal.h>
 14#include "internal.h"
 15#include "afs_fs.h"
 
 16
 17/*
 18 * Begin an operation on the fileserver.
 19 *
 20 * Fileserver operations are serialised on the server by vnode, so we serialise
 21 * them here also using the io_lock.
 22 */
 23bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 24			       struct key *key, bool intr)
 25{
 26	memset(fc, 0, sizeof(*fc));
 27	fc->vnode = vnode;
 28	fc->key = key;
 29	fc->ac.error = SHRT_MAX;
 30	fc->error = -EDESTADDRREQ;
 31
 32	if (intr) {
 33		fc->flags |= AFS_FS_CURSOR_INTR;
 34		if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
 35			fc->error = -EINTR;
 36			fc->flags |= AFS_FS_CURSOR_STOP;
 37			return false;
 38		}
 39	} else {
 40		mutex_lock(&vnode->io_lock);
 41	}
 42
 43	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
 44		fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
 45	return true;
 46}
 47
 48/*
 49 * Begin iteration through a server list, starting with the vnode's last used
 50 * server if possible, or the last recorded good server if not.
 51 */
 52static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
 53				   struct afs_vnode *vnode)
 54{
 55	struct afs_cb_interest *cbi;
 
 56	int i;
 57
 58	read_lock(&vnode->volume->servers_lock);
 59	fc->server_list = afs_get_serverlist(vnode->volume->servers);
 60	read_unlock(&vnode->volume->servers_lock);
 61
 62	fc->untried = (1UL << fc->server_list->nr_servers) - 1;
 63	fc->index = READ_ONCE(fc->server_list->preferred);
 64
 65	cbi = rcu_dereference_protected(vnode->cb_interest,
 66					lockdep_is_held(&vnode->io_lock));
 67	if (cbi) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 68		/* See if the vnode's preferred record is still available */
 69		for (i = 0; i < fc->server_list->nr_servers; i++) {
 70			if (fc->server_list->servers[i].cb_interest == cbi) {
 71				fc->index = i;
 
 72				goto found_interest;
 73			}
 74		}
 75
 76		/* If we have a lock outstanding on a server that's no longer
 77		 * serving this vnode, then we can't switch to another server
 78		 * and have to return an error.
 79		 */
 80		if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
 81			fc->error = -ESTALE;
 
 82			return false;
 83		}
 84
 85		/* Note that the callback promise is effectively broken */
 86		write_seqlock(&vnode->cb_lock);
 87		ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest));
 88		rcu_assign_pointer(vnode->cb_interest, NULL);
 89		if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
 90			vnode->cb_break++;
 91		write_sequnlock(&vnode->cb_lock);
 92
 93		afs_put_cb_interest(afs_v2net(vnode), cbi);
 94		cbi = NULL;
 95	}
 96
 97found_interest:
 98	return true;
 99}
100
101/*
102 * Post volume busy note.
103 */
104static void afs_busy(struct afs_volume *volume, u32 abort_code)
105{
106	const char *m;
107
108	switch (abort_code) {
109	case VOFFLINE:		m = "offline";		break;
110	case VRESTARTING:	m = "restarting";	break;
111	case VSALVAGING:	m = "being salvaged";	break;
112	default:		m = "busy";		break;
113	}
114
115	pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
 
116}
117
118/*
119 * Sleep and retry the operation to the same fileserver.
120 */
121static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
122{
123	if (fc->flags & AFS_FS_CURSOR_INTR) {
 
124		msleep_interruptible(1000);
125		if (signal_pending(current)) {
126			fc->error = -ERESTARTSYS;
127			return false;
128		}
129	} else {
130		msleep(1000);
131	}
132
133	return true;
134}
135
136/*
137 * Select the fileserver to use.  May be called multiple times to rotate
138 * through the fileservers.
139 */
140bool afs_select_fileserver(struct afs_fs_cursor *fc)
141{
142	struct afs_addr_list *alist;
143	struct afs_server *server;
144	struct afs_vnode *vnode = fc->vnode;
145	struct afs_error e;
146	u32 rtt;
147	int error = fc->ac.error, i;
148
149	_enter("%lx[%d],%lx[%d],%d,%d",
150	       fc->untried, fc->index,
151	       fc->ac.tried, fc->ac.index,
152	       error, fc->ac.abort_code);
 
 
 
 
153
154	if (fc->flags & AFS_FS_CURSOR_STOP) {
 
155		_leave(" = f [stopped]");
156		return false;
157	}
158
159	fc->nr_iterations++;
 
 
 
 
160
161	/* Evaluate the result of the previous operation, if there was one. */
162	switch (error) {
163	case SHRT_MAX:
164		goto start;
165
166	case 0:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167	default:
168		/* Success or local failure.  Stop. */
169		fc->error = error;
170		fc->flags |= AFS_FS_CURSOR_STOP;
 
171		_leave(" = f [okay/local %d]", error);
172		return false;
173
174	case -ECONNABORTED:
175		/* The far side rejected the operation on some grounds.  This
176		 * might involve the server being busy or the volume having been moved.
 
 
 
 
 
177		 */
178		switch (fc->ac.abort_code) {
 
 
179		case VNOVOL:
180			/* This fileserver doesn't know about the volume.
181			 * - May indicate that the VL is wrong - retry once and compare
182			 *   the results.
183			 * - May indicate that the fileserver couldn't attach to the vol.
 
 
 
 
 
184			 */
185			if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
186				fc->error = -EREMOTEIO;
187				goto next_server;
188			}
189
190			write_lock(&vnode->volume->servers_lock);
191			fc->server_list->vnovol_mask |= 1 << fc->index;
192			write_unlock(&vnode->volume->servers_lock);
193
194			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
195			error = afs_check_volume_status(vnode->volume, fc->key);
196			if (error < 0)
197				goto failed_set_error;
 
 
198
199			if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
200				fc->error = -ENOMEDIUM;
201				goto failed;
202			}
203
204			/* If the server list didn't change, then assume that
205			 * it's the fileserver having trouble.
206			 */
207			if (vnode->volume->servers == fc->server_list) {
208				fc->error = -EREMOTEIO;
209				goto next_server;
210			}
211
212			/* Try again */
213			fc->flags |= AFS_FS_CURSOR_VNOVOL;
214			_leave(" = t [vnovol]");
215			return true;
216
217		case VSALVAGE: /* TODO: Should this return an error or iterate? */
218		case VVOLEXISTS:
 
 
 
 
 
 
 
219		case VNOSERVICE:
220		case VONLINE:
221		case VDISKFULL:
222		case VOVERQUOTA:
223			fc->error = afs_abort_to_error(fc->ac.abort_code);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224			goto next_server;
225
 
 
 
 
 
226		case VOFFLINE:
227			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) {
228				afs_busy(vnode->volume, fc->ac.abort_code);
229				clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
 
 
 
 
 
 
 
230			}
231			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
232				fc->error = -EADV;
233				goto failed;
234			}
235			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
236				fc->error = -ESTALE;
237				goto failed;
238			}
239			goto busy;
240
241		case VSALVAGING:
242		case VRESTARTING:
243		case VBUSY:
244			/* Retry after going round all the servers unless we
245			 * have a file lock we need to maintain.
 
 
 
 
 
 
 
 
 
 
246			 */
247			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
248				fc->error = -EBUSY;
249				goto failed;
250			}
251			if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
252				afs_busy(vnode->volume, fc->ac.abort_code);
253				clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
 
 
254			}
255		busy:
256			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
257				if (!afs_sleep_and_retry(fc))
258					goto failed;
259
260				 /* Retry with same server & address */
261				_leave(" = t [vbusy]");
262				return true;
263			}
264
265			fc->flags |= AFS_FS_CURSOR_VBUSY;
266			goto next_server;
267
268		case VMOVED:
269			/* The volume migrated to another server.  We
270			 * consider all locks and callbacks broken and request
271			 * an update from the VLDB.
272			 *
273			 * We also limit the number of VMOVED hops we will
274			 * honour, just in case someone sets up a loop.
275			 */
276			if (fc->flags & AFS_FS_CURSOR_VMOVED) {
277				fc->error = -EREMOTEIO;
278				goto failed;
279			}
280			fc->flags |= AFS_FS_CURSOR_VMOVED;
281
282			set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
283			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
284			error = afs_check_volume_status(vnode->volume, fc->key);
285			if (error < 0)
286				goto failed_set_error;
 
 
287
288			/* If the server list didn't change, then the VLDB is
289			 * out of sync with the fileservers.  This is hopefully
290			 * a temporary condition, however, so we don't want to
291			 * permanently block access to the file.
292			 *
293			 * TODO: Try other fileservers if we can.
294			 *
295			 * TODO: Retry a few times with sleeps.
296			 */
297			if (vnode->volume->servers == fc->server_list) {
298				fc->error = -ENOMEDIUM;
299				goto failed;
300			}
301
302			goto restart_from_beginning;
303
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304		default:
305			clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
306			clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
307			fc->error = afs_abort_to_error(fc->ac.abort_code);
 
 
 
308			goto failed;
309		}
310
311	case -ETIMEDOUT:
312	case -ETIME:
313		if (fc->error != -EDESTADDRREQ)
314			goto iterate_address;
315		/* Fall through */
316	case -ERFKILL:
317	case -EADDRNOTAVAIL:
318	case -ENETUNREACH:
319	case -EHOSTUNREACH:
320	case -EHOSTDOWN:
321	case -ECONNREFUSED:
322		_debug("no conn");
323		fc->error = error;
324		goto iterate_address;
325
 
 
 
 
326	case -ECONNRESET:
327		_debug("call reset");
328		fc->error = error;
329		goto failed;
330	}
331
332restart_from_beginning:
 
333	_debug("restart");
334	afs_end_cursor(&fc->ac);
335	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
336	fc->cbi = NULL;
337	afs_put_serverlist(afs_v2net(vnode), fc->server_list);
338	fc->server_list = NULL;
 
339start:
340	_debug("start");
 
341	/* See if we need to do an update of the volume record.  Note that the
342	 * volume may have moved or even have been deleted.
343	 */
344	error = afs_check_volume_status(vnode->volume, fc->key);
345	if (error < 0)
346		goto failed_set_error;
 
 
 
347
348	if (!afs_start_fs_iteration(fc, vnode))
349		goto failed;
350
351	_debug("__ VOL %llx __", vnode->volume->vid);
352	error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list);
353	if (error < 0)
354		goto failed_set_error;
355
356pick_server:
357	_debug("pick [%lx]", fc->untried);
 
358
359	error = afs_wait_for_fs_probes(fc->server_list, fc->untried);
360	if (error < 0)
361		goto failed_set_error;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
363	/* Pick the untried server with the lowest RTT.  If we have outstanding
364	 * callbacks, we stick with the server we're already using if we can.
 
365	 */
366	if (fc->cbi) {
367		_debug("cbi %u", fc->index);
368		if (test_bit(fc->index, &fc->untried))
369			goto selected_server;
370		afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
371		fc->cbi = NULL;
372		_debug("nocbi");
373	}
374
375	fc->index = -1;
376	rtt = U32_MAX;
377	for (i = 0; i < fc->server_list->nr_servers; i++) {
378		struct afs_server *s = fc->server_list->servers[i].server;
 
 
 
 
 
 
 
 
 
 
 
379
380		if (!test_bit(i, &fc->untried) || !s->probe.responded)
381			continue;
382		if (s->probe.rtt < rtt) {
383			fc->index = i;
384			rtt = s->probe.rtt;
 
 
 
 
 
385		}
386	}
 
387
388	if (fc->index == -1)
389		goto no_more_servers;
390
391selected_server:
392	_debug("use %d", fc->index);
393	__clear_bit(fc->index, &fc->untried);
 
394
395	/* We're starting on a different fileserver from the list.  We need to
396	 * check it, create a callback intercept, find its address list and
397	 * probe its capabilities before we use it.
398	 */
399	ASSERTCMP(fc->ac.alist, ==, NULL);
400	server = fc->server_list->servers[fc->index].server;
401
402	if (!afs_check_server_record(fc, server))
403		goto failed;
404
405	_debug("USING SERVER: %pU", &server->uuid);
406
407	/* Make sure we've got a callback interest record for this server.  We
408	 * have to link it in before we send the request as we can be sent a
409	 * break request before we've finished decoding the reply and
410	 * installing the vnode.
411	 */
412	error = afs_register_server_cb_interest(vnode, fc->server_list,
413						fc->index);
414	if (error < 0)
415		goto failed_set_error;
416
417	fc->cbi = afs_get_cb_interest(
418		rcu_dereference_protected(vnode->cb_interest,
419					  lockdep_is_held(&vnode->io_lock)));
420
421	read_lock(&server->fs_lock);
422	alist = rcu_dereference_protected(server->addresses,
423					  lockdep_is_held(&server->fs_lock));
424	afs_get_addrlist(alist);
425	read_unlock(&server->fs_lock);
426
427	memset(&fc->ac, 0, sizeof(fc->ac));
428
429	if (!fc->ac.alist)
430		fc->ac.alist = alist;
431	else
432		afs_put_addrlist(alist);
433
434	fc->ac.index = -1;
 
 
435
436iterate_address:
437	ASSERT(fc->ac.alist);
438	/* Iterate over the current server's address list to try and find an
439	 * address on which it will respond to us.
440	 */
441	if (!afs_iterate_addresses(&fc->ac))
442		goto next_server;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
444	_debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs);
445
 
 
 
 
 
 
 
 
 
446	_leave(" = t");
447	return true;
448
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449next_server:
 
450	_debug("next");
451	afs_end_cursor(&fc->ac);
452	goto pick_server;
453
454no_more_servers:
455	/* That's all the servers poked to no good effect.  Try again if some
456	 * of them were busy.
457	 */
458	if (fc->flags & AFS_FS_CURSOR_VBUSY)
 
 
 
459		goto restart_from_beginning;
 
460
461	e.error = -EDESTADDRREQ;
462	e.responded = false;
463	for (i = 0; i < fc->server_list->nr_servers; i++) {
464		struct afs_server *s = fc->server_list->servers[i].server;
465
466		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
467				     s->probe.abort_code);
 
 
468	}
 
469
470	error = e.error;
471
472failed_set_error:
473	fc->error = error;
474failed:
475	fc->flags |= AFS_FS_CURSOR_STOP;
476	afs_end_cursor(&fc->ac);
477	_leave(" = f [failed %d]", fc->error);
478	return false;
479}
480
481/*
482 * Select the same fileserver we used for a vnode before and only that
483 * fileserver.  We use this when we have a lock on that file, which is backed
484 * only by the fileserver we obtained it from.
485 */
486bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
487{
488	struct afs_vnode *vnode = fc->vnode;
489	struct afs_cb_interest *cbi;
490	struct afs_addr_list *alist;
491	int error = fc->ac.error;
492
493	_enter("");
494
495	cbi = rcu_dereference_protected(vnode->cb_interest,
496					lockdep_is_held(&vnode->io_lock));
497
498	switch (error) {
499	case SHRT_MAX:
500		if (!cbi) {
501			fc->error = -ESTALE;
502			fc->flags |= AFS_FS_CURSOR_STOP;
503			return false;
504		}
505
506		fc->cbi = afs_get_cb_interest(cbi);
507
508		read_lock(&cbi->server->fs_lock);
509		alist = rcu_dereference_protected(cbi->server->addresses,
510						  lockdep_is_held(&cbi->server->fs_lock));
511		afs_get_addrlist(alist);
512		read_unlock(&cbi->server->fs_lock);
513		if (!alist) {
514			fc->error = -ESTALE;
515			fc->flags |= AFS_FS_CURSOR_STOP;
516			return false;
517		}
518
519		memset(&fc->ac, 0, sizeof(fc->ac));
520		fc->ac.alist = alist;
521		fc->ac.index = -1;
522		goto iterate_address;
523
524	case 0:
525	default:
526		/* Success or local failure.  Stop. */
527		fc->error = error;
528		fc->flags |= AFS_FS_CURSOR_STOP;
529		_leave(" = f [okay/local %d]", error);
530		return false;
531
532	case -ECONNABORTED:
533		fc->error = afs_abort_to_error(fc->ac.abort_code);
534		fc->flags |= AFS_FS_CURSOR_STOP;
535		_leave(" = f [abort]");
536		return false;
537
538	case -ERFKILL:
539	case -EADDRNOTAVAIL:
540	case -ENETUNREACH:
541	case -EHOSTUNREACH:
542	case -EHOSTDOWN:
543	case -ECONNREFUSED:
544	case -ETIMEDOUT:
545	case -ETIME:
546		_debug("no conn");
547		fc->error = error;
548		goto iterate_address;
549	}
550
551iterate_address:
552	/* Iterate over the current server's address list to try and find an
553	 * address on which it will respond to us.
554	 */
555	if (afs_iterate_addresses(&fc->ac)) {
556		_leave(" = t");
557		return true;
558	}
559
560	afs_end_cursor(&fc->ac);
561	return false;
562}
563
564/*
565 * Dump cursor state in the case of the error being EDESTADDRREQ.
566 */
567static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
568{
569	static int count;
570	int i;
571
572	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
573		return;
574	count++;
575
576	rcu_read_lock();
577
578	pr_notice("EDESTADDR occurred\n");
579	pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n",
580		  fc->cb_break, fc->cb_break_2, fc->flags, fc->error);
581	pr_notice("FC: ut=%lx ix=%d ni=%u\n",
582		  fc->untried, fc->index, fc->nr_iterations);
583
584	if (fc->server_list) {
585		const struct afs_server_list *sl = fc->server_list;
586		pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
587			  sl->nr_servers, sl->preferred, sl->vnovol_mask);
 
 
 
 
588		for (i = 0; i < sl->nr_servers; i++) {
589			const struct afs_server *s = sl->servers[i].server;
 
 
 
 
590			pr_notice("FC: server fl=%lx av=%u %pU\n",
591				  s->flags, s->addr_version, &s->uuid);
592			if (s->addresses) {
593				const struct afs_addr_list *a =
594					rcu_dereference(s->addresses);
595				pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
596					  a->version,
597					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
598					  a->preferred);
599				pr_notice("FC:  - pr=%lx R=%lx F=%lx\n",
600					  a->probed, a->responded, a->failed);
601				if (a == fc->ac.alist)
602					pr_notice("FC:  - current\n");
603			}
604		}
605	}
606
607	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
608		  fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error,
609		  fc->ac.responded, fc->ac.nr_iterations);
610	rcu_read_unlock();
611}
612
613/*
614 * Tidy up a filesystem cursor and unlock the vnode.
615 */
616int afs_end_vnode_operation(struct afs_fs_cursor *fc)
617{
618	struct afs_net *net = afs_v2net(fc->vnode);
619
620	if (fc->error == -EDESTADDRREQ ||
621	    fc->error == -EADDRNOTAVAIL ||
622	    fc->error == -ENETUNREACH ||
623	    fc->error == -EHOSTUNREACH)
624		afs_dump_edestaddrreq(fc);
625
626	mutex_unlock(&fc->vnode->io_lock);
627
628	afs_end_cursor(&fc->ac);
629	afs_put_cb_interest(net, fc->cbi);
630	afs_put_serverlist(net, fc->server_list);
631
632	if (fc->error == -ECONNABORTED)
633		fc->error = afs_abort_to_error(fc->ac.abort_code);
634
635	return fc->error;
636}