Linux Audio

Check our new training course

Linux kernel drivers training

Mar 31-Apr 9, 2025, special US time zones
Register
Loading...
v6.2
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/* Handle fileserver selection and rotation.
  3 *
  4 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  5 * Written by David Howells (dhowells@redhat.com)
  6 */
  7
  8#include <linux/kernel.h>
  9#include <linux/slab.h>
 10#include <linux/fs.h>
 11#include <linux/sched.h>
 12#include <linux/delay.h>
 13#include <linux/sched/signal.h>
 14#include "internal.h"
 15#include "afs_fs.h"
 
 
 
 
 
 
 
 
 
 
 
 
 
 16
 17/*
 18 * Begin iteration through a server list, starting with the vnode's last used
 19 * server if possible, or the last recorded good server if not.
 20 */
 21static bool afs_start_fs_iteration(struct afs_operation *op,
 22				   struct afs_vnode *vnode)
 23{
 24	struct afs_server *server;
 25	void *cb_server;
 26	int i;
 27
 
 
 28	read_lock(&op->volume->servers_lock);
 29	op->server_list = afs_get_serverlist(
 30		rcu_dereference_protected(op->volume->servers,
 31					  lockdep_is_held(&op->volume->servers_lock)));
 32	read_unlock(&op->volume->servers_lock);
 33
 34	op->untried = (1UL << op->server_list->nr_servers) - 1;
 35	op->index = READ_ONCE(op->server_list->preferred);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 36
 37	cb_server = vnode->cb_server;
 38	if (cb_server) {
 39		/* See if the vnode's preferred record is still available */
 40		for (i = 0; i < op->server_list->nr_servers; i++) {
 41			server = op->server_list->servers[i].server;
 42			if (server == cb_server) {
 43				op->index = i;
 44				goto found_interest;
 45			}
 46		}
 47
 48		/* If we have a lock outstanding on a server that's no longer
 49		 * serving this vnode, then we can't switch to another server
 50		 * and have to return an error.
 51		 */
 52		if (op->flags & AFS_OPERATION_CUR_ONLY) {
 53			op->error = -ESTALE;
 
 54			return false;
 55		}
 56
 57		/* Note that the callback promise is effectively broken */
 58		write_seqlock(&vnode->cb_lock);
 59		ASSERTCMP(cb_server, ==, vnode->cb_server);
 60		vnode->cb_server = NULL;
 61		if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
 62			vnode->cb_break++;
 63		write_sequnlock(&vnode->cb_lock);
 64	}
 65
 66found_interest:
 67	return true;
 68}
 69
 70/*
 71 * Post volume busy note.
 72 */
 73static void afs_busy(struct afs_volume *volume, u32 abort_code)
 74{
 75	const char *m;
 76
 77	switch (abort_code) {
 78	case VOFFLINE:		m = "offline";		break;
 79	case VRESTARTING:	m = "restarting";	break;
 80	case VSALVAGING:	m = "being salvaged";	break;
 81	default:		m = "busy";		break;
 82	}
 83
 84	pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
 
 85}
 86
 87/*
 88 * Sleep and retry the operation to the same fileserver.
 89 */
 90static bool afs_sleep_and_retry(struct afs_operation *op)
 91{
 
 92	if (!(op->flags & AFS_OPERATION_UNINTR)) {
 93		msleep_interruptible(1000);
 94		if (signal_pending(current)) {
 95			op->error = -ERESTARTSYS;
 96			return false;
 97		}
 98	} else {
 99		msleep(1000);
100	}
101
102	return true;
103}
104
105/*
106 * Select the fileserver to use.  May be called multiple times to rotate
107 * through the fileservers.
108 */
109bool afs_select_fileserver(struct afs_operation *op)
110{
111	struct afs_addr_list *alist;
112	struct afs_server *server;
113	struct afs_vnode *vnode = op->file[0].vnode;
114	struct afs_error e;
115	u32 rtt;
116	int error = op->ac.error, i;
117
118	_enter("%lx[%d],%lx[%d],%d,%d",
119	       op->untried, op->index,
120	       op->ac.tried, op->ac.index,
121	       error, op->ac.abort_code);
 
 
 
 
122
123	if (op->flags & AFS_OPERATION_STOP) {
 
124		_leave(" = f [stopped]");
125		return false;
126	}
127
128	op->nr_iterations++;
129
130	/* Evaluate the result of the previous operation, if there was one. */
131	switch (error) {
132	case SHRT_MAX:
133		goto start;
134
 
 
 
 
 
135	case 0:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136	default:
137		/* Success or local failure.  Stop. */
138		op->error = error;
139		op->flags |= AFS_OPERATION_STOP;
 
140		_leave(" = f [okay/local %d]", error);
141		return false;
142
143	case -ECONNABORTED:
144		/* The far side rejected the operation on some grounds.  This
145		 * might involve the server being busy or the volume having been moved.
 
 
 
 
 
146		 */
147		switch (op->ac.abort_code) {
 
 
148		case VNOVOL:
149			/* This fileserver doesn't know about the volume.
150			 * - May indicate that the VL is wrong - retry once and compare
151			 *   the results.
152			 * - May indicate that the fileserver couldn't attach to the vol.
 
 
 
 
 
153			 */
154			if (op->flags & AFS_OPERATION_VNOVOL) {
155				op->error = -EREMOTEIO;
156				goto next_server;
157			}
158
159			write_lock(&op->volume->servers_lock);
160			op->server_list->vnovol_mask |= 1 << op->index;
161			write_unlock(&op->volume->servers_lock);
162
163			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
164			error = afs_check_volume_status(op->volume, op);
165			if (error < 0)
166				goto failed_set_error;
 
 
167
168			if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
169				op->error = -ENOMEDIUM;
170				goto failed;
171			}
172
173			/* If the server list didn't change, then assume that
174			 * it's the fileserver having trouble.
175			 */
176			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
177				op->error = -EREMOTEIO;
178				goto next_server;
179			}
180
181			/* Try again */
182			op->flags |= AFS_OPERATION_VNOVOL;
183			_leave(" = t [vnovol]");
184			return true;
185
186		case VSALVAGE: /* TODO: Should this return an error or iterate? */
187		case VVOLEXISTS:
188		case VNOSERVICE:
189		case VONLINE:
190		case VDISKFULL:
191		case VOVERQUOTA:
192			op->error = afs_abort_to_error(op->ac.abort_code);
 
193			goto next_server;
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195		case VOFFLINE:
196			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
197				afs_busy(op->volume, op->ac.abort_code);
198				clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
 
 
 
 
 
 
 
199			}
200			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
201				op->error = -EADV;
202				goto failed;
203			}
204			if (op->flags & AFS_OPERATION_CUR_ONLY) {
205				op->error = -ESTALE;
206				goto failed;
207			}
208			goto busy;
209
210		case VSALVAGING:
211		case VRESTARTING:
212		case VBUSY:
213			/* Retry after going round all the servers unless we
214			 * have a file lock we need to maintain.
 
 
 
 
 
 
 
 
 
 
215			 */
216			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
217				op->error = -EBUSY;
218				goto failed;
219			}
220			if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
221				afs_busy(op->volume, op->ac.abort_code);
222				clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
 
 
223			}
224		busy:
225			if (op->flags & AFS_OPERATION_CUR_ONLY) {
226				if (!afs_sleep_and_retry(op))
227					goto failed;
228
229				 /* Retry with same server & address */
230				_leave(" = t [vbusy]");
231				return true;
232			}
233
234			op->flags |= AFS_OPERATION_VBUSY;
235			goto next_server;
236
237		case VMOVED:
238			/* The volume migrated to another server.  We consider
239			 * consider all locks and callbacks broken and request
240			 * an update from the VLDB.
241			 *
242			 * We also limit the number of VMOVED hops we will
243			 * honour, just in case someone sets up a loop.
244			 */
245			if (op->flags & AFS_OPERATION_VMOVED) {
246				op->error = -EREMOTEIO;
247				goto failed;
248			}
249			op->flags |= AFS_OPERATION_VMOVED;
250
251			set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
252			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
253			error = afs_check_volume_status(op->volume, op);
254			if (error < 0)
255				goto failed_set_error;
 
 
256
257			/* If the server list didn't change, then the VLDB is
258			 * out of sync with the fileservers.  This is hopefully
259			 * a temporary condition, however, so we don't want to
260			 * permanently block access to the file.
261			 *
262			 * TODO: Try other fileservers if we can.
263			 *
264			 * TODO: Retry a few times with sleeps.
265			 */
266			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
267				op->error = -ENOMEDIUM;
268				goto failed;
269			}
270
271			goto restart_from_beginning;
272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273		default:
274			clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
275			clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
276			op->error = afs_abort_to_error(op->ac.abort_code);
 
 
 
277			goto failed;
278		}
279
280	case -ETIMEDOUT:
281	case -ETIME:
282		if (op->error != -EDESTADDRREQ)
283			goto iterate_address;
284		fallthrough;
285	case -ERFKILL:
286	case -EADDRNOTAVAIL:
287	case -ENETUNREACH:
288	case -EHOSTUNREACH:
289	case -EHOSTDOWN:
290	case -ECONNREFUSED:
291		_debug("no conn");
292		op->error = error;
293		goto iterate_address;
294
295	case -ENETRESET:
296		pr_warn("kAFS: Peer reset %s (op=%x)\n",
297			op->type ? op->type->name : "???", op->debug_id);
298		fallthrough;
299	case -ECONNRESET:
300		_debug("call reset");
301		op->error = error;
302		goto failed;
303	}
304
305restart_from_beginning:
 
306	_debug("restart");
307	afs_end_cursor(&op->ac);
308	op->server = NULL;
 
 
309	afs_put_serverlist(op->net, op->server_list);
310	op->server_list = NULL;
311start:
312	_debug("start");
 
313	/* See if we need to do an update of the volume record.  Note that the
314	 * volume may have moved or even have been deleted.
315	 */
316	error = afs_check_volume_status(op->volume, op);
317	if (error < 0)
318		goto failed_set_error;
 
 
 
319
320	if (!afs_start_fs_iteration(op, vnode))
321		goto failed;
322
323	_debug("__ VOL %llx __", op->volume->vid);
324
325pick_server:
326	_debug("pick [%lx]", op->untried);
 
327
328	error = afs_wait_for_fs_probes(op->server_list, op->untried);
329	if (error < 0)
330		goto failed_set_error;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
332	/* Pick the untried server with the lowest RTT.  If we have outstanding
333	 * callbacks, we stick with the server we're already using if we can.
 
334	 */
335	if (op->server) {
336		_debug("server %u", op->index);
337		if (test_bit(op->index, &op->untried))
338			goto selected_server;
339		op->server = NULL;
340		_debug("no server");
341	}
342
343	op->index = -1;
344	rtt = U32_MAX;
 
345	for (i = 0; i < op->server_list->nr_servers; i++) {
346		struct afs_server *s = op->server_list->servers[i].server;
 
 
 
347
348		if (!test_bit(i, &op->untried) ||
 
349		    !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
350			continue;
351		if (s->probe.rtt < rtt) {
352			op->index = i;
353			rtt = s->probe.rtt;
 
 
 
 
 
 
 
 
 
 
354		}
355	}
 
356
357	if (op->index == -1)
358		goto no_more_servers;
359
360selected_server:
361	_debug("use %d", op->index);
362	__clear_bit(op->index, &op->untried);
 
363
364	/* We're starting on a different fileserver from the list.  We need to
365	 * check it, create a callback intercept, find its address list and
366	 * probe its capabilities before we use it.
367	 */
368	ASSERTCMP(op->ac.alist, ==, NULL);
369	server = op->server_list->servers[op->index].server;
370
371	if (!afs_check_server_record(op, server))
372		goto failed;
373
374	_debug("USING SERVER: %pU", &server->uuid);
375
376	op->flags |= AFS_OPERATION_RETRY_SERVER;
377	op->server = server;
378	if (vnode->cb_server != server) {
379		vnode->cb_server = server;
380		vnode->cb_s_break = server->cb_s_break;
381		vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break);
382		vnode->cb_v_break = vnode->volume->cb_v_break;
383		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
384	}
385
386	read_lock(&server->fs_lock);
387	alist = rcu_dereference_protected(server->addresses,
388					  lockdep_is_held(&server->fs_lock));
389	afs_get_addrlist(alist);
390	read_unlock(&server->fs_lock);
391
392retry_server:
393	memset(&op->ac, 0, sizeof(op->ac));
394
395	if (!op->ac.alist)
396		op->ac.alist = alist;
397	else
398		afs_put_addrlist(alist);
399
400	op->ac.index = -1;
401
402iterate_address:
403	ASSERT(op->ac.alist);
404	/* Iterate over the current server's address list to try and find an
405	 * address on which it will respond to us.
406	 */
407	if (!afs_iterate_addresses(&op->ac))
408		goto out_of_addresses;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
410	_debug("address [%u] %u/%u %pISp",
411	       op->index, op->ac.index, op->ac.alist->nr_addrs,
412	       &op->ac.alist->addrs[op->ac.index].transport);
 
413
 
 
 
 
 
 
414	_leave(" = t");
415	return true;
416
417out_of_addresses:
 
 
 
 
 
 
 
418	/* We've now had a failure to respond on all of a server's addresses -
419	 * immediately probe them again and consider retrying the server.
420	 */
 
421	afs_probe_fileserver(op->net, op->server);
422	if (op->flags & AFS_OPERATION_RETRY_SERVER) {
423		alist = op->ac.alist;
424		error = afs_wait_for_one_fs_probe(
425			op->server, !(op->flags & AFS_OPERATION_UNINTR));
426		switch (error) {
427		case 0:
428			op->flags &= ~AFS_OPERATION_RETRY_SERVER;
 
429			goto retry_server;
 
 
 
430		case -ERESTARTSYS:
431			goto failed_set_error;
 
432		case -ETIME:
433		case -EDESTADDRREQ:
434			goto next_server;
435		}
436	}
437
438next_server:
 
439	_debug("next");
440	afs_end_cursor(&op->ac);
441	goto pick_server;
442
443no_more_servers:
444	/* That's all the servers poked to no good effect.  Try again if some
445	 * of them were busy.
446	 */
447	if (op->flags & AFS_OPERATION_VBUSY)
 
 
 
448		goto restart_from_beginning;
 
449
450	e.error = -EDESTADDRREQ;
451	e.responded = false;
452	for (i = 0; i < op->server_list->nr_servers; i++) {
453		struct afs_server *s = op->server_list->servers[i].server;
454
455		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
456				     s->probe.abort_code);
 
 
457	}
 
458
459	error = e.error;
460
461failed_set_error:
462	op->error = error;
463failed:
 
464	op->flags |= AFS_OPERATION_STOP;
465	afs_end_cursor(&op->ac);
466	_leave(" = f [failed %d]", op->error);
467	return false;
468}
469
470/*
471 * Dump cursor state in the case of the error being EDESTADDRREQ.
472 */
473void afs_dump_edestaddrreq(const struct afs_operation *op)
474{
475	static int count;
476	int i;
477
478	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
479		return;
480	count++;
481
482	rcu_read_lock();
483
484	pr_notice("EDESTADDR occurred\n");
485	pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n",
486		  op->file[0].cb_break_before,
487		  op->file[1].cb_break_before, op->flags, op->error);
488	pr_notice("FC: ut=%lx ix=%d ni=%u\n",
489		  op->untried, op->index, op->nr_iterations);
 
 
490
491	if (op->server_list) {
492		const struct afs_server_list *sl = op->server_list;
493		pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
494			  sl->nr_servers, sl->preferred, sl->vnovol_mask);
 
495		for (i = 0; i < sl->nr_servers; i++) {
496			const struct afs_server *s = sl->servers[i].server;
 
 
 
 
497			pr_notice("FC: server fl=%lx av=%u %pU\n",
498				  s->flags, s->addr_version, &s->uuid);
499			if (s->addresses) {
500				const struct afs_addr_list *a =
501					rcu_dereference(s->addresses);
502				pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
503					  a->version,
504					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
505					  a->preferred);
506				pr_notice("FC:  - R=%lx F=%lx\n",
507					  a->responded, a->failed);
508				if (a == op->ac.alist)
509					pr_notice("FC:  - current\n");
510			}
511		}
512	}
513
514	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
515		  op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error,
516		  op->ac.responded, op->ac.nr_iterations);
517	rcu_read_unlock();
518}
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/* Handle fileserver selection and rotation.
  3 *
  4 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  5 * Written by David Howells (dhowells@redhat.com)
  6 */
  7
  8#include <linux/kernel.h>
  9#include <linux/slab.h>
 10#include <linux/fs.h>
 11#include <linux/sched.h>
 12#include <linux/delay.h>
 13#include <linux/sched/signal.h>
 14#include "internal.h"
 15#include "afs_fs.h"
 16#include "protocol_uae.h"
 17
 18void afs_clear_server_states(struct afs_operation *op)
 19{
 20	unsigned int i;
 21
 22	if (op->server_states) {
 23		for (i = 0; i < op->server_list->nr_servers; i++)
 24			afs_put_endpoint_state(op->server_states[i].endpoint_state,
 25					       afs_estate_trace_put_server_state);
 26		kfree(op->server_states);
 27	}
 28}
 29
 30/*
 31 * Begin iteration through a server list, starting with the vnode's last used
 32 * server if possible, or the last recorded good server if not.
 33 */
 34static bool afs_start_fs_iteration(struct afs_operation *op,
 35				   struct afs_vnode *vnode)
 36{
 37	struct afs_server *server;
 38	void *cb_server;
 39	int i;
 40
 41	trace_afs_rotate(op, afs_rotate_trace_start, 0);
 42
 43	read_lock(&op->volume->servers_lock);
 44	op->server_list = afs_get_serverlist(
 45		rcu_dereference_protected(op->volume->servers,
 46					  lockdep_is_held(&op->volume->servers_lock)));
 47	read_unlock(&op->volume->servers_lock);
 48
 49	op->server_states = kcalloc(op->server_list->nr_servers, sizeof(op->server_states[0]),
 50				    GFP_KERNEL);
 51	if (!op->server_states) {
 52		afs_op_nomem(op);
 53		trace_afs_rotate(op, afs_rotate_trace_nomem, 0);
 54		return false;
 55	}
 56
 57	rcu_read_lock();
 58	for (i = 0; i < op->server_list->nr_servers; i++) {
 59		struct afs_endpoint_state *estate;
 60		struct afs_server_state *s = &op->server_states[i];
 61
 62		server = op->server_list->servers[i].server;
 63		estate = rcu_dereference(server->endpoint_state);
 64		s->endpoint_state = afs_get_endpoint_state(estate,
 65							   afs_estate_trace_get_server_state);
 66		s->probe_seq = estate->probe_seq;
 67		s->untried_addrs = (1UL << estate->addresses->nr_addrs) - 1;
 68		init_waitqueue_entry(&s->probe_waiter, current);
 69		afs_get_address_preferences(op->net, estate->addresses);
 70	}
 71	rcu_read_unlock();
 72
 73
 74	op->untried_servers = (1UL << op->server_list->nr_servers) - 1;
 75	op->server_index = -1;
 76
 77	cb_server = vnode->cb_server;
 78	if (cb_server) {
 79		/* See if the vnode's preferred record is still available */
 80		for (i = 0; i < op->server_list->nr_servers; i++) {
 81			server = op->server_list->servers[i].server;
 82			if (server == cb_server) {
 83				op->server_index = i;
 84				goto found_interest;
 85			}
 86		}
 87
 88		/* If we have a lock outstanding on a server that's no longer
 89		 * serving this vnode, then we can't switch to another server
 90		 * and have to return an error.
 91		 */
 92		if (op->flags & AFS_OPERATION_CUR_ONLY) {
 93			afs_op_set_error(op, -ESTALE);
 94			trace_afs_rotate(op, afs_rotate_trace_stale_lock, 0);
 95			return false;
 96		}
 97
 98		/* Note that the callback promise is effectively broken */
 99		write_seqlock(&vnode->cb_lock);
100		ASSERTCMP(cb_server, ==, vnode->cb_server);
101		vnode->cb_server = NULL;
102		if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE)
103			vnode->cb_break++;
104		write_sequnlock(&vnode->cb_lock);
105	}
106
107found_interest:
108	return true;
109}
110
111/*
112 * Post volume busy note.
113 */
114static void afs_busy(struct afs_operation *op, u32 abort_code)
115{
116	const char *m;
117
118	switch (abort_code) {
119	case VOFFLINE:		m = "offline";		break;
120	case VRESTARTING:	m = "restarting";	break;
121	case VSALVAGING:	m = "being salvaged";	break;
122	default:		m = "busy";		break;
123	}
124
125	pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n",
126		  op->volume->vid, op->volume->name, &op->server->uuid, m);
127}
128
129/*
130 * Sleep and retry the operation to the same fileserver.
131 */
132static bool afs_sleep_and_retry(struct afs_operation *op)
133{
134	trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0);
135	if (!(op->flags & AFS_OPERATION_UNINTR)) {
136		msleep_interruptible(1000);
137		if (signal_pending(current)) {
138			afs_op_set_error(op, -ERESTARTSYS);
139			return false;
140		}
141	} else {
142		msleep(1000);
143	}
144
145	return true;
146}
147
148/*
149 * Select the fileserver to use.  May be called multiple times to rotate
150 * through the fileservers.
151 */
152bool afs_select_fileserver(struct afs_operation *op)
153{
154	struct afs_addr_list *alist;
155	struct afs_server *server;
156	struct afs_vnode *vnode = op->file[0].vnode;
157	unsigned long set, failed;
158	s32 abort_code = op->call_abort_code;
159	int best_prio = 0;
160	int error = op->call_error, addr_index, i, j;
161
162	op->nr_iterations++;
163
164	_enter("OP=%x+%x,%llx,%u{%lx},%u{%lx},%d,%d",
165	       op->debug_id, op->nr_iterations, op->volume->vid,
166	       op->server_index, op->untried_servers,
167	       op->addr_index, op->addr_tried,
168	       error, abort_code);
169
170	if (op->flags & AFS_OPERATION_STOP) {
171		trace_afs_rotate(op, afs_rotate_trace_stopped, 0);
172		_leave(" = f [stopped]");
173		return false;
174	}
175
176	if (op->nr_iterations == 0)
 
 
 
 
177		goto start;
178
179	WRITE_ONCE(op->estate->addresses->addrs[op->addr_index].last_error, error);
180	trace_afs_rotate(op, afs_rotate_trace_iter, op->call_error);
181
182	/* Evaluate the result of the previous operation, if there was one. */
183	switch (op->call_error) {
184	case 0:
185		clear_bit(AFS_SE_VOLUME_OFFLINE,
186			  &op->server_list->servers[op->server_index].flags);
187		clear_bit(AFS_SE_VOLUME_BUSY,
188			  &op->server_list->servers[op->server_index].flags);
189		op->cumul_error.responded = true;
190
191		/* We succeeded, but we may need to redo the op from another
192		 * server if we're looking at a set of RO volumes where some of
193		 * the servers have not yet been brought up to date lest we
194		 * regress the data.  We only switch to the new version once
195		 * >=50% of the servers are updated.
196		 */
197		error = afs_update_volume_state(op);
198		if (error != 0) {
199			if (error == 1) {
200				afs_sleep_and_retry(op);
201				goto restart_from_beginning;
202			}
203			afs_op_set_error(op, error);
204			goto failed;
205		}
206		fallthrough;
207	default:
208		/* Success or local failure.  Stop. */
209		afs_op_set_error(op, error);
210		op->flags |= AFS_OPERATION_STOP;
211		trace_afs_rotate(op, afs_rotate_trace_stop, error);
212		_leave(" = f [okay/local %d]", error);
213		return false;
214
215	case -ECONNABORTED:
216		/* The far side rejected the operation on some grounds.  This
217		 * might involve the server being busy or the volume having been moved.
218		 *
219		 * Note that various V* errors should not be sent to a cache manager
220		 * by a fileserver as they should be translated to more modern UAE*
221		 * errors instead.  IBM AFS and OpenAFS fileservers, however, do leak
222		 * these abort codes.
223		 */
224		trace_afs_rotate(op, afs_rotate_trace_aborted, abort_code);
225		op->cumul_error.responded = true;
226		switch (abort_code) {
227		case VNOVOL:
228			/* This fileserver doesn't know about the volume.
229			 * - May indicate that the VL is wrong - retry once and compare
230			 *   the results.
231			 * - May indicate that the fileserver couldn't attach to the vol.
232			 * - The volume might have been temporarily removed so that it can
233			 *   be replaced by a volume restore.  "vos" might have ended one
234			 *   transaction and has yet to create the next.
235			 * - The volume might not be blessed or might not be in-service
236			 *   (administrative action).
237			 */
238			if (op->flags & AFS_OPERATION_VNOVOL) {
239				afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
240				goto next_server;
241			}
242
243			write_lock(&op->volume->servers_lock);
244			op->server_list->vnovol_mask |= 1 << op->server_index;
245			write_unlock(&op->volume->servers_lock);
246
247			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
248			error = afs_check_volume_status(op->volume, op);
249			if (error < 0) {
250				afs_op_set_error(op, error);
251				goto failed;
252			}
253
254			if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
255				afs_op_set_error(op, -ENOMEDIUM);
256				goto failed;
257			}
258
259			/* If the server list didn't change, then assume that
260			 * it's the fileserver having trouble.
261			 */
262			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
263				afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
264				goto next_server;
265			}
266
267			/* Try again */
268			op->flags |= AFS_OPERATION_VNOVOL;
269			_leave(" = t [vnovol]");
270			return true;
271
 
272		case VVOLEXISTS:
 
273		case VONLINE:
274			/* These should not be returned from the fileserver. */
275			pr_warn("Fileserver returned unexpected abort %d\n",
276				abort_code);
277			afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
278			goto next_server;
279
280		case VNOSERVICE:
281			/* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver
282			 * if the volume was neither in-service nor administratively
283			 * blessed.  All usage was replaced by VNOVOL because AFS 3.1 and
284			 * earlier cache managers did not handle VNOSERVICE and assumed
285			 * it was the client OSes errno 105.
286			 *
287			 * Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the
288			 * fileserver idle dead time error which was sent in place of
289			 * RX_CALL_TIMEOUT (-3).  The error was intended to be sent if the
290			 * fileserver took too long to send a reply to the client.
291			 * RX_CALL_TIMEOUT would have caused the cache manager to mark the
292			 * server down whereas VNOSERVICE since AFS 3.2 would cause cache
293			 * manager to temporarily (up to 15 minutes) mark the volume
294			 * instance as unusable.
295			 *
296			 * The idle dead logic resulted in cache inconsistency since a
297			 * state changing call that the cache manager assumed was dead
298			 * could still be processed to completion by the fileserver.  This
299			 * logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer
300			 * returned.  However, many 1.4.8 through 1.6.24 fileservers are
301			 * still in existence.
302			 *
303			 * AuriStorFS fileservers have never returned VNOSERVICE.
304			 *
305			 * VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT.
306			 */
307		case RX_CALL_TIMEOUT:
308			afs_op_accumulate_error(op, -ETIMEDOUT, abort_code);
309			goto next_server;
310
311		case VSALVAGING: /* This error should not be leaked to cache managers
312				  * but is from OpenAFS demand attach fileservers.
313				  * It should be treated as an alias for VOFFLINE.
314				  */
315		case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */
316		case VOFFLINE:
317			/* The volume is in use by the volserver or another volume utility
318			 * for an operation that might alter the contents.  The volume is
319			 * expected to come back but it might take a long time (could be
320			 * days).
321			 */
322			if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE,
323					      &op->server_list->servers[op->server_index].flags)) {
324				afs_busy(op, abort_code);
325				clear_bit(AFS_SE_VOLUME_BUSY,
326					  &op->server_list->servers[op->server_index].flags);
327			}
328			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
329				afs_op_set_error(op, -EADV);
 
 
 
 
330				goto failed;
331			}
332			goto busy;
333
334		case VRESTARTING: /* The fileserver is either shutting down or starting up. */
 
335		case VBUSY:
336			/* The volume is in use by the volserver or another volume
337			 * utility for an operation that is not expected to alter the
338			 * contents of the volume.  VBUSY does not need to be returned
339			 * for a ROVOL or BACKVOL bound to an ITBusy volserver
340			 * transaction.  The fileserver is permitted to continue serving
341			 * content from ROVOLs and BACKVOLs during an ITBusy transaction
342			 * because the content will not change.  However, many fileserver
343			 * releases do return VBUSY for ROVOL and BACKVOL instances under
344			 * many circumstances.
345			 *
346			 * Retry after going round all the servers unless we have a file
347			 * lock we need to maintain.
348			 */
349			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
350				afs_op_set_error(op, -EBUSY);
351				goto failed;
352			}
353			if (!test_and_set_bit(AFS_SE_VOLUME_BUSY,
354					      &op->server_list->servers[op->server_index].flags)) {
355				afs_busy(op, abort_code);
356				clear_bit(AFS_SE_VOLUME_OFFLINE,
357					  &op->server_list->servers[op->server_index].flags);
358			}
359		busy:
360			if (op->flags & AFS_OPERATION_CUR_ONLY) {
361				if (!afs_sleep_and_retry(op))
362					goto failed;
363
364				/* Retry with same server & address */
365				_leave(" = t [vbusy]");
366				return true;
367			}
368
369			op->flags |= AFS_OPERATION_VBUSY;
370			goto next_server;
371
372		case VMOVED:
373			/* The volume migrated to another server.  We consider
374			 * consider all locks and callbacks broken and request
375			 * an update from the VLDB.
376			 *
377			 * We also limit the number of VMOVED hops we will
378			 * honour, just in case someone sets up a loop.
379			 */
380			if (op->flags & AFS_OPERATION_VMOVED) {
381				afs_op_set_error(op, -EREMOTEIO);
382				goto failed;
383			}
384			op->flags |= AFS_OPERATION_VMOVED;
385
386			set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
387			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
388			error = afs_check_volume_status(op->volume, op);
389			if (error < 0) {
390				afs_op_set_error(op, error);
391				goto failed;
392			}
393
394			/* If the server list didn't change, then the VLDB is
395			 * out of sync with the fileservers.  This is hopefully
396			 * a temporary condition, however, so we don't want to
397			 * permanently block access to the file.
398			 *
399			 * TODO: Try other fileservers if we can.
400			 *
401			 * TODO: Retry a few times with sleeps.
402			 */
403			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
404				afs_op_accumulate_error(op, -ENOMEDIUM, abort_code);
405				goto failed;
406			}
407
408			goto restart_from_beginning;
409
410		case UAEIO:
411		case VIO:
412			afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
413			if (op->volume->type != AFSVL_RWVOL)
414				goto next_server;
415			goto failed;
416
417		case VDISKFULL:
418		case UAENOSPC:
419			/* The partition is full.  Only applies to RWVOLs.
420			 * Translate locally and return ENOSPC.
421			 * No replicas to failover to.
422			 */
423			afs_op_set_error(op, -ENOSPC);
424			goto failed_but_online;
425
426		case VOVERQUOTA:
427		case UAEDQUOT:
428			/* Volume is full.  Only applies to RWVOLs.
429			 * Translate locally and return EDQUOT.
430			 * No replicas to failover to.
431			 */
432			afs_op_set_error(op, -EDQUOT);
433			goto failed_but_online;
434
435		default:
436			afs_op_accumulate_error(op, error, abort_code);
437		failed_but_online:
438			clear_bit(AFS_SE_VOLUME_OFFLINE,
439				  &op->server_list->servers[op->server_index].flags);
440			clear_bit(AFS_SE_VOLUME_BUSY,
441				  &op->server_list->servers[op->server_index].flags);
442			goto failed;
443		}
444
445	case -ETIMEDOUT:
446	case -ETIME:
447		if (afs_op_error(op) != -EDESTADDRREQ)
448			goto iterate_address;
449		fallthrough;
450	case -ERFKILL:
451	case -EADDRNOTAVAIL:
452	case -ENETUNREACH:
453	case -EHOSTUNREACH:
454	case -EHOSTDOWN:
455	case -ECONNREFUSED:
456		_debug("no conn");
457		afs_op_accumulate_error(op, error, 0);
458		goto iterate_address;
459
460	case -ENETRESET:
461		pr_warn("kAFS: Peer reset %s (op=%x)\n",
462			op->type ? op->type->name : "???", op->debug_id);
463		fallthrough;
464	case -ECONNRESET:
465		_debug("call reset");
466		afs_op_set_error(op, error);
467		goto failed;
468	}
469
470restart_from_beginning:
471	trace_afs_rotate(op, afs_rotate_trace_restart, 0);
472	_debug("restart");
473	op->estate = NULL;
474	op->server = NULL;
475	afs_clear_server_states(op);
476	op->server_states = NULL;
477	afs_put_serverlist(op->net, op->server_list);
478	op->server_list = NULL;
479start:
480	_debug("start");
481	ASSERTCMP(op->estate, ==, NULL);
482	/* See if we need to do an update of the volume record.  Note that the
483	 * volume may have moved or even have been deleted.
484	 */
485	error = afs_check_volume_status(op->volume, op);
486	trace_afs_rotate(op, afs_rotate_trace_check_vol_status, error);
487	if (error < 0) {
488		afs_op_set_error(op, error);
489		goto failed;
490	}
491
492	if (!afs_start_fs_iteration(op, vnode))
493		goto failed;
494
495	_debug("__ VOL %llx __", op->volume->vid);
496
497pick_server:
498	_debug("pick [%lx]", op->untried_servers);
499	ASSERTCMP(op->estate, ==, NULL);
500
501	error = afs_wait_for_fs_probes(op, op->server_states,
502				       !(op->flags & AFS_OPERATION_UNINTR));
503	switch (error) {
504	case 0: /* No untried responsive servers and no outstanding probes */
505		trace_afs_rotate(op, afs_rotate_trace_probe_none, 0);
506		goto no_more_servers;
507	case 1: /* Got a response */
508		trace_afs_rotate(op, afs_rotate_trace_probe_response, 0);
509		break;
510	case 2: /* Probe data superseded */
511		trace_afs_rotate(op, afs_rotate_trace_probe_superseded, 0);
512		goto restart_from_beginning;
513	default:
514		trace_afs_rotate(op, afs_rotate_trace_probe_error, error);
515		afs_op_set_error(op, error);
516		goto failed;
517	}
518
519	/* Pick the untried server with the highest priority untried endpoint.
520	 * If we have outstanding callbacks, we stick with the server we're
521	 * already using if we can.
522	 */
523	if (op->server) {
524		_debug("server %u", op->server_index);
525		if (test_bit(op->server_index, &op->untried_servers))
526			goto selected_server;
527		op->server = NULL;
528		_debug("no server");
529	}
530
531	rcu_read_lock();
532	op->server_index = -1;
533	best_prio = -1;
534	for (i = 0; i < op->server_list->nr_servers; i++) {
535		struct afs_endpoint_state *es;
536		struct afs_server_entry *se = &op->server_list->servers[i];
537		struct afs_addr_list *sal;
538		struct afs_server *s = se->server;
539
540		if (!test_bit(i, &op->untried_servers) ||
541		    test_bit(AFS_SE_EXCLUDED, &se->flags) ||
542		    !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
543			continue;
544		es = op->server_states[i].endpoint_state;
545		sal = es->addresses;
546
547		afs_get_address_preferences_rcu(op->net, sal);
548		for (j = 0; j < sal->nr_addrs; j++) {
549			if (es->failed_set & (1 << j))
550				continue;
551			if (!sal->addrs[j].peer)
552				continue;
553			if (sal->addrs[j].prio > best_prio) {
554				op->server_index = i;
555				best_prio = sal->addrs[j].prio;
556			}
557		}
558	}
559	rcu_read_unlock();
560
561	if (op->server_index == -1)
562		goto no_more_servers;
563
564selected_server:
565	trace_afs_rotate(op, afs_rotate_trace_selected_server, best_prio);
566	_debug("use %d prio %u", op->server_index, best_prio);
567	__clear_bit(op->server_index, &op->untried_servers);
568
569	/* We're starting on a different fileserver from the list.  We need to
570	 * check it, create a callback intercept, find its address list and
571	 * probe its capabilities before we use it.
572	 */
573	ASSERTCMP(op->estate, ==, NULL);
574	server = op->server_list->servers[op->server_index].server;
575
576	if (!afs_check_server_record(op, server, op->key))
577		goto failed;
578
579	_debug("USING SERVER: %pU", &server->uuid);
580
581	op->flags |= AFS_OPERATION_RETRY_SERVER;
582	op->server = server;
583	if (vnode->cb_server != server) {
584		vnode->cb_server = server;
585		vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
586		atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
 
 
587	}
588
 
 
 
 
 
 
589retry_server:
590	op->addr_tried = 0;
591	op->addr_index = -1;
 
 
 
 
 
 
592
593iterate_address:
 
594	/* Iterate over the current server's address list to try and find an
595	 * address on which it will respond to us.
596	 */
597	op->estate = op->server_states[op->server_index].endpoint_state;
598	set = READ_ONCE(op->estate->responsive_set);
599	failed = READ_ONCE(op->estate->failed_set);
600	_debug("iterate ES=%x rs=%lx fs=%lx", op->estate->probe_seq, set, failed);
601	set &= ~(failed | op->addr_tried);
602	trace_afs_rotate(op, afs_rotate_trace_iterate_addr, set);
603	if (!set)
604		goto wait_for_more_probe_results;
605
606	alist = op->estate->addresses;
607	best_prio = -1;
608	addr_index = 0;
609	for (i = 0; i < alist->nr_addrs; i++) {
610		if (!(set & (1 << i)))
611			continue;
612		if (alist->addrs[i].prio > best_prio) {
613			addr_index = i;
614			best_prio = alist->addrs[i].prio;
615		}
616	}
617
618	alist->preferred = addr_index;
619
620	op->addr_index = addr_index;
621	set_bit(addr_index, &op->addr_tried);
622
623	op->volsync.creation = TIME64_MIN;
624	op->volsync.update = TIME64_MIN;
625	op->call_responded = false;
626	_debug("address [%u] %u/%u %pISp",
627	       op->server_index, addr_index, alist->nr_addrs,
628	       rxrpc_kernel_remote_addr(alist->addrs[op->addr_index].peer));
629	_leave(" = t");
630	return true;
631
632wait_for_more_probe_results:
633	error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
634					  !(op->flags & AFS_OPERATION_UNINTR));
635	if (error == 1)
636		goto iterate_address;
637	if (!error)
638		goto restart_from_beginning;
639
640	/* We've now had a failure to respond on all of a server's addresses -
641	 * immediately probe them again and consider retrying the server.
642	 */
643	trace_afs_rotate(op, afs_rotate_trace_probe_fileserver, 0);
644	afs_probe_fileserver(op->net, op->server);
645	if (op->flags & AFS_OPERATION_RETRY_SERVER) {
646		error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
647						  !(op->flags & AFS_OPERATION_UNINTR));
 
648		switch (error) {
649		case 1:
650			op->flags &= ~AFS_OPERATION_RETRY_SERVER;
651			trace_afs_rotate(op, afs_rotate_trace_retry_server, 1);
652			goto retry_server;
653		case 0:
654			trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
655			goto restart_from_beginning;
656		case -ERESTARTSYS:
657			afs_op_set_error(op, error);
658			goto failed;
659		case -ETIME:
660		case -EDESTADDRREQ:
661			goto next_server;
662		}
663	}
664
665next_server:
666	trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
667	_debug("next");
668	op->estate = NULL;
669	goto pick_server;
670
671no_more_servers:
672	/* That's all the servers poked to no good effect.  Try again if some
673	 * of them were busy.
674	 */
675	trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0);
676	if (op->flags & AFS_OPERATION_VBUSY) {
677		afs_sleep_and_retry(op);
678		op->flags &= ~AFS_OPERATION_VBUSY;
679		goto restart_from_beginning;
680	}
681
682	rcu_read_lock();
 
683	for (i = 0; i < op->server_list->nr_servers; i++) {
684		struct afs_endpoint_state *estate;
685
686		estate = op->server_states[i].endpoint_state;
687		error = READ_ONCE(estate->error);
688		if (error < 0)
689			afs_op_accumulate_error(op, error, estate->abort_code);
690	}
691	rcu_read_unlock();
692
 
 
 
 
693failed:
694	trace_afs_rotate(op, afs_rotate_trace_failed, 0);
695	op->flags |= AFS_OPERATION_STOP;
696	op->estate = NULL;
697	_leave(" = f [failed %d]", afs_op_error(op));
698	return false;
699}
700
701/*
702 * Dump cursor state in the case of the error being EDESTADDRREQ.
703 */
704void afs_dump_edestaddrreq(const struct afs_operation *op)
705{
706	static int count;
707	int i;
708
709	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
710		return;
711	count++;
712
713	rcu_read_lock();
714
715	pr_notice("EDESTADDR occurred\n");
716	pr_notice("OP: cbb=%x cbb2=%x fl=%x err=%hd\n",
717		  op->file[0].cb_break_before,
718		  op->file[1].cb_break_before, op->flags, op->cumul_error.error);
719	pr_notice("OP: ut=%lx ix=%d ni=%u\n",
720		  op->untried_servers, op->server_index, op->nr_iterations);
721	pr_notice("OP: call  er=%d ac=%d r=%u\n",
722		  op->call_error, op->call_abort_code, op->call_responded);
723
724	if (op->server_list) {
725		const struct afs_server_list *sl = op->server_list;
726
727		pr_notice("FC: SL nr=%u vnov=%hx\n",
728			  sl->nr_servers, sl->vnovol_mask);
729		for (i = 0; i < sl->nr_servers; i++) {
730			const struct afs_server *s = sl->servers[i].server;
731			const struct afs_endpoint_state *e =
732				rcu_dereference(s->endpoint_state);
733			const struct afs_addr_list *a = e->addresses;
734
735			pr_notice("FC: server fl=%lx av=%u %pU\n",
736				  s->flags, s->addr_version, &s->uuid);
737			pr_notice("FC:  - pq=%x R=%lx F=%lx\n",
738				  e->probe_seq, e->responsive_set, e->failed_set);
739			if (a) {
740				pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
741					  a->version,
742					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
743					  a->preferred);
744				if (a == e->addresses)
 
 
745					pr_notice("FC:  - current\n");
746			}
747		}
748	}
749
750	pr_notice("AC: t=%lx ax=%d\n", op->addr_tried, op->addr_index);
 
 
751	rcu_read_unlock();
752}