Linux Audio

Check our new training course

Loading...
v6.2
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/* Handle fileserver selection and rotation.
  3 *
  4 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  5 * Written by David Howells (dhowells@redhat.com)
  6 */
  7
  8#include <linux/kernel.h>
  9#include <linux/slab.h>
 10#include <linux/fs.h>
 11#include <linux/sched.h>
 12#include <linux/delay.h>
 13#include <linux/sched/signal.h>
 14#include "internal.h"
 15#include "afs_fs.h"
 16
 17/*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 18 * Begin iteration through a server list, starting with the vnode's last used
 19 * server if possible, or the last recorded good server if not.
 20 */
 21static bool afs_start_fs_iteration(struct afs_operation *op,
 22				   struct afs_vnode *vnode)
 23{
 24	struct afs_server *server;
 25	void *cb_server;
 26	int i;
 27
 28	read_lock(&op->volume->servers_lock);
 29	op->server_list = afs_get_serverlist(
 30		rcu_dereference_protected(op->volume->servers,
 31					  lockdep_is_held(&op->volume->servers_lock)));
 32	read_unlock(&op->volume->servers_lock);
 33
 34	op->untried = (1UL << op->server_list->nr_servers) - 1;
 35	op->index = READ_ONCE(op->server_list->preferred);
 36
 37	cb_server = vnode->cb_server;
 38	if (cb_server) {
 39		/* See if the vnode's preferred record is still available */
 40		for (i = 0; i < op->server_list->nr_servers; i++) {
 41			server = op->server_list->servers[i].server;
 42			if (server == cb_server) {
 43				op->index = i;
 44				goto found_interest;
 45			}
 46		}
 47
 48		/* If we have a lock outstanding on a server that's no longer
 49		 * serving this vnode, then we can't switch to another server
 50		 * and have to return an error.
 51		 */
 52		if (op->flags & AFS_OPERATION_CUR_ONLY) {
 53			op->error = -ESTALE;
 54			return false;
 55		}
 56
 57		/* Note that the callback promise is effectively broken */
 58		write_seqlock(&vnode->cb_lock);
 59		ASSERTCMP(cb_server, ==, vnode->cb_server);
 60		vnode->cb_server = NULL;
 61		if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
 62			vnode->cb_break++;
 63		write_sequnlock(&vnode->cb_lock);
 
 
 
 64	}
 65
 66found_interest:
 67	return true;
 68}
 69
 70/*
 71 * Post volume busy note.
 72 */
 73static void afs_busy(struct afs_volume *volume, u32 abort_code)
 74{
 75	const char *m;
 76
 77	switch (abort_code) {
 78	case VOFFLINE:		m = "offline";		break;
 79	case VRESTARTING:	m = "restarting";	break;
 80	case VSALVAGING:	m = "being salvaged";	break;
 81	default:		m = "busy";		break;
 82	}
 83
 84	pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
 85}
 86
 87/*
 88 * Sleep and retry the operation to the same fileserver.
 89 */
 90static bool afs_sleep_and_retry(struct afs_operation *op)
 91{
 92	if (!(op->flags & AFS_OPERATION_UNINTR)) {
 93		msleep_interruptible(1000);
 94		if (signal_pending(current)) {
 95			op->error = -ERESTARTSYS;
 96			return false;
 97		}
 98	} else {
 99		msleep(1000);
100	}
101
102	return true;
103}
104
105/*
106 * Select the fileserver to use.  May be called multiple times to rotate
107 * through the fileservers.
108 */
109bool afs_select_fileserver(struct afs_operation *op)
110{
111	struct afs_addr_list *alist;
112	struct afs_server *server;
113	struct afs_vnode *vnode = op->file[0].vnode;
114	struct afs_error e;
115	u32 rtt;
116	int error = op->ac.error, i;
117
118	_enter("%lx[%d],%lx[%d],%d,%d",
119	       op->untried, op->index,
120	       op->ac.tried, op->ac.index,
121	       error, op->ac.abort_code);
122
123	if (op->flags & AFS_OPERATION_STOP) {
124		_leave(" = f [stopped]");
125		return false;
126	}
127
128	op->nr_iterations++;
129
130	/* Evaluate the result of the previous operation, if there was one. */
131	switch (error) {
132	case SHRT_MAX:
133		goto start;
134
135	case 0:
136	default:
137		/* Success or local failure.  Stop. */
138		op->error = error;
139		op->flags |= AFS_OPERATION_STOP;
140		_leave(" = f [okay/local %d]", error);
141		return false;
142
143	case -ECONNABORTED:
144		/* The far side rejected the operation on some grounds.  This
145		 * might involve the server being busy or the volume having been moved.
146		 */
147		switch (op->ac.abort_code) {
148		case VNOVOL:
149			/* This fileserver doesn't know about the volume.
150			 * - May indicate that the VL is wrong - retry once and compare
151			 *   the results.
152			 * - May indicate that the fileserver couldn't attach to the vol.
153			 */
154			if (op->flags & AFS_OPERATION_VNOVOL) {
155				op->error = -EREMOTEIO;
156				goto next_server;
157			}
158
159			write_lock(&op->volume->servers_lock);
160			op->server_list->vnovol_mask |= 1 << op->index;
161			write_unlock(&op->volume->servers_lock);
162
163			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
164			error = afs_check_volume_status(op->volume, op);
165			if (error < 0)
166				goto failed_set_error;
167
168			if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
169				op->error = -ENOMEDIUM;
170				goto failed;
171			}
172
173			/* If the server list didn't change, then assume that
174			 * it's the fileserver having trouble.
175			 */
176			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
177				op->error = -EREMOTEIO;
178				goto next_server;
179			}
180
181			/* Try again */
182			op->flags |= AFS_OPERATION_VNOVOL;
183			_leave(" = t [vnovol]");
184			return true;
185
186		case VSALVAGE: /* TODO: Should this return an error or iterate? */
187		case VVOLEXISTS:
188		case VNOSERVICE:
189		case VONLINE:
190		case VDISKFULL:
191		case VOVERQUOTA:
192			op->error = afs_abort_to_error(op->ac.abort_code);
193			goto next_server;
194
195		case VOFFLINE:
196			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
197				afs_busy(op->volume, op->ac.abort_code);
198				clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
199			}
200			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
201				op->error = -EADV;
202				goto failed;
203			}
204			if (op->flags & AFS_OPERATION_CUR_ONLY) {
205				op->error = -ESTALE;
206				goto failed;
207			}
208			goto busy;
209
210		case VSALVAGING:
211		case VRESTARTING:
212		case VBUSY:
213			/* Retry after going round all the servers unless we
214			 * have a file lock we need to maintain.
215			 */
216			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
217				op->error = -EBUSY;
218				goto failed;
219			}
220			if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
221				afs_busy(op->volume, op->ac.abort_code);
222				clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
223			}
224		busy:
225			if (op->flags & AFS_OPERATION_CUR_ONLY) {
226				if (!afs_sleep_and_retry(op))
227					goto failed;
228
229				 /* Retry with same server & address */
230				_leave(" = t [vbusy]");
231				return true;
232			}
233
234			op->flags |= AFS_OPERATION_VBUSY;
235			goto next_server;
236
237		case VMOVED:
238			/* The volume migrated to another server.  We consider
239			 * consider all locks and callbacks broken and request
240			 * an update from the VLDB.
241			 *
242			 * We also limit the number of VMOVED hops we will
243			 * honour, just in case someone sets up a loop.
244			 */
245			if (op->flags & AFS_OPERATION_VMOVED) {
246				op->error = -EREMOTEIO;
247				goto failed;
248			}
249			op->flags |= AFS_OPERATION_VMOVED;
250
251			set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
252			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
253			error = afs_check_volume_status(op->volume, op);
254			if (error < 0)
255				goto failed_set_error;
256
257			/* If the server list didn't change, then the VLDB is
258			 * out of sync with the fileservers.  This is hopefully
259			 * a temporary condition, however, so we don't want to
260			 * permanently block access to the file.
261			 *
262			 * TODO: Try other fileservers if we can.
263			 *
264			 * TODO: Retry a few times with sleeps.
265			 */
266			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
267				op->error = -ENOMEDIUM;
268				goto failed;
269			}
270
271			goto restart_from_beginning;
272
273		default:
274			clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
275			clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
276			op->error = afs_abort_to_error(op->ac.abort_code);
277			goto failed;
278		}
279
280	case -ETIMEDOUT:
281	case -ETIME:
282		if (op->error != -EDESTADDRREQ)
283			goto iterate_address;
284		fallthrough;
285	case -ERFKILL:
286	case -EADDRNOTAVAIL:
287	case -ENETUNREACH:
288	case -EHOSTUNREACH:
289	case -EHOSTDOWN:
290	case -ECONNREFUSED:
291		_debug("no conn");
292		op->error = error;
293		goto iterate_address;
294
295	case -ENETRESET:
296		pr_warn("kAFS: Peer reset %s (op=%x)\n",
297			op->type ? op->type->name : "???", op->debug_id);
298		fallthrough;
299	case -ECONNRESET:
300		_debug("call reset");
301		op->error = error;
302		goto failed;
303	}
304
305restart_from_beginning:
306	_debug("restart");
307	afs_end_cursor(&op->ac);
308	op->server = NULL;
309	afs_put_serverlist(op->net, op->server_list);
310	op->server_list = NULL;
 
311start:
312	_debug("start");
313	/* See if we need to do an update of the volume record.  Note that the
314	 * volume may have moved or even have been deleted.
315	 */
316	error = afs_check_volume_status(op->volume, op);
317	if (error < 0)
318		goto failed_set_error;
319
320	if (!afs_start_fs_iteration(op, vnode))
321		goto failed;
322
323	_debug("__ VOL %llx __", op->volume->vid);
 
 
 
324
325pick_server:
326	_debug("pick [%lx]", op->untried);
327
328	error = afs_wait_for_fs_probes(op->server_list, op->untried);
329	if (error < 0)
330		goto failed_set_error;
331
332	/* Pick the untried server with the lowest RTT.  If we have outstanding
333	 * callbacks, we stick with the server we're already using if we can.
334	 */
335	if (op->server) {
336		_debug("server %u", op->index);
337		if (test_bit(op->index, &op->untried))
338			goto selected_server;
339		op->server = NULL;
340		_debug("no server");
 
341	}
342
343	op->index = -1;
344	rtt = U32_MAX;
345	for (i = 0; i < op->server_list->nr_servers; i++) {
346		struct afs_server *s = op->server_list->servers[i].server;
347
348		if (!test_bit(i, &op->untried) ||
349		    !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
350			continue;
351		if (s->probe.rtt < rtt) {
352			op->index = i;
353			rtt = s->probe.rtt;
354		}
355	}
356
357	if (op->index == -1)
358		goto no_more_servers;
359
360selected_server:
361	_debug("use %d", op->index);
362	__clear_bit(op->index, &op->untried);
363
364	/* We're starting on a different fileserver from the list.  We need to
365	 * check it, create a callback intercept, find its address list and
366	 * probe its capabilities before we use it.
367	 */
368	ASSERTCMP(op->ac.alist, ==, NULL);
369	server = op->server_list->servers[op->index].server;
370
371	if (!afs_check_server_record(op, server))
372		goto failed;
373
374	_debug("USING SERVER: %pU", &server->uuid);
375
376	op->flags |= AFS_OPERATION_RETRY_SERVER;
377	op->server = server;
378	if (vnode->cb_server != server) {
379		vnode->cb_server = server;
380		vnode->cb_s_break = server->cb_s_break;
381		vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break);
382		vnode->cb_v_break = vnode->volume->cb_v_break;
383		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
384	}
 
 
 
 
385
386	read_lock(&server->fs_lock);
387	alist = rcu_dereference_protected(server->addresses,
388					  lockdep_is_held(&server->fs_lock));
389	afs_get_addrlist(alist);
390	read_unlock(&server->fs_lock);
391
392retry_server:
393	memset(&op->ac, 0, sizeof(op->ac));
394
395	if (!op->ac.alist)
396		op->ac.alist = alist;
397	else
398		afs_put_addrlist(alist);
399
400	op->ac.index = -1;
401
402iterate_address:
403	ASSERT(op->ac.alist);
404	/* Iterate over the current server's address list to try and find an
405	 * address on which it will respond to us.
406	 */
407	if (!afs_iterate_addresses(&op->ac))
408		goto out_of_addresses;
409
410	_debug("address [%u] %u/%u %pISp",
411	       op->index, op->ac.index, op->ac.alist->nr_addrs,
412	       &op->ac.alist->addrs[op->ac.index].transport);
413
414	_leave(" = t");
415	return true;
416
417out_of_addresses:
418	/* We've now had a failure to respond on all of a server's addresses -
419	 * immediately probe them again and consider retrying the server.
420	 */
421	afs_probe_fileserver(op->net, op->server);
422	if (op->flags & AFS_OPERATION_RETRY_SERVER) {
423		alist = op->ac.alist;
424		error = afs_wait_for_one_fs_probe(
425			op->server, !(op->flags & AFS_OPERATION_UNINTR));
426		switch (error) {
427		case 0:
428			op->flags &= ~AFS_OPERATION_RETRY_SERVER;
429			goto retry_server;
430		case -ERESTARTSYS:
431			goto failed_set_error;
432		case -ETIME:
433		case -EDESTADDRREQ:
434			goto next_server;
435		}
436	}
437
438next_server:
439	_debug("next");
440	afs_end_cursor(&op->ac);
441	goto pick_server;
442
443no_more_servers:
444	/* That's all the servers poked to no good effect.  Try again if some
445	 * of them were busy.
446	 */
447	if (op->flags & AFS_OPERATION_VBUSY)
448		goto restart_from_beginning;
449
450	e.error = -EDESTADDRREQ;
451	e.responded = false;
452	for (i = 0; i < op->server_list->nr_servers; i++) {
453		struct afs_server *s = op->server_list->servers[i].server;
454
455		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
456				     s->probe.abort_code);
457	}
458
459	error = e.error;
460
461failed_set_error:
462	op->error = error;
463failed:
464	op->flags |= AFS_OPERATION_STOP;
465	afs_end_cursor(&op->ac);
466	_leave(" = f [failed %d]", op->error);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467	return false;
468}
469
470/*
471 * Dump cursor state in the case of the error being EDESTADDRREQ.
472 */
473void afs_dump_edestaddrreq(const struct afs_operation *op)
474{
475	static int count;
476	int i;
477
478	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
479		return;
480	count++;
481
482	rcu_read_lock();
483
484	pr_notice("EDESTADDR occurred\n");
485	pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n",
486		  op->file[0].cb_break_before,
487		  op->file[1].cb_break_before, op->flags, op->error);
488	pr_notice("FC: ut=%lx ix=%d ni=%u\n",
489		  op->untried, op->index, op->nr_iterations);
490
491	if (op->server_list) {
492		const struct afs_server_list *sl = op->server_list;
493		pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
494			  sl->nr_servers, sl->preferred, sl->vnovol_mask);
495		for (i = 0; i < sl->nr_servers; i++) {
496			const struct afs_server *s = sl->servers[i].server;
497			pr_notice("FC: server fl=%lx av=%u %pU\n",
498				  s->flags, s->addr_version, &s->uuid);
499			if (s->addresses) {
500				const struct afs_addr_list *a =
501					rcu_dereference(s->addresses);
502				pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
503					  a->version,
504					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
505					  a->preferred);
506				pr_notice("FC:  - R=%lx F=%lx\n",
507					  a->responded, a->failed);
508				if (a == op->ac.alist)
509					pr_notice("FC:  - current\n");
510			}
511		}
512	}
513
514	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
515		  op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error,
516		  op->ac.responded, op->ac.nr_iterations);
517	rcu_read_unlock();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518}
v5.4
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/* Handle fileserver selection and rotation.
  3 *
  4 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  5 * Written by David Howells (dhowells@redhat.com)
  6 */
  7
  8#include <linux/kernel.h>
  9#include <linux/slab.h>
 10#include <linux/fs.h>
 11#include <linux/sched.h>
 12#include <linux/delay.h>
 13#include <linux/sched/signal.h>
 14#include "internal.h"
 15#include "afs_fs.h"
 16
 17/*
 18 * Begin an operation on the fileserver.
 19 *
 20 * Fileserver operations are serialised on the server by vnode, so we serialise
 21 * them here also using the io_lock.
 22 */
 23bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 24			       struct key *key, bool intr)
 25{
 26	memset(fc, 0, sizeof(*fc));
 27	fc->vnode = vnode;
 28	fc->key = key;
 29	fc->ac.error = SHRT_MAX;
 30	fc->error = -EDESTADDRREQ;
 31
 32	if (intr) {
 33		fc->flags |= AFS_FS_CURSOR_INTR;
 34		if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
 35			fc->error = -EINTR;
 36			fc->flags |= AFS_FS_CURSOR_STOP;
 37			return false;
 38		}
 39	} else {
 40		mutex_lock(&vnode->io_lock);
 41	}
 42
 43	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
 44		fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
 45	return true;
 46}
 47
 48/*
 49 * Begin iteration through a server list, starting with the vnode's last used
 50 * server if possible, or the last recorded good server if not.
 51 */
 52static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
 53				   struct afs_vnode *vnode)
 54{
 55	struct afs_cb_interest *cbi;
 
 56	int i;
 57
 58	read_lock(&vnode->volume->servers_lock);
 59	fc->server_list = afs_get_serverlist(vnode->volume->servers);
 60	read_unlock(&vnode->volume->servers_lock);
 61
 62	fc->untried = (1UL << fc->server_list->nr_servers) - 1;
 63	fc->index = READ_ONCE(fc->server_list->preferred);
 64
 65	cbi = rcu_dereference_protected(vnode->cb_interest,
 66					lockdep_is_held(&vnode->io_lock));
 67	if (cbi) {
 
 68		/* See if the vnode's preferred record is still available */
 69		for (i = 0; i < fc->server_list->nr_servers; i++) {
 70			if (fc->server_list->servers[i].cb_interest == cbi) {
 71				fc->index = i;
 
 72				goto found_interest;
 73			}
 74		}
 75
 76		/* If we have a lock outstanding on a server that's no longer
 77		 * serving this vnode, then we can't switch to another server
 78		 * and have to return an error.
 79		 */
 80		if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
 81			fc->error = -ESTALE;
 82			return false;
 83		}
 84
 85		/* Note that the callback promise is effectively broken */
 86		write_seqlock(&vnode->cb_lock);
 87		ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest));
 88		rcu_assign_pointer(vnode->cb_interest, NULL);
 89		if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
 90			vnode->cb_break++;
 91		write_sequnlock(&vnode->cb_lock);
 92
 93		afs_put_cb_interest(afs_v2net(vnode), cbi);
 94		cbi = NULL;
 95	}
 96
 97found_interest:
 98	return true;
 99}
100
101/*
102 * Post volume busy note.
103 */
104static void afs_busy(struct afs_volume *volume, u32 abort_code)
105{
106	const char *m;
107
108	switch (abort_code) {
109	case VOFFLINE:		m = "offline";		break;
110	case VRESTARTING:	m = "restarting";	break;
111	case VSALVAGING:	m = "being salvaged";	break;
112	default:		m = "busy";		break;
113	}
114
115	pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
116}
117
118/*
119 * Sleep and retry the operation to the same fileserver.
120 */
121static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
122{
123	if (fc->flags & AFS_FS_CURSOR_INTR) {
124		msleep_interruptible(1000);
125		if (signal_pending(current)) {
126			fc->error = -ERESTARTSYS;
127			return false;
128		}
129	} else {
130		msleep(1000);
131	}
132
133	return true;
134}
135
136/*
137 * Select the fileserver to use.  May be called multiple times to rotate
138 * through the fileservers.
139 */
140bool afs_select_fileserver(struct afs_fs_cursor *fc)
141{
142	struct afs_addr_list *alist;
143	struct afs_server *server;
144	struct afs_vnode *vnode = fc->vnode;
145	struct afs_error e;
146	u32 rtt;
147	int error = fc->ac.error, i;
148
149	_enter("%lx[%d],%lx[%d],%d,%d",
150	       fc->untried, fc->index,
151	       fc->ac.tried, fc->ac.index,
152	       error, fc->ac.abort_code);
153
154	if (fc->flags & AFS_FS_CURSOR_STOP) {
155		_leave(" = f [stopped]");
156		return false;
157	}
158
159	fc->nr_iterations++;
160
161	/* Evaluate the result of the previous operation, if there was one. */
162	switch (error) {
163	case SHRT_MAX:
164		goto start;
165
166	case 0:
167	default:
168		/* Success or local failure.  Stop. */
169		fc->error = error;
170		fc->flags |= AFS_FS_CURSOR_STOP;
171		_leave(" = f [okay/local %d]", error);
172		return false;
173
174	case -ECONNABORTED:
175		/* The far side rejected the operation on some grounds.  This
176		 * might involve the server being busy or the volume having been moved.
177		 */
178		switch (fc->ac.abort_code) {
179		case VNOVOL:
180			/* This fileserver doesn't know about the volume.
181			 * - May indicate that the VL is wrong - retry once and compare
182			 *   the results.
183			 * - May indicate that the fileserver couldn't attach to the vol.
184			 */
185			if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
186				fc->error = -EREMOTEIO;
187				goto next_server;
188			}
189
190			write_lock(&vnode->volume->servers_lock);
191			fc->server_list->vnovol_mask |= 1 << fc->index;
192			write_unlock(&vnode->volume->servers_lock);
193
194			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
195			error = afs_check_volume_status(vnode->volume, fc->key);
196			if (error < 0)
197				goto failed_set_error;
198
199			if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
200				fc->error = -ENOMEDIUM;
201				goto failed;
202			}
203
204			/* If the server list didn't change, then assume that
205			 * it's the fileserver having trouble.
206			 */
207			if (vnode->volume->servers == fc->server_list) {
208				fc->error = -EREMOTEIO;
209				goto next_server;
210			}
211
212			/* Try again */
213			fc->flags |= AFS_FS_CURSOR_VNOVOL;
214			_leave(" = t [vnovol]");
215			return true;
216
217		case VSALVAGE: /* TODO: Should this return an error or iterate? */
218		case VVOLEXISTS:
219		case VNOSERVICE:
220		case VONLINE:
221		case VDISKFULL:
222		case VOVERQUOTA:
223			fc->error = afs_abort_to_error(fc->ac.abort_code);
224			goto next_server;
225
226		case VOFFLINE:
227			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) {
228				afs_busy(vnode->volume, fc->ac.abort_code);
229				clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
230			}
231			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
232				fc->error = -EADV;
233				goto failed;
234			}
235			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
236				fc->error = -ESTALE;
237				goto failed;
238			}
239			goto busy;
240
241		case VSALVAGING:
242		case VRESTARTING:
243		case VBUSY:
244			/* Retry after going round all the servers unless we
245			 * have a file lock we need to maintain.
246			 */
247			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
248				fc->error = -EBUSY;
249				goto failed;
250			}
251			if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
252				afs_busy(vnode->volume, fc->ac.abort_code);
253				clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
254			}
255		busy:
256			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
257				if (!afs_sleep_and_retry(fc))
258					goto failed;
259
260				 /* Retry with same server & address */
261				_leave(" = t [vbusy]");
262				return true;
263			}
264
265			fc->flags |= AFS_FS_CURSOR_VBUSY;
266			goto next_server;
267
268		case VMOVED:
269			/* The volume migrated to another server.  We consider
270			 * consider all locks and callbacks broken and request
271			 * an update from the VLDB.
272			 *
273			 * We also limit the number of VMOVED hops we will
274			 * honour, just in case someone sets up a loop.
275			 */
276			if (fc->flags & AFS_FS_CURSOR_VMOVED) {
277				fc->error = -EREMOTEIO;
278				goto failed;
279			}
280			fc->flags |= AFS_FS_CURSOR_VMOVED;
281
282			set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
283			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
284			error = afs_check_volume_status(vnode->volume, fc->key);
285			if (error < 0)
286				goto failed_set_error;
287
288			/* If the server list didn't change, then the VLDB is
289			 * out of sync with the fileservers.  This is hopefully
290			 * a temporary condition, however, so we don't want to
291			 * permanently block access to the file.
292			 *
293			 * TODO: Try other fileservers if we can.
294			 *
295			 * TODO: Retry a few times with sleeps.
296			 */
297			if (vnode->volume->servers == fc->server_list) {
298				fc->error = -ENOMEDIUM;
299				goto failed;
300			}
301
302			goto restart_from_beginning;
303
304		default:
305			clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
306			clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
307			fc->error = afs_abort_to_error(fc->ac.abort_code);
308			goto failed;
309		}
310
311	case -ETIMEDOUT:
312	case -ETIME:
313		if (fc->error != -EDESTADDRREQ)
314			goto iterate_address;
315		/* Fall through */
316	case -ERFKILL:
317	case -EADDRNOTAVAIL:
318	case -ENETUNREACH:
319	case -EHOSTUNREACH:
320	case -EHOSTDOWN:
321	case -ECONNREFUSED:
322		_debug("no conn");
323		fc->error = error;
324		goto iterate_address;
325
 
 
 
 
326	case -ECONNRESET:
327		_debug("call reset");
328		fc->error = error;
329		goto failed;
330	}
331
332restart_from_beginning:
333	_debug("restart");
334	afs_end_cursor(&fc->ac);
335	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
336	fc->cbi = NULL;
337	afs_put_serverlist(afs_v2net(vnode), fc->server_list);
338	fc->server_list = NULL;
339start:
340	_debug("start");
341	/* See if we need to do an update of the volume record.  Note that the
342	 * volume may have moved or even have been deleted.
343	 */
344	error = afs_check_volume_status(vnode->volume, fc->key);
345	if (error < 0)
346		goto failed_set_error;
347
348	if (!afs_start_fs_iteration(fc, vnode))
349		goto failed;
350
351	_debug("__ VOL %llx __", vnode->volume->vid);
352	error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list);
353	if (error < 0)
354		goto failed_set_error;
355
356pick_server:
357	_debug("pick [%lx]", fc->untried);
358
359	error = afs_wait_for_fs_probes(fc->server_list, fc->untried);
360	if (error < 0)
361		goto failed_set_error;
362
363	/* Pick the untried server with the lowest RTT.  If we have outstanding
364	 * callbacks, we stick with the server we're already using if we can.
365	 */
366	if (fc->cbi) {
367		_debug("cbi %u", fc->index);
368		if (test_bit(fc->index, &fc->untried))
369			goto selected_server;
370		afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
371		fc->cbi = NULL;
372		_debug("nocbi");
373	}
374
375	fc->index = -1;
376	rtt = U32_MAX;
377	for (i = 0; i < fc->server_list->nr_servers; i++) {
378		struct afs_server *s = fc->server_list->servers[i].server;
379
380		if (!test_bit(i, &fc->untried) || !s->probe.responded)
 
381			continue;
382		if (s->probe.rtt < rtt) {
383			fc->index = i;
384			rtt = s->probe.rtt;
385		}
386	}
387
388	if (fc->index == -1)
389		goto no_more_servers;
390
391selected_server:
392	_debug("use %d", fc->index);
393	__clear_bit(fc->index, &fc->untried);
394
395	/* We're starting on a different fileserver from the list.  We need to
396	 * check it, create a callback intercept, find its address list and
397	 * probe its capabilities before we use it.
398	 */
399	ASSERTCMP(fc->ac.alist, ==, NULL);
400	server = fc->server_list->servers[fc->index].server;
401
402	if (!afs_check_server_record(fc, server))
403		goto failed;
404
405	_debug("USING SERVER: %pU", &server->uuid);
406
407	/* Make sure we've got a callback interest record for this server.  We
408	 * have to link it in before we send the request as we can be sent a
409	 * break request before we've finished decoding the reply and
410	 * installing the vnode.
411	 */
412	error = afs_register_server_cb_interest(vnode, fc->server_list,
413						fc->index);
414	if (error < 0)
415		goto failed_set_error;
416
417	fc->cbi = afs_get_cb_interest(
418		rcu_dereference_protected(vnode->cb_interest,
419					  lockdep_is_held(&vnode->io_lock)));
420
421	read_lock(&server->fs_lock);
422	alist = rcu_dereference_protected(server->addresses,
423					  lockdep_is_held(&server->fs_lock));
424	afs_get_addrlist(alist);
425	read_unlock(&server->fs_lock);
426
427	memset(&fc->ac, 0, sizeof(fc->ac));
 
428
429	if (!fc->ac.alist)
430		fc->ac.alist = alist;
431	else
432		afs_put_addrlist(alist);
433
434	fc->ac.index = -1;
435
436iterate_address:
437	ASSERT(fc->ac.alist);
438	/* Iterate over the current server's address list to try and find an
439	 * address on which it will respond to us.
440	 */
441	if (!afs_iterate_addresses(&fc->ac))
442		goto next_server;
443
444	_debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs);
 
 
445
446	_leave(" = t");
447	return true;
448
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449next_server:
450	_debug("next");
451	afs_end_cursor(&fc->ac);
452	goto pick_server;
453
454no_more_servers:
455	/* That's all the servers poked to no good effect.  Try again if some
456	 * of them were busy.
457	 */
458	if (fc->flags & AFS_FS_CURSOR_VBUSY)
459		goto restart_from_beginning;
460
461	e.error = -EDESTADDRREQ;
462	e.responded = false;
463	for (i = 0; i < fc->server_list->nr_servers; i++) {
464		struct afs_server *s = fc->server_list->servers[i].server;
465
466		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
467				     s->probe.abort_code);
468	}
469
470	error = e.error;
471
472failed_set_error:
473	fc->error = error;
474failed:
475	fc->flags |= AFS_FS_CURSOR_STOP;
476	afs_end_cursor(&fc->ac);
477	_leave(" = f [failed %d]", fc->error);
478	return false;
479}
480
/*
 * Select the same fileserver we used for a vnode before and only that
 * fileserver.  We use this when we have a lock on that file, which is backed
 * only by the fileserver we obtained it from.
 *
 * On the first call (fc->ac.error == SHRT_MAX) the server is taken from the
 * vnode's callback interest and its address list is loaded into the cursor;
 * if there is no interest or no address list, the cursor is stopped with
 * -ESTALE.  On later calls, connection-type failures move on to the next
 * address and anything else stops the cursor.
 *
 * Returns true if there is an address to try, false otherwise.
 */
bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_vnode *vnode = fc->vnode;
	struct afs_cb_interest *interest;
	struct afs_addr_list *addrs;
	int prev_err = fc->ac.error;

	_enter("");

	interest = rcu_dereference_protected(vnode->cb_interest,
					     lockdep_is_held(&vnode->io_lock));

	switch (prev_err) {
	case SHRT_MAX:
		/* First call: bind the cursor to the vnode's current server. */
		if (!interest)
			goto stale;

		fc->cbi = afs_get_cb_interest(interest);

		read_lock(&interest->server->fs_lock);
		addrs = rcu_dereference_protected(interest->server->addresses,
						  lockdep_is_held(&interest->server->fs_lock));
		afs_get_addrlist(addrs);
		read_unlock(&interest->server->fs_lock);
		if (!addrs)
			goto stale;

		memset(&fc->ac, 0, sizeof(fc->ac));
		fc->ac.alist = addrs;
		fc->ac.index = -1;
		break;

	case -ECONNABORTED:
		fc->error = afs_abort_to_error(fc->ac.abort_code);
		fc->flags |= AFS_FS_CURSOR_STOP;
		_leave(" = f [abort]");
		return false;

	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
		_debug("no conn");
		fc->error = prev_err;
		break;

	case 0:
	default:
		/* Success or local failure.  Stop. */
		fc->error = prev_err;
		fc->flags |= AFS_FS_CURSOR_STOP;
		_leave(" = f [okay/local %d]", prev_err);
		return false;
	}

	/* Iterate over the current server's address list to try and find an
	 * address on which it will respond to us.
	 */
	if (afs_iterate_addresses(&fc->ac)) {
		_leave(" = t");
		return true;
	}

	afs_end_cursor(&fc->ac);
	return false;

stale:
	/* No usable server or address list for the vnode any more. */
	fc->error = -ESTALE;
	fc->flags |= AFS_FS_CURSOR_STOP;
	return false;
}
563
564/*
565 * Dump cursor state in the case of the error being EDESTADDRREQ.
566 */
567static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
568{
569	static int count;
570	int i;
571
572	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
573		return;
574	count++;
575
576	rcu_read_lock();
577
578	pr_notice("EDESTADDR occurred\n");
579	pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n",
580		  fc->cb_break, fc->cb_break_2, fc->flags, fc->error);
 
581	pr_notice("FC: ut=%lx ix=%d ni=%u\n",
582		  fc->untried, fc->index, fc->nr_iterations);
583
584	if (fc->server_list) {
585		const struct afs_server_list *sl = fc->server_list;
586		pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
587			  sl->nr_servers, sl->preferred, sl->vnovol_mask);
588		for (i = 0; i < sl->nr_servers; i++) {
589			const struct afs_server *s = sl->servers[i].server;
590			pr_notice("FC: server fl=%lx av=%u %pU\n",
591				  s->flags, s->addr_version, &s->uuid);
592			if (s->addresses) {
593				const struct afs_addr_list *a =
594					rcu_dereference(s->addresses);
595				pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
596					  a->version,
597					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
598					  a->preferred);
599				pr_notice("FC:  - pr=%lx R=%lx F=%lx\n",
600					  a->probed, a->responded, a->failed);
601				if (a == fc->ac.alist)
602					pr_notice("FC:  - current\n");
603			}
604		}
605	}
606
607	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
608		  fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error,
609		  fc->ac.responded, fc->ac.nr_iterations);
610	rcu_read_unlock();
611}
612
613/*
614 * Tidy up a filesystem cursor and unlock the vnode.
615 */
616int afs_end_vnode_operation(struct afs_fs_cursor *fc)
617{
618	struct afs_net *net = afs_v2net(fc->vnode);
619
620	if (fc->error == -EDESTADDRREQ ||
621	    fc->error == -EADDRNOTAVAIL ||
622	    fc->error == -ENETUNREACH ||
623	    fc->error == -EHOSTUNREACH)
624		afs_dump_edestaddrreq(fc);
625
626	mutex_unlock(&fc->vnode->io_lock);
627
628	afs_end_cursor(&fc->ac);
629	afs_put_cb_interest(net, fc->cbi);
630	afs_put_serverlist(net, fc->server_list);
631
632	if (fc->error == -ECONNABORTED)
633		fc->error = afs_abort_to_error(fc->ac.abort_code);
634
635	return fc->error;
636}