Linux Audio

Check our new training course

Loading...
Note: File does not exist in v4.10.11.
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/* Handle fileserver selection and rotation.
  3 *
  4 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  5 * Written by David Howells (dhowells@redhat.com)
  6 */
  7
  8#include <linux/kernel.h>
  9#include <linux/slab.h>
 10#include <linux/fs.h>
 11#include <linux/sched.h>
 12#include <linux/delay.h>
 13#include <linux/sched/signal.h>
 14#include "internal.h"
 15#include "afs_fs.h"
 16
 17/*
 18 * Begin iteration through a server list, starting with the vnode's last used
 19 * server if possible, or the last recorded good server if not.
 20 */
 21static bool afs_start_fs_iteration(struct afs_operation *op,
 22				   struct afs_vnode *vnode)
 23{
 24	struct afs_server *server;
 25	void *cb_server;
 26	int i;
 27
 28	read_lock(&op->volume->servers_lock);
 29	op->server_list = afs_get_serverlist(
 30		rcu_dereference_protected(op->volume->servers,
 31					  lockdep_is_held(&op->volume->servers_lock)));
 32	read_unlock(&op->volume->servers_lock);
 33
 34	op->untried = (1UL << op->server_list->nr_servers) - 1;
 35	op->index = READ_ONCE(op->server_list->preferred);
 36
 37	cb_server = vnode->cb_server;
 38	if (cb_server) {
 39		/* See if the vnode's preferred record is still available */
 40		for (i = 0; i < op->server_list->nr_servers; i++) {
 41			server = op->server_list->servers[i].server;
 42			if (server == cb_server) {
 43				op->index = i;
 44				goto found_interest;
 45			}
 46		}
 47
 48		/* If we have a lock outstanding on a server that's no longer
 49		 * serving this vnode, then we can't switch to another server
 50		 * and have to return an error.
 51		 */
 52		if (op->flags & AFS_OPERATION_CUR_ONLY) {
 53			op->error = -ESTALE;
 54			return false;
 55		}
 56
 57		/* Note that the callback promise is effectively broken */
 58		write_seqlock(&vnode->cb_lock);
 59		ASSERTCMP(cb_server, ==, vnode->cb_server);
 60		vnode->cb_server = NULL;
 61		if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
 62			vnode->cb_break++;
 63		write_sequnlock(&vnode->cb_lock);
 64	}
 65
 66found_interest:
 67	return true;
 68}
 69
 70/*
 71 * Post volume busy note.
 72 */
 73static void afs_busy(struct afs_volume *volume, u32 abort_code)
 74{
 75	const char *m;
 76
 77	switch (abort_code) {
 78	case VOFFLINE:		m = "offline";		break;
 79	case VRESTARTING:	m = "restarting";	break;
 80	case VSALVAGING:	m = "being salvaged";	break;
 81	default:		m = "busy";		break;
 82	}
 83
 84	pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
 85}
 86
 87/*
 88 * Sleep and retry the operation to the same fileserver.
 89 */
 90static bool afs_sleep_and_retry(struct afs_operation *op)
 91{
 92	if (!(op->flags & AFS_OPERATION_UNINTR)) {
 93		msleep_interruptible(1000);
 94		if (signal_pending(current)) {
 95			op->error = -ERESTARTSYS;
 96			return false;
 97		}
 98	} else {
 99		msleep(1000);
100	}
101
102	return true;
103}
104
105/*
106 * Select the fileserver to use.  May be called multiple times to rotate
107 * through the fileservers.
108 */
109bool afs_select_fileserver(struct afs_operation *op)
110{
111	struct afs_addr_list *alist;
112	struct afs_server *server;
113	struct afs_vnode *vnode = op->file[0].vnode;
114	struct afs_error e;
115	u32 rtt;
116	int error = op->ac.error, i;
117
118	_enter("%lx[%d],%lx[%d],%d,%d",
119	       op->untried, op->index,
120	       op->ac.tried, op->ac.index,
121	       error, op->ac.abort_code);
122
123	if (op->flags & AFS_OPERATION_STOP) {
124		_leave(" = f [stopped]");
125		return false;
126	}
127
128	op->nr_iterations++;
129
130	/* Evaluate the result of the previous operation, if there was one. */
131	switch (error) {
132	case SHRT_MAX:
133		goto start;
134
135	case 0:
136	default:
137		/* Success or local failure.  Stop. */
138		op->error = error;
139		op->flags |= AFS_OPERATION_STOP;
140		_leave(" = f [okay/local %d]", error);
141		return false;
142
143	case -ECONNABORTED:
144		/* The far side rejected the operation on some grounds.  This
145		 * might involve the server being busy or the volume having been moved.
146		 */
147		switch (op->ac.abort_code) {
148		case VNOVOL:
149			/* This fileserver doesn't know about the volume.
150			 * - May indicate that the VL is wrong - retry once and compare
151			 *   the results.
152			 * - May indicate that the fileserver couldn't attach to the vol.
153			 */
154			if (op->flags & AFS_OPERATION_VNOVOL) {
155				op->error = -EREMOTEIO;
156				goto next_server;
157			}
158
159			write_lock(&op->volume->servers_lock);
160			op->server_list->vnovol_mask |= 1 << op->index;
161			write_unlock(&op->volume->servers_lock);
162
163			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
164			error = afs_check_volume_status(op->volume, op);
165			if (error < 0)
166				goto failed_set_error;
167
168			if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
169				op->error = -ENOMEDIUM;
170				goto failed;
171			}
172
173			/* If the server list didn't change, then assume that
174			 * it's the fileserver having trouble.
175			 */
176			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
177				op->error = -EREMOTEIO;
178				goto next_server;
179			}
180
181			/* Try again */
182			op->flags |= AFS_OPERATION_VNOVOL;
183			_leave(" = t [vnovol]");
184			return true;
185
186		case VSALVAGE: /* TODO: Should this return an error or iterate? */
187		case VVOLEXISTS:
188		case VNOSERVICE:
189		case VONLINE:
190		case VDISKFULL:
191		case VOVERQUOTA:
192			op->error = afs_abort_to_error(op->ac.abort_code);
193			goto next_server;
194
195		case VOFFLINE:
196			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
197				afs_busy(op->volume, op->ac.abort_code);
198				clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
199			}
200			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
201				op->error = -EADV;
202				goto failed;
203			}
204			if (op->flags & AFS_OPERATION_CUR_ONLY) {
205				op->error = -ESTALE;
206				goto failed;
207			}
208			goto busy;
209
210		case VSALVAGING:
211		case VRESTARTING:
212		case VBUSY:
213			/* Retry after going round all the servers unless we
214			 * have a file lock we need to maintain.
215			 */
216			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
217				op->error = -EBUSY;
218				goto failed;
219			}
220			if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
221				afs_busy(op->volume, op->ac.abort_code);
222				clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
223			}
224		busy:
225			if (op->flags & AFS_OPERATION_CUR_ONLY) {
226				if (!afs_sleep_and_retry(op))
227					goto failed;
228
229				 /* Retry with same server & address */
230				_leave(" = t [vbusy]");
231				return true;
232			}
233
234			op->flags |= AFS_OPERATION_VBUSY;
235			goto next_server;
236
237		case VMOVED:
238			/* The volume migrated to another server.  We consider
239			 * consider all locks and callbacks broken and request
240			 * an update from the VLDB.
241			 *
242			 * We also limit the number of VMOVED hops we will
243			 * honour, just in case someone sets up a loop.
244			 */
245			if (op->flags & AFS_OPERATION_VMOVED) {
246				op->error = -EREMOTEIO;
247				goto failed;
248			}
249			op->flags |= AFS_OPERATION_VMOVED;
250
251			set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
252			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
253			error = afs_check_volume_status(op->volume, op);
254			if (error < 0)
255				goto failed_set_error;
256
257			/* If the server list didn't change, then the VLDB is
258			 * out of sync with the fileservers.  This is hopefully
259			 * a temporary condition, however, so we don't want to
260			 * permanently block access to the file.
261			 *
262			 * TODO: Try other fileservers if we can.
263			 *
264			 * TODO: Retry a few times with sleeps.
265			 */
266			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
267				op->error = -ENOMEDIUM;
268				goto failed;
269			}
270
271			goto restart_from_beginning;
272
273		default:
274			clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
275			clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
276			op->error = afs_abort_to_error(op->ac.abort_code);
277			goto failed;
278		}
279
280	case -ETIMEDOUT:
281	case -ETIME:
282		if (op->error != -EDESTADDRREQ)
283			goto iterate_address;
284		fallthrough;
285	case -ERFKILL:
286	case -EADDRNOTAVAIL:
287	case -ENETUNREACH:
288	case -EHOSTUNREACH:
289	case -EHOSTDOWN:
290	case -ECONNREFUSED:
291		_debug("no conn");
292		op->error = error;
293		goto iterate_address;
294
295	case -ENETRESET:
296		pr_warn("kAFS: Peer reset %s (op=%x)\n",
297			op->type ? op->type->name : "???", op->debug_id);
298		fallthrough;
299	case -ECONNRESET:
300		_debug("call reset");
301		op->error = error;
302		goto failed;
303	}
304
305restart_from_beginning:
306	_debug("restart");
307	afs_end_cursor(&op->ac);
308	op->server = NULL;
309	afs_put_serverlist(op->net, op->server_list);
310	op->server_list = NULL;
311start:
312	_debug("start");
313	/* See if we need to do an update of the volume record.  Note that the
314	 * volume may have moved or even have been deleted.
315	 */
316	error = afs_check_volume_status(op->volume, op);
317	if (error < 0)
318		goto failed_set_error;
319
320	if (!afs_start_fs_iteration(op, vnode))
321		goto failed;
322
323	_debug("__ VOL %llx __", op->volume->vid);
324
325pick_server:
326	_debug("pick [%lx]", op->untried);
327
328	error = afs_wait_for_fs_probes(op->server_list, op->untried);
329	if (error < 0)
330		goto failed_set_error;
331
332	/* Pick the untried server with the lowest RTT.  If we have outstanding
333	 * callbacks, we stick with the server we're already using if we can.
334	 */
335	if (op->server) {
336		_debug("server %u", op->index);
337		if (test_bit(op->index, &op->untried))
338			goto selected_server;
339		op->server = NULL;
340		_debug("no server");
341	}
342
343	op->index = -1;
344	rtt = U32_MAX;
345	for (i = 0; i < op->server_list->nr_servers; i++) {
346		struct afs_server *s = op->server_list->servers[i].server;
347
348		if (!test_bit(i, &op->untried) ||
349		    !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
350			continue;
351		if (s->probe.rtt < rtt) {
352			op->index = i;
353			rtt = s->probe.rtt;
354		}
355	}
356
357	if (op->index == -1)
358		goto no_more_servers;
359
360selected_server:
361	_debug("use %d", op->index);
362	__clear_bit(op->index, &op->untried);
363
364	/* We're starting on a different fileserver from the list.  We need to
365	 * check it, create a callback intercept, find its address list and
366	 * probe its capabilities before we use it.
367	 */
368	ASSERTCMP(op->ac.alist, ==, NULL);
369	server = op->server_list->servers[op->index].server;
370
371	if (!afs_check_server_record(op, server))
372		goto failed;
373
374	_debug("USING SERVER: %pU", &server->uuid);
375
376	op->flags |= AFS_OPERATION_RETRY_SERVER;
377	op->server = server;
378	if (vnode->cb_server != server) {
379		vnode->cb_server = server;
380		vnode->cb_s_break = server->cb_s_break;
381		vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break);
382		vnode->cb_v_break = vnode->volume->cb_v_break;
383		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
384	}
385
386	read_lock(&server->fs_lock);
387	alist = rcu_dereference_protected(server->addresses,
388					  lockdep_is_held(&server->fs_lock));
389	afs_get_addrlist(alist);
390	read_unlock(&server->fs_lock);
391
392retry_server:
393	memset(&op->ac, 0, sizeof(op->ac));
394
395	if (!op->ac.alist)
396		op->ac.alist = alist;
397	else
398		afs_put_addrlist(alist);
399
400	op->ac.index = -1;
401
402iterate_address:
403	ASSERT(op->ac.alist);
404	/* Iterate over the current server's address list to try and find an
405	 * address on which it will respond to us.
406	 */
407	if (!afs_iterate_addresses(&op->ac))
408		goto out_of_addresses;
409
410	_debug("address [%u] %u/%u %pISp",
411	       op->index, op->ac.index, op->ac.alist->nr_addrs,
412	       &op->ac.alist->addrs[op->ac.index].transport);
413
414	_leave(" = t");
415	return true;
416
417out_of_addresses:
418	/* We've now had a failure to respond on all of a server's addresses -
419	 * immediately probe them again and consider retrying the server.
420	 */
421	afs_probe_fileserver(op->net, op->server);
422	if (op->flags & AFS_OPERATION_RETRY_SERVER) {
423		alist = op->ac.alist;
424		error = afs_wait_for_one_fs_probe(
425			op->server, !(op->flags & AFS_OPERATION_UNINTR));
426		switch (error) {
427		case 0:
428			op->flags &= ~AFS_OPERATION_RETRY_SERVER;
429			goto retry_server;
430		case -ERESTARTSYS:
431			goto failed_set_error;
432		case -ETIME:
433		case -EDESTADDRREQ:
434			goto next_server;
435		}
436	}
437
438next_server:
439	_debug("next");
440	afs_end_cursor(&op->ac);
441	goto pick_server;
442
443no_more_servers:
444	/* That's all the servers poked to no good effect.  Try again if some
445	 * of them were busy.
446	 */
447	if (op->flags & AFS_OPERATION_VBUSY)
448		goto restart_from_beginning;
449
450	e.error = -EDESTADDRREQ;
451	e.responded = false;
452	for (i = 0; i < op->server_list->nr_servers; i++) {
453		struct afs_server *s = op->server_list->servers[i].server;
454
455		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
456				     s->probe.abort_code);
457	}
458
459	error = e.error;
460
461failed_set_error:
462	op->error = error;
463failed:
464	op->flags |= AFS_OPERATION_STOP;
465	afs_end_cursor(&op->ac);
466	_leave(" = f [failed %d]", op->error);
467	return false;
468}
469
470/*
471 * Dump cursor state in the case of the error being EDESTADDRREQ.
472 */
473void afs_dump_edestaddrreq(const struct afs_operation *op)
474{
475	static int count;
476	int i;
477
478	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
479		return;
480	count++;
481
482	rcu_read_lock();
483
484	pr_notice("EDESTADDR occurred\n");
485	pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n",
486		  op->file[0].cb_break_before,
487		  op->file[1].cb_break_before, op->flags, op->error);
488	pr_notice("FC: ut=%lx ix=%d ni=%u\n",
489		  op->untried, op->index, op->nr_iterations);
490
491	if (op->server_list) {
492		const struct afs_server_list *sl = op->server_list;
493		pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
494			  sl->nr_servers, sl->preferred, sl->vnovol_mask);
495		for (i = 0; i < sl->nr_servers; i++) {
496			const struct afs_server *s = sl->servers[i].server;
497			pr_notice("FC: server fl=%lx av=%u %pU\n",
498				  s->flags, s->addr_version, &s->uuid);
499			if (s->addresses) {
500				const struct afs_addr_list *a =
501					rcu_dereference(s->addresses);
502				pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
503					  a->version,
504					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
505					  a->preferred);
506				pr_notice("FC:  - R=%lx F=%lx\n",
507					  a->responded, a->failed);
508				if (a == op->ac.alist)
509					pr_notice("FC:  - current\n");
510			}
511		}
512	}
513
514	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
515		  op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error,
516		  op->ac.responded, op->ac.nr_iterations);
517	rcu_read_unlock();
518}