Loading...
/* AFS volume management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
11
12#include <linux/kernel.h>
13#include <linux/slab.h>
14#include "internal.h"
15
16unsigned __read_mostly afs_volume_gc_delay = 10;
17unsigned __read_mostly afs_volume_record_life = 60 * 60;
18
19static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
20
21/*
22 * Allocate a volume record and load it up from a vldb record.
23 */
24static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params,
25 struct afs_vldb_entry *vldb,
26 unsigned long type_mask)
27{
28 struct afs_server_list *slist;
29 struct afs_volume *volume;
30 int ret = -ENOMEM, nr_servers = 0, i;
31
32 for (i = 0; i < vldb->nr_servers; i++)
33 if (vldb->fs_mask[i] & type_mask)
34 nr_servers++;
35
36 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
37 if (!volume)
38 goto error_0;
39
40 volume->vid = vldb->vid[params->type];
41 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
42 volume->cell = afs_get_cell(params->cell);
43 volume->type = params->type;
44 volume->type_force = params->force;
45 volume->name_len = vldb->name_len;
46
47 atomic_set(&volume->usage, 1);
48 INIT_LIST_HEAD(&volume->proc_link);
49 rwlock_init(&volume->servers_lock);
50 memcpy(volume->name, vldb->name, vldb->name_len + 1);
51
52 slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
53 if (IS_ERR(slist)) {
54 ret = PTR_ERR(slist);
55 goto error_1;
56 }
57
58 refcount_set(&slist->usage, 1);
59 volume->servers = slist;
60 return volume;
61
62error_1:
63 afs_put_cell(params->net, volume->cell);
64 kfree(volume);
65error_0:
66 return ERR_PTR(ret);
67}
68
69/*
70 * Look up a VLDB record for a volume.
71 */
72static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
73 struct key *key,
74 const char *volname,
75 size_t volnamesz)
76{
77 struct afs_addr_cursor ac;
78 struct afs_vldb_entry *vldb;
79 int ret;
80
81 ret = afs_set_vl_cursor(&ac, cell);
82 if (ret < 0)
83 return ERR_PTR(ret);
84
85 while (afs_iterate_addresses(&ac)) {
86 if (!test_bit(ac.index, &ac.alist->probed)) {
87 ret = afs_vl_get_capabilities(cell->net, &ac, key);
88 switch (ret) {
89 case VL_SERVICE:
90 clear_bit(ac.index, &ac.alist->yfs);
91 set_bit(ac.index, &ac.alist->probed);
92 ac.addr->srx_service = ret;
93 break;
94 case YFS_VL_SERVICE:
95 set_bit(ac.index, &ac.alist->yfs);
96 set_bit(ac.index, &ac.alist->probed);
97 ac.addr->srx_service = ret;
98 break;
99 }
100 }
101
102 vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
103 volname, volnamesz);
104 switch (ac.error) {
105 case 0:
106 afs_end_cursor(&ac);
107 return vldb;
108 case -ECONNABORTED:
109 ac.error = afs_abort_to_error(ac.abort_code);
110 goto error;
111 case -ENOMEM:
112 case -ENONET:
113 goto error;
114 case -ENETUNREACH:
115 case -EHOSTUNREACH:
116 case -ECONNREFUSED:
117 break;
118 default:
119 ac.error = -EIO;
120 goto error;
121 }
122 }
123
124error:
125 return ERR_PTR(afs_end_cursor(&ac));
126}
127
128/*
129 * Look up a volume in the VL server and create a candidate volume record for
130 * it.
131 *
132 * The volume name can be one of the following:
133 * "%[cell:]volume[.]" R/W volume
134 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
135 * or R/W (rwparent=1) volume
136 * "%[cell:]volume.readonly" R/O volume
137 * "#[cell:]volume.readonly" R/O volume
138 * "%[cell:]volume.backup" Backup volume
139 * "#[cell:]volume.backup" Backup volume
140 *
141 * The cell name is optional, and defaults to the current cell.
142 *
143 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
144 * Guide
145 * - Rule 1: Explicit type suffix forces access of that type or nothing
146 * (no suffix, then use Rule 2 & 3)
147 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
148 * if not available
149 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
150 * explicitly told otherwise
151 */
152struct afs_volume *afs_create_volume(struct afs_mount_params *params)
153{
154 struct afs_vldb_entry *vldb;
155 struct afs_volume *volume;
156 unsigned long type_mask = 1UL << params->type;
157
158 vldb = afs_vl_lookup_vldb(params->cell, params->key,
159 params->volname, params->volnamesz);
160 if (IS_ERR(vldb))
161 return ERR_CAST(vldb);
162
163 if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
164 volume = ERR_PTR(vldb->error);
165 goto error;
166 }
167
168 /* Make the final decision on the type we want */
169 volume = ERR_PTR(-ENOMEDIUM);
170 if (params->force) {
171 if (!(vldb->flags & type_mask))
172 goto error;
173 } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
174 params->type = AFSVL_ROVOL;
175 } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
176 params->type = AFSVL_RWVOL;
177 } else {
178 goto error;
179 }
180
181 type_mask = 1UL << params->type;
182 volume = afs_alloc_volume(params, vldb, type_mask);
183
184error:
185 kfree(vldb);
186 return volume;
187}
188
189/*
190 * Destroy a volume record
191 */
192static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
193{
194 _enter("%p", volume);
195
196#ifdef CONFIG_AFS_FSCACHE
197 ASSERTCMP(volume->cache, ==, NULL);
198#endif
199
200 afs_put_serverlist(net, volume->servers);
201 afs_put_cell(net, volume->cell);
202 kfree(volume);
203
204 _leave(" [destroyed]");
205}
206
207/*
208 * Drop a reference on a volume record.
209 */
210void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
211{
212 if (volume) {
213 _enter("%s", volume->name);
214
215 if (atomic_dec_and_test(&volume->usage))
216 afs_destroy_volume(cell->net, volume);
217 }
218}
219
220/*
221 * Activate a volume.
222 */
223void afs_activate_volume(struct afs_volume *volume)
224{
225#ifdef CONFIG_AFS_FSCACHE
226 volume->cache = fscache_acquire_cookie(volume->cell->cache,
227 &afs_volume_cache_index_def,
228 &volume->vid, sizeof(volume->vid),
229 NULL, 0,
230 volume, 0, true);
231#endif
232
233 write_lock(&volume->cell->proc_lock);
234 list_add_tail(&volume->proc_link, &volume->cell->proc_volumes);
235 write_unlock(&volume->cell->proc_lock);
236}
237
238/*
239 * Deactivate a volume.
240 */
241void afs_deactivate_volume(struct afs_volume *volume)
242{
243 _enter("%s", volume->name);
244
245 write_lock(&volume->cell->proc_lock);
246 list_del_init(&volume->proc_link);
247 write_unlock(&volume->cell->proc_lock);
248
249#ifdef CONFIG_AFS_FSCACHE
250 fscache_relinquish_cookie(volume->cache, NULL,
251 test_bit(AFS_VOLUME_DELETED, &volume->flags));
252 volume->cache = NULL;
253#endif
254
255 _leave("");
256}
257
258/*
259 * Query the VL service to update the volume status.
260 */
261static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
262{
263 struct afs_server_list *new, *old, *discard;
264 struct afs_vldb_entry *vldb;
265 char idbuf[16];
266 int ret, idsz;
267
268 _enter("");
269
270 /* We look up an ID by passing it as a decimal string in the
271 * operation's name parameter.
272 */
273 idsz = sprintf(idbuf, "%u", volume->vid);
274
275 vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
276 if (IS_ERR(vldb)) {
277 ret = PTR_ERR(vldb);
278 goto error;
279 }
280
281 /* See if the volume got renamed. */
282 if (vldb->name_len != volume->name_len ||
283 memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
284 /* TODO: Use RCU'd string. */
285 memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
286 volume->name_len = vldb->name_len;
287 }
288
289 /* See if the volume's server list got updated. */
290 new = afs_alloc_server_list(volume->cell, key,
291 vldb, (1 << volume->type));
292 if (IS_ERR(new)) {
293 ret = PTR_ERR(new);
294 goto error_vldb;
295 }
296
297 write_lock(&volume->servers_lock);
298
299 discard = new;
300 old = volume->servers;
301 if (afs_annotate_server_list(new, old)) {
302 new->seq = volume->servers_seq + 1;
303 volume->servers = new;
304 smp_wmb();
305 volume->servers_seq++;
306 discard = old;
307 }
308
309 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
310 clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
311 write_unlock(&volume->servers_lock);
312 ret = 0;
313
314 afs_put_serverlist(volume->cell->net, discard);
315error_vldb:
316 kfree(vldb);
317error:
318 _leave(" = %d", ret);
319 return ret;
320}
321
322/*
323 * Make sure the volume record is up to date.
324 */
325int afs_check_volume_status(struct afs_volume *volume, struct key *key)
326{
327 time64_t now = ktime_get_real_seconds();
328 int ret, retries = 0;
329
330 _enter("");
331
332 if (volume->update_at <= now)
333 set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
334
335retry:
336 if (!test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags) &&
337 !test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
338 _leave(" = 0");
339 return 0;
340 }
341
342 if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
343 ret = afs_update_volume_status(volume, key);
344 clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
345 clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
346 wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
347 _leave(" = %d", ret);
348 return ret;
349 }
350
351 if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
352 _leave(" = 0 [no wait]");
353 return 0;
354 }
355
356 ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE);
357 if (ret == -ERESTARTSYS) {
358 _leave(" = %d", ret);
359 return ret;
360 }
361
362 retries++;
363 if (retries == 4) {
364 _leave(" = -ESTALE");
365 return -ESTALE;
366 }
367 goto retry;
368}
/* AFS volume management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
11
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/slab.h>
16#include <linux/fs.h>
17#include <linux/pagemap.h>
18#include <linux/sched.h>
19#include "internal.h"
20
/* Printable volume type names, indexed by volume->type in the debug output.
 * Both the strings and the pointer table are constant.
 */
static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
22
23/*
24 * lookup a volume by name
25 * - this can be one of the following:
26 * "%[cell:]volume[.]" R/W volume
27 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
28 * or R/W (rwparent=1) volume
29 * "%[cell:]volume.readonly" R/O volume
30 * "#[cell:]volume.readonly" R/O volume
31 * "%[cell:]volume.backup" Backup volume
32 * "#[cell:]volume.backup" Backup volume
33 *
34 * The cell name is optional, and defaults to the current cell.
35 *
36 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
37 * Guide
38 * - Rule 1: Explicit type suffix forces access of that type or nothing
39 * (no suffix, then use Rule 2 & 3)
40 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
41 * if not available
42 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
43 * explicitly told otherwise
44 */
45struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
46{
47 struct afs_vlocation *vlocation = NULL;
48 struct afs_volume *volume = NULL;
49 struct afs_server *server = NULL;
50 char srvtmask;
51 int ret, loop;
52
53 _enter("{%*.*s,%d}",
54 params->volnamesz, params->volnamesz, params->volname, params->rwpath);
55
56 /* lookup the volume location record */
57 vlocation = afs_vlocation_lookup(params->cell, params->key,
58 params->volname, params->volnamesz);
59 if (IS_ERR(vlocation)) {
60 ret = PTR_ERR(vlocation);
61 vlocation = NULL;
62 goto error;
63 }
64
65 /* make the final decision on the type we want */
66 ret = -ENOMEDIUM;
67 if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
68 goto error;
69
70 srvtmask = 0;
71 for (loop = 0; loop < vlocation->vldb.nservers; loop++)
72 srvtmask |= vlocation->vldb.srvtmask[loop];
73
74 if (params->force) {
75 if (!(srvtmask & (1 << params->type)))
76 goto error;
77 } else if (srvtmask & AFS_VOL_VTM_RO) {
78 params->type = AFSVL_ROVOL;
79 } else if (srvtmask & AFS_VOL_VTM_RW) {
80 params->type = AFSVL_RWVOL;
81 } else {
82 goto error;
83 }
84
85 down_write(¶ms->cell->vl_sem);
86
87 /* is the volume already active? */
88 if (vlocation->vols[params->type]) {
89 /* yes - re-use it */
90 volume = vlocation->vols[params->type];
91 afs_get_volume(volume);
92 goto success;
93 }
94
95 /* create a new volume record */
96 _debug("creating new volume record");
97
98 ret = -ENOMEM;
99 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
100 if (!volume)
101 goto error_up;
102
103 atomic_set(&volume->usage, 1);
104 volume->type = params->type;
105 volume->type_force = params->force;
106 volume->cell = params->cell;
107 volume->vid = vlocation->vldb.vid[params->type];
108
109 ret = bdi_setup_and_register(&volume->bdi, "afs");
110 if (ret)
111 goto error_bdi;
112
113 init_rwsem(&volume->server_sem);
114
115 /* look up all the applicable server records */
116 for (loop = 0; loop < 8; loop++) {
117 if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
118 server = afs_lookup_server(
119 volume->cell, &vlocation->vldb.servers[loop]);
120 if (IS_ERR(server)) {
121 ret = PTR_ERR(server);
122 goto error_discard;
123 }
124
125 volume->servers[volume->nservers] = server;
126 volume->nservers++;
127 }
128 }
129
130 /* attach the cache and volume location */
131#ifdef CONFIG_AFS_FSCACHE
132 volume->cache = fscache_acquire_cookie(vlocation->cache,
133 &afs_volume_cache_index_def,
134 volume, true);
135#endif
136 afs_get_vlocation(vlocation);
137 volume->vlocation = vlocation;
138
139 vlocation->vols[volume->type] = volume;
140
141success:
142 _debug("kAFS selected %s volume %08x",
143 afs_voltypes[volume->type], volume->vid);
144 up_write(¶ms->cell->vl_sem);
145 afs_put_vlocation(vlocation);
146 _leave(" = %p", volume);
147 return volume;
148
149 /* clean up */
150error_up:
151 up_write(¶ms->cell->vl_sem);
152error:
153 afs_put_vlocation(vlocation);
154 _leave(" = %d", ret);
155 return ERR_PTR(ret);
156
157error_discard:
158 bdi_destroy(&volume->bdi);
159error_bdi:
160 up_write(¶ms->cell->vl_sem);
161
162 for (loop = volume->nservers - 1; loop >= 0; loop--)
163 afs_put_server(volume->servers[loop]);
164
165 kfree(volume);
166 goto error;
167}
168
169/*
170 * destroy a volume record
171 */
172void afs_put_volume(struct afs_volume *volume)
173{
174 struct afs_vlocation *vlocation;
175 int loop;
176
177 if (!volume)
178 return;
179
180 _enter("%p", volume);
181
182 ASSERTCMP(atomic_read(&volume->usage), >, 0);
183
184 vlocation = volume->vlocation;
185
186 /* to prevent a race, the decrement and the dequeue must be effectively
187 * atomic */
188 down_write(&vlocation->cell->vl_sem);
189
190 if (likely(!atomic_dec_and_test(&volume->usage))) {
191 up_write(&vlocation->cell->vl_sem);
192 _leave("");
193 return;
194 }
195
196 vlocation->vols[volume->type] = NULL;
197
198 up_write(&vlocation->cell->vl_sem);
199
200 /* finish cleaning up the volume */
201#ifdef CONFIG_AFS_FSCACHE
202 fscache_relinquish_cookie(volume->cache, 0);
203#endif
204 afs_put_vlocation(vlocation);
205
206 for (loop = volume->nservers - 1; loop >= 0; loop--)
207 afs_put_server(volume->servers[loop]);
208
209 bdi_destroy(&volume->bdi);
210 kfree(volume);
211
212 _leave(" [destroyed]");
213}
214
/*
 * Rank a fileserver state for error reporting.  A higher-ranked state
 * displaces a lower-ranked one: no error, then network unreachable, host
 * unreachable, connection refused, and finally anything else.
 */
static int afs_fs_state_severity(int state)
{
	switch (state) {
	case 0:
		return 0;
	case -ENETUNREACH:
		return 1;
	case -EHOSTUNREACH:
		return 2;
	case -ECONNREFUSED:
		return 3;
	default:
		return 4;
	}
}

/*
 * pick a server to use to try accessing this volume
 * - returns with an elevated usage count on the server chosen
 * - if no server is usable, returns an ERR_PTR() holding the most severe
 *   state seen (the first one seen wins amongst equals), or -ENOMEDIUM or
 *   -ESTALE if the volume has no servers at all
 */
struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
{
	struct afs_volume *volume = vnode->volume;
	struct afs_server *server;
	int ret, state, i;

	_enter("%s", volume->vlocation->vldb.name);

	/* stick with the server we're already using if we can */
	if (vnode->server && vnode->server->fs_state == 0) {
		afs_get_server(vnode->server);
		_leave(" = %p [current]", vnode->server);
		return vnode->server;
	}

	down_read(&volume->server_sem);

	/* handle the no-server case */
	if (volume->nservers == 0) {
		if (volume->rjservers)
			ret = -ENOMEDIUM;
		else
			ret = -ESTALE;
		up_read(&volume->server_sem);
		_leave(" = %d [no servers]", ret);
		return ERR_PTR(ret);
	}

	/* basically, just search the list for the first live server and use
	 * that, remembering the worst failure state along the way */
	ret = 0;
	for (i = 0; i < volume->nservers; i++) {
		server = volume->servers[i];
		state = server->fs_state;

		_debug("consider %d [%d]", i, state);

		if (state == 0) {
			/* found an apparently healthy server */
			afs_get_server(server);
			up_read(&volume->server_sem);
			_leave(" = %p (picked %08x)",
			       server, ntohl(server->addr.s_addr));
			return server;
		}

		if (afs_fs_state_severity(ret) < afs_fs_state_severity(state))
			ret = state;
	}

	/* no available servers
	 * - TODO: handle the no active servers case better
	 */
	up_read(&volume->server_sem);
	_leave(" = %d", ret);
	return ERR_PTR(ret);
}
298
/*
 * release a server after use
 * - records result of using a particular server to access a volume
 * - returns 0 if the caller should loop around and try another server, or 1
 *   if the caller should accept @result (success or a final error)
 * - the ref on the server struct that was acquired by picking is released
 *   here on every path except success; when @result is 0 the caller keeps
 *   the ref and must release it
 */
int afs_volume_release_fileserver(struct afs_vnode *vnode,
				  struct afs_server *server,
				  int result)
{
	struct afs_volume *volume = vnode->volume;
	unsigned idx;

	_enter("%s,%08x,%d",
	       volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
	       result);

	switch (result) {
	case 0:
		/* success */
		server->fs_act_jif = jiffies;
		server->fs_state = 0;
		_leave("");
		return 1;

	case -ENOMEDIUM:
		/* the fileserver denied all knowledge of the volume */
		server->fs_act_jif = jiffies;
		down_write(&volume->server_sem);

		/* firstly, find where the server is in the active list (if it
		 * is) */
		for (idx = 0; idx < volume->nservers; idx++)
			if (volume->servers[idx] == server)
				break;

		/* no longer there - may have been discarded by another op */
		if (idx >= volume->nservers)
			goto try_next_server_upw;

		/* close the gap in the list and drop the list's ref */
		volume->nservers--;
		memmove(&volume->servers[idx],
			&volume->servers[idx + 1],
			sizeof(volume->servers[idx]) *
			(volume->nservers - idx));
		volume->servers[volume->nservers] = NULL;
		afs_put_server(server);
		volume->rjservers++;

		/* another server might acknowledge its existence */
		if (volume->nservers > 0)
			goto try_next_server_upw;

		/* handle the case where all the fileservers have rejected the
		 * volume
		 * - TODO: try asking the fileservers for volume information
		 * - TODO: contact the VL server again to see if the volume is
		 *         no longer registered
		 */
		up_write(&volume->server_sem);
		afs_put_server(server);
		_leave(" [completely rejected]");
		return 1;

	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
	case -ETIME:
	case -ETIMEDOUT:
	case -EREMOTEIO:
		/* problem reaching the server: mark the server as dead
		 * TODO: vary dead timeout depending on error
		 */
		spin_lock(&server->fs_lock);
		if (!server->fs_state) {
			server->fs_dead_jif = jiffies + HZ * 10;
			server->fs_state = result;
			printk("kAFS: SERVER DEAD state=%d\n", result);
		}
		spin_unlock(&server->fs_lock);
		goto try_next_server;

	default:
		/* miscellaneous error */
		server->fs_act_jif = jiffies;
		/* fall through */
	case -ENOMEM:
	case -ENONET:
		/* tell the caller to accept the result */
		afs_put_server(server);
		_leave(" [local failure]");
		return 1;
	}

	/* tell the caller to loop around and try the next server */
try_next_server_upw:
	up_write(&volume->server_sem);
try_next_server:
	afs_put_server(server);
	_leave(" [try next server]");
	return 0;
}
401}