Linux Audio

Check our new training course

Loading...
v5.9
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * quota.c - CephFS quota
  4 *
  5 * Copyright (C) 2017-2018 SUSE
  6 */
  7
  8#include <linux/statfs.h>
  9
 10#include "super.h"
 11#include "mds_client.h"
 12
 13void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
 14{
 15	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 16	if (inc)
 17		atomic64_inc(&mdsc->quotarealms_count);
 18	else
 19		atomic64_dec(&mdsc->quotarealms_count);
 20}
 21
 22static inline bool ceph_has_realms_with_quotas(struct inode *inode)
 23{
 24	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 25	struct super_block *sb = mdsc->fsc->sb;
 26	struct inode *root = d_inode(sb->s_root);
 27
 28	if (atomic64_read(&mdsc->quotarealms_count) > 0)
 29		return true;
 30	/* if root is the real CephFS root, we don't have quota realms */
 31	if (root && ceph_ino(root) == CEPH_INO_ROOT)
 32		return false;
 
 
 
 33	/* otherwise, we can't know for sure */
 34	return true;
 35}
 36
 37void ceph_handle_quota(struct ceph_mds_client *mdsc,
 38		       struct ceph_mds_session *session,
 39		       struct ceph_msg *msg)
 40{
 41	struct super_block *sb = mdsc->fsc->sb;
 42	struct ceph_mds_quota *h = msg->front.iov_base;
 
 43	struct ceph_vino vino;
 44	struct inode *inode;
 45	struct ceph_inode_info *ci;
 46
 
 
 
 47	if (msg->front.iov_len < sizeof(*h)) {
 48		pr_err("%s corrupt message mds%d len %d\n", __func__,
 49		       session->s_mds, (int)msg->front.iov_len);
 50		ceph_msg_dump(msg);
 51		return;
 52	}
 53
 54	/* increment msg sequence number */
 55	mutex_lock(&session->s_mutex);
 56	session->s_seq++;
 57	mutex_unlock(&session->s_mutex);
 58
 59	/* lookup inode */
 60	vino.ino = le64_to_cpu(h->ino);
 61	vino.snap = CEPH_NOSNAP;
 62	inode = ceph_find_inode(sb, vino);
 63	if (!inode) {
 64		pr_warn("Failed to find inode %llu\n", vino.ino);
 65		return;
 66	}
 67	ci = ceph_inode(inode);
 68
 69	spin_lock(&ci->i_ceph_lock);
 70	ci->i_rbytes = le64_to_cpu(h->rbytes);
 71	ci->i_rfiles = le64_to_cpu(h->rfiles);
 72	ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
 73	__ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
 74		            le64_to_cpu(h->max_files));
 75	spin_unlock(&ci->i_ceph_lock);
 76
 77	/* avoid calling iput_final() in dispatch thread */
 78	ceph_async_iput(inode);
 
 79}
 80
 81static struct ceph_quotarealm_inode *
 82find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
 83{
 84	struct ceph_quotarealm_inode *qri = NULL;
 85	struct rb_node **node, *parent = NULL;
 
 86
 87	mutex_lock(&mdsc->quotarealms_inodes_mutex);
 88	node = &(mdsc->quotarealms_inodes.rb_node);
 89	while (*node) {
 90		parent = *node;
 91		qri = container_of(*node, struct ceph_quotarealm_inode, node);
 92
 93		if (ino < qri->ino)
 94			node = &((*node)->rb_left);
 95		else if (ino > qri->ino)
 96			node = &((*node)->rb_right);
 97		else
 98			break;
 99	}
100	if (!qri || (qri->ino != ino)) {
101		/* Not found, create a new one and insert it */
102		qri = kmalloc(sizeof(*qri), GFP_KERNEL);
103		if (qri) {
104			qri->ino = ino;
105			qri->inode = NULL;
106			qri->timeout = 0;
107			mutex_init(&qri->mutex);
108			rb_link_node(&qri->node, parent, node);
109			rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
110		} else
111			pr_warn("Failed to alloc quotarealms_inode\n");
112	}
113	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
114
115	return qri;
116}
117
118/*
119 * This function will try to lookup a realm inode which isn't visible in the
120 * filesystem mountpoint.  A list of these kind of inodes (not visible) is
121 * maintained in the mdsc and freed only when the filesystem is umounted.
122 *
123 * Note that these inodes are kept in this list even if the lookup fails, which
124 * allows to prevent useless lookup requests.
125 */
126static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
127					     struct super_block *sb,
128					     struct ceph_snap_realm *realm)
129{
 
130	struct ceph_quotarealm_inode *qri;
131	struct inode *in;
132
133	qri = find_quotarealm_inode(mdsc, realm->ino);
134	if (!qri)
135		return NULL;
136
137	mutex_lock(&qri->mutex);
138	if (qri->inode && ceph_is_any_caps(qri->inode)) {
139		/* A request has already returned the inode */
140		mutex_unlock(&qri->mutex);
141		return qri->inode;
142	}
143	/* Check if this inode lookup has failed recently */
144	if (qri->timeout &&
145	    time_before_eq(jiffies, qri->timeout)) {
146		mutex_unlock(&qri->mutex);
147		return NULL;
148	}
149	if (qri->inode) {
150		/* get caps */
151		int ret = __ceph_do_getattr(qri->inode, NULL,
152					    CEPH_STAT_CAP_INODE, true);
153		if (ret >= 0)
154			in = qri->inode;
155		else
156			in = ERR_PTR(ret);
157	}  else {
158		in = ceph_lookup_inode(sb, realm->ino);
159	}
160
161	if (IS_ERR(in)) {
162		dout("Can't lookup inode %llx (err: %ld)\n",
163		     realm->ino, PTR_ERR(in));
164		qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
165	} else {
166		qri->timeout = 0;
167		qri->inode = in;
168	}
169	mutex_unlock(&qri->mutex);
170
171	return in;
172}
173
174void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
175{
176	struct ceph_quotarealm_inode *qri;
177	struct rb_node *node;
178
179	/*
180	 * It should now be safe to clean quotarealms_inode tree without holding
181	 * mdsc->quotarealms_inodes_mutex...
182	 */
183	mutex_lock(&mdsc->quotarealms_inodes_mutex);
184	while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
185		node = rb_first(&mdsc->quotarealms_inodes);
186		qri = rb_entry(node, struct ceph_quotarealm_inode, node);
187		rb_erase(node, &mdsc->quotarealms_inodes);
188		iput(qri->inode);
189		kfree(qri);
190	}
191	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
192}
193
194/*
195 * This function walks through the snaprealm for an inode and returns the
196 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
197 * or max_bytes).  If the root is reached, return the root ceph_snap_realm
198 * instead.
199 *
200 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
201 * returned realm.
202 *
203 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
204 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
205 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
206 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
207 * will be restarted.
208 */
209static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
210					       struct inode *inode, bool retry)
 
211{
 
212	struct ceph_inode_info *ci = NULL;
213	struct ceph_snap_realm *realm, *next;
214	struct inode *in;
215	bool has_quota;
216
 
 
217	if (ceph_snap(inode) != CEPH_NOSNAP)
218		return NULL;
219
220restart:
221	realm = ceph_inode(inode)->i_snap_realm;
222	if (realm)
223		ceph_get_snap_realm(mdsc, realm);
224	else
225		pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
226				   "null i_snap_realm\n", ceph_vinop(inode));
 
227	while (realm) {
228		bool has_inode;
229
230		spin_lock(&realm->inodes_with_caps_lock);
231		has_inode = realm->inode;
232		in = has_inode ? igrab(realm->inode) : NULL;
233		spin_unlock(&realm->inodes_with_caps_lock);
234		if (has_inode && !in)
235			break;
236		if (!in) {
237			up_read(&mdsc->snap_rwsem);
238			in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
239			down_read(&mdsc->snap_rwsem);
240			if (IS_ERR_OR_NULL(in))
241				break;
242			ceph_put_snap_realm(mdsc, realm);
243			if (!retry)
244				return ERR_PTR(-EAGAIN);
245			goto restart;
246		}
247
248		ci = ceph_inode(in);
249		has_quota = __ceph_has_any_quota(ci);
250		/* avoid calling iput_final() while holding mdsc->snap_rwsem */
251		ceph_async_iput(in);
252
253		next = realm->parent;
254		if (has_quota || !next)
255		       return realm;
 
 
 
256
257		ceph_get_snap_realm(mdsc, next);
258		ceph_put_snap_realm(mdsc, realm);
259		realm = next;
260	}
261	if (realm)
262		ceph_put_snap_realm(mdsc, realm);
263
264	return NULL;
265}
266
267static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
268{
269	struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
270	struct ceph_snap_realm *old_realm, *new_realm;
271	bool is_same;
 
272
273restart:
274	/*
275	 * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
276	 * However, get_quota_realm may drop it temporarily.  By setting the
277	 * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
278	 * dropped and we can then restart the whole operation.
279	 */
280	down_read(&mdsc->snap_rwsem);
281	old_realm = get_quota_realm(mdsc, old, true);
282	new_realm = get_quota_realm(mdsc, new, false);
283	if (PTR_ERR(new_realm) == -EAGAIN) {
284		up_read(&mdsc->snap_rwsem);
285		if (old_realm)
286			ceph_put_snap_realm(mdsc, old_realm);
287		goto restart;
288	}
289	is_same = (old_realm == new_realm);
290	up_read(&mdsc->snap_rwsem);
291
292	if (old_realm)
293		ceph_put_snap_realm(mdsc, old_realm);
294	if (new_realm)
295		ceph_put_snap_realm(mdsc, new_realm);
296
297	return is_same;
298}
299
/* Checks that check_quota_exceeded() can run against each realm. */
enum quota_check_op {
	/* check quota max_files limit */
	QUOTA_CHECK_MAX_FILES_OP = 0,
	/* check quota max_bytes limit */
	QUOTA_CHECK_MAX_BYTES_OP = 1,
	/* check if quota max_bytes limit is approaching */
	QUOTA_CHECK_MAX_BYTES_APPROACHING_OP = 2
};
306
307/*
308 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
309 * realm, it will execute quota check operation defined by the 'op' parameter.
310 * The snaprealm walk is interrupted if the quota check detects that the quota
311 * is exceeded or if the root inode is reached.
312 */
313static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
314				 loff_t delta)
315{
316	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 
317	struct ceph_inode_info *ci;
318	struct ceph_snap_realm *realm, *next;
319	struct inode *in;
320	u64 max, rvalue;
321	bool exceeded = false;
322
323	if (ceph_snap(inode) != CEPH_NOSNAP)
324		return false;
325
326	down_read(&mdsc->snap_rwsem);
327restart:
328	realm = ceph_inode(inode)->i_snap_realm;
329	if (realm)
330		ceph_get_snap_realm(mdsc, realm);
331	else
332		pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
333				   "null i_snap_realm\n", ceph_vinop(inode));
 
334	while (realm) {
335		bool has_inode;
336
337		spin_lock(&realm->inodes_with_caps_lock);
338		has_inode = realm->inode;
339		in = has_inode ? igrab(realm->inode) : NULL;
340		spin_unlock(&realm->inodes_with_caps_lock);
341		if (has_inode && !in)
342			break;
343		if (!in) {
344			up_read(&mdsc->snap_rwsem);
345			in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
346			down_read(&mdsc->snap_rwsem);
347			if (IS_ERR_OR_NULL(in))
348				break;
349			ceph_put_snap_realm(mdsc, realm);
350			goto restart;
351		}
352		ci = ceph_inode(in);
353		spin_lock(&ci->i_ceph_lock);
354		if (op == QUOTA_CHECK_MAX_FILES_OP) {
355			max = ci->i_max_files;
356			rvalue = ci->i_rfiles + ci->i_rsubdirs;
357		} else {
358			max = ci->i_max_bytes;
359			rvalue = ci->i_rbytes;
360		}
361		spin_unlock(&ci->i_ceph_lock);
362		switch (op) {
363		case QUOTA_CHECK_MAX_FILES_OP:
364		case QUOTA_CHECK_MAX_BYTES_OP:
365			exceeded = (max && (rvalue + delta > max));
366			break;
367		case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
368			if (max) {
369				if (rvalue >= max)
370					exceeded = true;
371				else {
372					/*
373					 * when we're writing more that 1/16th
374					 * of the available space
375					 */
376					exceeded =
377						(((max - rvalue) >> 4) < delta);
378				}
379			}
380			break;
381		default:
382			/* Shouldn't happen */
383			pr_warn("Invalid quota check op (%d)\n", op);
384			exceeded = true; /* Just break the loop */
385		}
386		/* avoid calling iput_final() while holding mdsc->snap_rwsem */
387		ceph_async_iput(in);
388
389		next = realm->parent;
390		if (exceeded || !next)
391			break;
392		ceph_get_snap_realm(mdsc, next);
393		ceph_put_snap_realm(mdsc, realm);
394		realm = next;
395	}
396	if (realm)
397		ceph_put_snap_realm(mdsc, realm);
398	up_read(&mdsc->snap_rwsem);
399
400	return exceeded;
401}
402
403/*
404 * ceph_quota_is_max_files_exceeded - check if we can create a new file
405 * @inode:	directory where a new file is being created
406 *
407 * This functions returns true is max_files quota allows a new file to be
408 * created.  It is necessary to walk through the snaprealm hierarchy (until the
409 * FS root) to check all realms with quotas set.
410 */
411bool ceph_quota_is_max_files_exceeded(struct inode *inode)
412{
413	if (!ceph_has_realms_with_quotas(inode))
414		return false;
415
416	WARN_ON(!S_ISDIR(inode->i_mode));
417
418	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1);
419}
420
421/*
422 * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
423 * @inode:	inode being written
424 * @newsize:	new size if write succeeds
425 *
426 * This functions returns true is max_bytes quota allows a file size to reach
427 * @newsize; it returns false otherwise.
428 */
429bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
430{
431	loff_t size = i_size_read(inode);
432
433	if (!ceph_has_realms_with_quotas(inode))
434		return false;
435
436	/* return immediately if we're decreasing file size */
437	if (newsize <= size)
438		return false;
439
440	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
441}
442
443/*
444 * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
445 * @inode:	inode being written
446 * @newsize:	new size if write succeeds
447 *
448 * This function returns true if the new file size @newsize will be consuming
449 * more than 1/16th of the available quota space; it returns false otherwise.
450 */
451bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
452{
453	loff_t size = ceph_inode(inode)->i_reported_size;
454
455	if (!ceph_has_realms_with_quotas(inode))
456		return false;
457
458	/* return immediately if we're decreasing file size */
459	if (newsize <= size)
460		return false;
461
462	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
463				    (newsize - size));
464}
465
466/*
467 * ceph_quota_update_statfs - if root has quota update statfs with quota status
468 * @fsc:	filesystem client instance
469 * @buf:	statfs to update
470 *
471 * If the mounted filesystem root has max_bytes quota set, update the filesystem
472 * statistics with the quota status.
473 *
474 * This function returns true if the stats have been updated, false otherwise.
475 */
476bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
477{
478	struct ceph_mds_client *mdsc = fsc->mdsc;
479	struct ceph_inode_info *ci;
480	struct ceph_snap_realm *realm;
481	struct inode *in;
482	u64 total = 0, used, free;
483	bool is_updated = false;
484
485	down_read(&mdsc->snap_rwsem);
486	realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
 
487	up_read(&mdsc->snap_rwsem);
488	if (!realm)
489		return false;
490
491	spin_lock(&realm->inodes_with_caps_lock);
492	in = realm->inode ? igrab(realm->inode) : NULL;
493	spin_unlock(&realm->inodes_with_caps_lock);
494	if (in) {
495		ci = ceph_inode(in);
496		spin_lock(&ci->i_ceph_lock);
497		if (ci->i_max_bytes) {
498			total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
499			used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
 
 
 
 
 
 
500			/* It is possible for a quota to be exceeded.
501			 * Report 'zero' in that case
502			 */
503			free = total > used ? total - used : 0;
 
 
 
 
 
 
 
 
504		}
505		spin_unlock(&ci->i_ceph_lock);
506		if (total) {
507			buf->f_blocks = total;
508			buf->f_bfree = free;
509			buf->f_bavail = free;
510			is_updated = true;
511		}
512		iput(in);
513	}
514	ceph_put_snap_realm(mdsc, realm);
515
516	return is_updated;
517}
518
519/*
520 * ceph_quota_check_rename - check if a rename can be executed
521 * @mdsc:	MDS client instance
522 * @old:	inode to be copied
523 * @new:	destination inode (directory)
524 *
525 * This function verifies if a rename (e.g. moving a file or directory) can be
526 * executed.  It forces an rstat update in the @new target directory (and in the
527 * source @old as well, if it's a directory).  The actual check is done both for
528 * max_files and max_bytes.
529 *
530 * This function returns 0 if it's OK to do the rename, or, if quotas are
531 * exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
532 */
533int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
534			    struct inode *old, struct inode *new)
535{
536	struct ceph_inode_info *ci_old = ceph_inode(old);
537	int ret = 0;
538
539	if (ceph_quota_is_same_realm(old, new))
540		return 0;
541
542	/*
543	 * Get the latest rstat for target directory (and for source, if a
544	 * directory)
545	 */
546	ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
547	if (ret)
548		return ret;
549
550	if (S_ISDIR(old->i_mode)) {
551		ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
552		if (ret)
553			return ret;
554		ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
555					   ci_old->i_rbytes);
556		if (!ret)
557			ret = check_quota_exceeded(new,
558						   QUOTA_CHECK_MAX_FILES_OP,
559						   ci_old->i_rfiles +
560						   ci_old->i_rsubdirs);
561		if (ret)
562			ret = -EXDEV;
563	} else {
564		ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
565					   i_size_read(old));
566		if (!ret)
567			ret = check_quota_exceeded(new,
568						   QUOTA_CHECK_MAX_FILES_OP, 1);
569		if (ret)
570			ret = -EDQUOT;
571	}
572
573	return ret;
574}
v6.9.4
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * quota.c - CephFS quota
  4 *
  5 * Copyright (C) 2017-2018 SUSE
  6 */
  7
  8#include <linux/statfs.h>
  9
 10#include "super.h"
 11#include "mds_client.h"
 12
 13void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
 14{
 15	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 16	if (inc)
 17		atomic64_inc(&mdsc->quotarealms_count);
 18	else
 19		atomic64_dec(&mdsc->quotarealms_count);
 20}
 21
 22static inline bool ceph_has_realms_with_quotas(struct inode *inode)
 23{
 24	struct super_block *sb = inode->i_sb;
 25	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
 26	struct inode *root = d_inode(sb->s_root);
 27
 28	if (atomic64_read(&mdsc->quotarealms_count) > 0)
 29		return true;
 30	/* if root is the real CephFS root, we don't have quota realms */
 31	if (root && ceph_ino(root) == CEPH_INO_ROOT)
 32		return false;
 33	/* MDS stray dirs have no quota realms */
 34	if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino))
 35		return false;
 36	/* otherwise, we can't know for sure */
 37	return true;
 38}
 39
 40void ceph_handle_quota(struct ceph_mds_client *mdsc,
 41		       struct ceph_mds_session *session,
 42		       struct ceph_msg *msg)
 43{
 44	struct super_block *sb = mdsc->fsc->sb;
 45	struct ceph_mds_quota *h = msg->front.iov_base;
 46	struct ceph_client *cl = mdsc->fsc->client;
 47	struct ceph_vino vino;
 48	struct inode *inode;
 49	struct ceph_inode_info *ci;
 50
 51	if (!ceph_inc_mds_stopping_blocker(mdsc, session))
 52		return;
 53
 54	if (msg->front.iov_len < sizeof(*h)) {
 55		pr_err_client(cl, "corrupt message mds%d len %d\n",
 56			      session->s_mds, (int)msg->front.iov_len);
 57		ceph_msg_dump(msg);
 58		goto out;
 59	}
 60
 
 
 
 
 
 61	/* lookup inode */
 62	vino.ino = le64_to_cpu(h->ino);
 63	vino.snap = CEPH_NOSNAP;
 64	inode = ceph_find_inode(sb, vino);
 65	if (!inode) {
 66		pr_warn_client(cl, "failed to find inode %llx\n", vino.ino);
 67		goto out;
 68	}
 69	ci = ceph_inode(inode);
 70
 71	spin_lock(&ci->i_ceph_lock);
 72	ci->i_rbytes = le64_to_cpu(h->rbytes);
 73	ci->i_rfiles = le64_to_cpu(h->rfiles);
 74	ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
 75	__ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
 76		            le64_to_cpu(h->max_files));
 77	spin_unlock(&ci->i_ceph_lock);
 78
 79	iput(inode);
 80out:
 81	ceph_dec_mds_stopping_blocker(mdsc);
 82}
 83
 84static struct ceph_quotarealm_inode *
 85find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
 86{
 87	struct ceph_quotarealm_inode *qri = NULL;
 88	struct rb_node **node, *parent = NULL;
 89	struct ceph_client *cl = mdsc->fsc->client;
 90
 91	mutex_lock(&mdsc->quotarealms_inodes_mutex);
 92	node = &(mdsc->quotarealms_inodes.rb_node);
 93	while (*node) {
 94		parent = *node;
 95		qri = container_of(*node, struct ceph_quotarealm_inode, node);
 96
 97		if (ino < qri->ino)
 98			node = &((*node)->rb_left);
 99		else if (ino > qri->ino)
100			node = &((*node)->rb_right);
101		else
102			break;
103	}
104	if (!qri || (qri->ino != ino)) {
105		/* Not found, create a new one and insert it */
106		qri = kmalloc(sizeof(*qri), GFP_KERNEL);
107		if (qri) {
108			qri->ino = ino;
109			qri->inode = NULL;
110			qri->timeout = 0;
111			mutex_init(&qri->mutex);
112			rb_link_node(&qri->node, parent, node);
113			rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
114		} else
115			pr_warn_client(cl, "Failed to alloc quotarealms_inode\n");
116	}
117	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
118
119	return qri;
120}
121
122/*
123 * This function will try to lookup a realm inode which isn't visible in the
124 * filesystem mountpoint.  A list of these kind of inodes (not visible) is
125 * maintained in the mdsc and freed only when the filesystem is umounted.
126 *
127 * Note that these inodes are kept in this list even if the lookup fails, which
128 * allows to prevent useless lookup requests.
129 */
130static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
131					     struct super_block *sb,
132					     struct ceph_snap_realm *realm)
133{
134	struct ceph_client *cl = mdsc->fsc->client;
135	struct ceph_quotarealm_inode *qri;
136	struct inode *in;
137
138	qri = find_quotarealm_inode(mdsc, realm->ino);
139	if (!qri)
140		return NULL;
141
142	mutex_lock(&qri->mutex);
143	if (qri->inode && ceph_is_any_caps(qri->inode)) {
144		/* A request has already returned the inode */
145		mutex_unlock(&qri->mutex);
146		return qri->inode;
147	}
148	/* Check if this inode lookup has failed recently */
149	if (qri->timeout &&
150	    time_before_eq(jiffies, qri->timeout)) {
151		mutex_unlock(&qri->mutex);
152		return NULL;
153	}
154	if (qri->inode) {
155		/* get caps */
156		int ret = __ceph_do_getattr(qri->inode, NULL,
157					    CEPH_STAT_CAP_INODE, true);
158		if (ret >= 0)
159			in = qri->inode;
160		else
161			in = ERR_PTR(ret);
162	}  else {
163		in = ceph_lookup_inode(sb, realm->ino);
164	}
165
166	if (IS_ERR(in)) {
167		doutc(cl, "Can't lookup inode %llx (err: %ld)\n", realm->ino,
168		      PTR_ERR(in));
169		qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
170	} else {
171		qri->timeout = 0;
172		qri->inode = in;
173	}
174	mutex_unlock(&qri->mutex);
175
176	return in;
177}
178
179void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
180{
181	struct ceph_quotarealm_inode *qri;
182	struct rb_node *node;
183
184	/*
185	 * It should now be safe to clean quotarealms_inode tree without holding
186	 * mdsc->quotarealms_inodes_mutex...
187	 */
188	mutex_lock(&mdsc->quotarealms_inodes_mutex);
189	while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
190		node = rb_first(&mdsc->quotarealms_inodes);
191		qri = rb_entry(node, struct ceph_quotarealm_inode, node);
192		rb_erase(node, &mdsc->quotarealms_inodes);
193		iput(qri->inode);
194		kfree(qri);
195	}
196	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
197}
198
199/*
200 * This function walks through the snaprealm for an inode and set the
201 * realmp with the first snaprealm that has quotas set (max_files,
202 * max_bytes, or any, depending on the 'which_quota' argument).  If the root is
203 * reached, set the realmp with the root ceph_snap_realm instead.
204 *
205 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
206 * returned realm.
207 *
208 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
209 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
210 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
211 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
212 * will be restarted.
213 */
214static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode,
215			   enum quota_get_realm which_quota,
216			   struct ceph_snap_realm **realmp, bool retry)
217{
218	struct ceph_client *cl = mdsc->fsc->client;
219	struct ceph_inode_info *ci = NULL;
220	struct ceph_snap_realm *realm, *next;
221	struct inode *in;
222	bool has_quota;
223
224	if (realmp)
225		*realmp = NULL;
226	if (ceph_snap(inode) != CEPH_NOSNAP)
227		return 0;
228
229restart:
230	realm = ceph_inode(inode)->i_snap_realm;
231	if (realm)
232		ceph_get_snap_realm(mdsc, realm);
233	else
234		pr_err_ratelimited_client(cl,
235				"%p %llx.%llx null i_snap_realm\n",
236				inode, ceph_vinop(inode));
237	while (realm) {
238		bool has_inode;
239
240		spin_lock(&realm->inodes_with_caps_lock);
241		has_inode = realm->inode;
242		in = has_inode ? igrab(realm->inode) : NULL;
243		spin_unlock(&realm->inodes_with_caps_lock);
244		if (has_inode && !in)
245			break;
246		if (!in) {
247			up_read(&mdsc->snap_rwsem);
248			in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
249			down_read(&mdsc->snap_rwsem);
250			if (IS_ERR_OR_NULL(in))
251				break;
252			ceph_put_snap_realm(mdsc, realm);
253			if (!retry)
254				return -EAGAIN;
255			goto restart;
256		}
257
258		ci = ceph_inode(in);
259		has_quota = __ceph_has_quota(ci, which_quota);
260		iput(in);
 
261
262		next = realm->parent;
263		if (has_quota || !next) {
264			if (realmp)
265				*realmp = realm;
266			return 0;
267		}
268
269		ceph_get_snap_realm(mdsc, next);
270		ceph_put_snap_realm(mdsc, realm);
271		realm = next;
272	}
273	if (realm)
274		ceph_put_snap_realm(mdsc, realm);
275
276	return 0;
277}
278
279bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
280{
281	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
282	struct ceph_snap_realm *old_realm, *new_realm;
283	bool is_same;
284	int ret;
285
286restart:
287	/*
288	 * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
289	 * However, get_quota_realm may drop it temporarily.  By setting the
290	 * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
291	 * dropped and we can then restart the whole operation.
292	 */
293	down_read(&mdsc->snap_rwsem);
294	get_quota_realm(mdsc, old, QUOTA_GET_ANY, &old_realm, true);
295	ret = get_quota_realm(mdsc, new, QUOTA_GET_ANY, &new_realm, false);
296	if (ret == -EAGAIN) {
297		up_read(&mdsc->snap_rwsem);
298		if (old_realm)
299			ceph_put_snap_realm(mdsc, old_realm);
300		goto restart;
301	}
302	is_same = (old_realm == new_realm);
303	up_read(&mdsc->snap_rwsem);
304
305	if (old_realm)
306		ceph_put_snap_realm(mdsc, old_realm);
307	if (new_realm)
308		ceph_put_snap_realm(mdsc, new_realm);
309
310	return is_same;
311}
312
/* Checks that check_quota_exceeded() can run against each realm. */
enum quota_check_op {
	/* check quota max_files limit */
	QUOTA_CHECK_MAX_FILES_OP = 0,
	/* check quota max_bytes limit */
	QUOTA_CHECK_MAX_BYTES_OP = 1,
	/* check if quota max_bytes limit is approaching */
	QUOTA_CHECK_MAX_BYTES_APPROACHING_OP = 2
};
319
320/*
321 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
322 * realm, it will execute quota check operation defined by the 'op' parameter.
323 * The snaprealm walk is interrupted if the quota check detects that the quota
324 * is exceeded or if the root inode is reached.
325 */
326static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
327				 loff_t delta)
328{
329	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
330	struct ceph_client *cl = mdsc->fsc->client;
331	struct ceph_inode_info *ci;
332	struct ceph_snap_realm *realm, *next;
333	struct inode *in;
334	u64 max, rvalue;
335	bool exceeded = false;
336
337	if (ceph_snap(inode) != CEPH_NOSNAP)
338		return false;
339
340	down_read(&mdsc->snap_rwsem);
341restart:
342	realm = ceph_inode(inode)->i_snap_realm;
343	if (realm)
344		ceph_get_snap_realm(mdsc, realm);
345	else
346		pr_err_ratelimited_client(cl,
347				"%p %llx.%llx null i_snap_realm\n",
348				inode, ceph_vinop(inode));
349	while (realm) {
350		bool has_inode;
351
352		spin_lock(&realm->inodes_with_caps_lock);
353		has_inode = realm->inode;
354		in = has_inode ? igrab(realm->inode) : NULL;
355		spin_unlock(&realm->inodes_with_caps_lock);
356		if (has_inode && !in)
357			break;
358		if (!in) {
359			up_read(&mdsc->snap_rwsem);
360			in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
361			down_read(&mdsc->snap_rwsem);
362			if (IS_ERR_OR_NULL(in))
363				break;
364			ceph_put_snap_realm(mdsc, realm);
365			goto restart;
366		}
367		ci = ceph_inode(in);
368		spin_lock(&ci->i_ceph_lock);
369		if (op == QUOTA_CHECK_MAX_FILES_OP) {
370			max = ci->i_max_files;
371			rvalue = ci->i_rfiles + ci->i_rsubdirs;
372		} else {
373			max = ci->i_max_bytes;
374			rvalue = ci->i_rbytes;
375		}
376		spin_unlock(&ci->i_ceph_lock);
377		switch (op) {
378		case QUOTA_CHECK_MAX_FILES_OP:
379		case QUOTA_CHECK_MAX_BYTES_OP:
380			exceeded = (max && (rvalue + delta > max));
381			break;
382		case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
383			if (max) {
384				if (rvalue >= max)
385					exceeded = true;
386				else {
387					/*
388					 * when we're writing more that 1/16th
389					 * of the available space
390					 */
391					exceeded =
392						(((max - rvalue) >> 4) < delta);
393				}
394			}
395			break;
396		default:
397			/* Shouldn't happen */
398			pr_warn_client(cl, "Invalid quota check op (%d)\n", op);
399			exceeded = true; /* Just break the loop */
400		}
401		iput(in);
 
402
403		next = realm->parent;
404		if (exceeded || !next)
405			break;
406		ceph_get_snap_realm(mdsc, next);
407		ceph_put_snap_realm(mdsc, realm);
408		realm = next;
409	}
410	if (realm)
411		ceph_put_snap_realm(mdsc, realm);
412	up_read(&mdsc->snap_rwsem);
413
414	return exceeded;
415}
416
417/*
418 * ceph_quota_is_max_files_exceeded - check if we can create a new file
419 * @inode:	directory where a new file is being created
420 *
421 * This functions returns true is max_files quota allows a new file to be
422 * created.  It is necessary to walk through the snaprealm hierarchy (until the
423 * FS root) to check all realms with quotas set.
424 */
425bool ceph_quota_is_max_files_exceeded(struct inode *inode)
426{
427	if (!ceph_has_realms_with_quotas(inode))
428		return false;
429
430	WARN_ON(!S_ISDIR(inode->i_mode));
431
432	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1);
433}
434
435/*
436 * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
437 * @inode:	inode being written
438 * @newsize:	new size if write succeeds
439 *
440 * This functions returns true is max_bytes quota allows a file size to reach
441 * @newsize; it returns false otherwise.
442 */
443bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
444{
445	loff_t size = i_size_read(inode);
446
447	if (!ceph_has_realms_with_quotas(inode))
448		return false;
449
450	/* return immediately if we're decreasing file size */
451	if (newsize <= size)
452		return false;
453
454	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
455}
456
457/*
458 * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
459 * @inode:	inode being written
460 * @newsize:	new size if write succeeds
461 *
462 * This function returns true if the new file size @newsize will be consuming
463 * more than 1/16th of the available quota space; it returns false otherwise.
464 */
465bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
466{
467	loff_t size = ceph_inode(inode)->i_reported_size;
468
469	if (!ceph_has_realms_with_quotas(inode))
470		return false;
471
472	/* return immediately if we're decreasing file size */
473	if (newsize <= size)
474		return false;
475
476	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
477				    (newsize - size));
478}
479
480/*
481 * ceph_quota_update_statfs - if root has quota update statfs with quota status
482 * @fsc:	filesystem client instance
483 * @buf:	statfs to update
484 *
485 * If the mounted filesystem root has max_bytes quota set, update the filesystem
486 * statistics with the quota status.
487 *
488 * This function returns true if the stats have been updated, false otherwise.
489 */
490bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
491{
492	struct ceph_mds_client *mdsc = fsc->mdsc;
493	struct ceph_inode_info *ci;
494	struct ceph_snap_realm *realm;
495	struct inode *in;
496	u64 total = 0, used, free;
497	bool is_updated = false;
498
499	down_read(&mdsc->snap_rwsem);
500	get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES,
501			&realm, true);
502	up_read(&mdsc->snap_rwsem);
503	if (!realm)
504		return false;
505
506	spin_lock(&realm->inodes_with_caps_lock);
507	in = realm->inode ? igrab(realm->inode) : NULL;
508	spin_unlock(&realm->inodes_with_caps_lock);
509	if (in) {
510		ci = ceph_inode(in);
511		spin_lock(&ci->i_ceph_lock);
512		if (ci->i_max_bytes) {
513			total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
514			used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
515			/* For quota size less than 4MB, use 4KB block size */
516			if (!total) {
517				total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT;
518				used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT;
519	                        buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
520			}
521			/* It is possible for a quota to be exceeded.
522			 * Report 'zero' in that case
523			 */
524			free = total > used ? total - used : 0;
525			/* For quota size less than 4KB, report the
526			 * total=used=4KB,free=0 when quota is full
527			 * and total=free=4KB, used=0 otherwise */
528			if (!total) {
529				total = 1;
530				free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0;
531	                        buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
532			}
533		}
534		spin_unlock(&ci->i_ceph_lock);
535		if (total) {
536			buf->f_blocks = total;
537			buf->f_bfree = free;
538			buf->f_bavail = free;
539			is_updated = true;
540		}
541		iput(in);
542	}
543	ceph_put_snap_realm(mdsc, realm);
544
545	return is_updated;
546}
547