Linux Audio

Check our new training course

Loading...
v5.9
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * quota.c - CephFS quota
  4 *
  5 * Copyright (C) 2017-2018 SUSE
  6 */
  7
  8#include <linux/statfs.h>
  9
 10#include "super.h"
 11#include "mds_client.h"
 12
 13void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
 14{
 15	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 16	if (inc)
 17		atomic64_inc(&mdsc->quotarealms_count);
 18	else
 19		atomic64_dec(&mdsc->quotarealms_count);
 20}
 21
 22static inline bool ceph_has_realms_with_quotas(struct inode *inode)
 23{
 24	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 25	struct super_block *sb = mdsc->fsc->sb;
 26	struct inode *root = d_inode(sb->s_root);
 27
 28	if (atomic64_read(&mdsc->quotarealms_count) > 0)
 29		return true;
 30	/* if root is the real CephFS root, we don't have quota realms */
 31	if (root && ceph_ino(root) == CEPH_INO_ROOT)
 
 32		return false;
 33	/* otherwise, we can't know for sure */
 34	return true;
 35}
 36
 37void ceph_handle_quota(struct ceph_mds_client *mdsc,
 38		       struct ceph_mds_session *session,
 39		       struct ceph_msg *msg)
 40{
 41	struct super_block *sb = mdsc->fsc->sb;
 42	struct ceph_mds_quota *h = msg->front.iov_base;
 43	struct ceph_vino vino;
 44	struct inode *inode;
 45	struct ceph_inode_info *ci;
 46
 47	if (msg->front.iov_len < sizeof(*h)) {
 48		pr_err("%s corrupt message mds%d len %d\n", __func__,
 49		       session->s_mds, (int)msg->front.iov_len);
 50		ceph_msg_dump(msg);
 51		return;
 52	}
 53
 54	/* increment msg sequence number */
 55	mutex_lock(&session->s_mutex);
 56	session->s_seq++;
 57	mutex_unlock(&session->s_mutex);
 58
 59	/* lookup inode */
 60	vino.ino = le64_to_cpu(h->ino);
 61	vino.snap = CEPH_NOSNAP;
 62	inode = ceph_find_inode(sb, vino);
 63	if (!inode) {
 64		pr_warn("Failed to find inode %llu\n", vino.ino);
 65		return;
 66	}
 67	ci = ceph_inode(inode);
 68
 69	spin_lock(&ci->i_ceph_lock);
 70	ci->i_rbytes = le64_to_cpu(h->rbytes);
 71	ci->i_rfiles = le64_to_cpu(h->rfiles);
 72	ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
 73	__ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
 74		            le64_to_cpu(h->max_files));
 75	spin_unlock(&ci->i_ceph_lock);
 76
 77	/* avoid calling iput_final() in dispatch thread */
 78	ceph_async_iput(inode);
 79}
 80
 81static struct ceph_quotarealm_inode *
 82find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
 83{
 84	struct ceph_quotarealm_inode *qri = NULL;
 85	struct rb_node **node, *parent = NULL;
 86
 87	mutex_lock(&mdsc->quotarealms_inodes_mutex);
 88	node = &(mdsc->quotarealms_inodes.rb_node);
 89	while (*node) {
 90		parent = *node;
 91		qri = container_of(*node, struct ceph_quotarealm_inode, node);
 92
 93		if (ino < qri->ino)
 94			node = &((*node)->rb_left);
 95		else if (ino > qri->ino)
 96			node = &((*node)->rb_right);
 97		else
 98			break;
 99	}
100	if (!qri || (qri->ino != ino)) {
101		/* Not found, create a new one and insert it */
102		qri = kmalloc(sizeof(*qri), GFP_KERNEL);
103		if (qri) {
104			qri->ino = ino;
105			qri->inode = NULL;
106			qri->timeout = 0;
107			mutex_init(&qri->mutex);
108			rb_link_node(&qri->node, parent, node);
109			rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
110		} else
111			pr_warn("Failed to alloc quotarealms_inode\n");
112	}
113	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
114
115	return qri;
116}
117
118/*
119 * This function will try to lookup a realm inode which isn't visible in the
120 * filesystem mountpoint.  A list of these kind of inodes (not visible) is
121 * maintained in the mdsc and freed only when the filesystem is umounted.
122 *
123 * Note that these inodes are kept in this list even if the lookup fails, which
124 * allows to prevent useless lookup requests.
125 */
126static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
127					     struct super_block *sb,
128					     struct ceph_snap_realm *realm)
129{
130	struct ceph_quotarealm_inode *qri;
131	struct inode *in;
132
133	qri = find_quotarealm_inode(mdsc, realm->ino);
134	if (!qri)
135		return NULL;
136
137	mutex_lock(&qri->mutex);
138	if (qri->inode && ceph_is_any_caps(qri->inode)) {
139		/* A request has already returned the inode */
140		mutex_unlock(&qri->mutex);
141		return qri->inode;
142	}
143	/* Check if this inode lookup has failed recently */
144	if (qri->timeout &&
145	    time_before_eq(jiffies, qri->timeout)) {
146		mutex_unlock(&qri->mutex);
147		return NULL;
148	}
149	if (qri->inode) {
150		/* get caps */
151		int ret = __ceph_do_getattr(qri->inode, NULL,
152					    CEPH_STAT_CAP_INODE, true);
153		if (ret >= 0)
154			in = qri->inode;
155		else
156			in = ERR_PTR(ret);
157	}  else {
158		in = ceph_lookup_inode(sb, realm->ino);
159	}
160
161	if (IS_ERR(in)) {
162		dout("Can't lookup inode %llx (err: %ld)\n",
163		     realm->ino, PTR_ERR(in));
164		qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
165	} else {
166		qri->timeout = 0;
167		qri->inode = in;
168	}
169	mutex_unlock(&qri->mutex);
170
171	return in;
172}
173
174void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
175{
176	struct ceph_quotarealm_inode *qri;
177	struct rb_node *node;
178
179	/*
180	 * It should now be safe to clean quotarealms_inode tree without holding
181	 * mdsc->quotarealms_inodes_mutex...
182	 */
183	mutex_lock(&mdsc->quotarealms_inodes_mutex);
184	while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
185		node = rb_first(&mdsc->quotarealms_inodes);
186		qri = rb_entry(node, struct ceph_quotarealm_inode, node);
187		rb_erase(node, &mdsc->quotarealms_inodes);
188		iput(qri->inode);
189		kfree(qri);
190	}
191	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
192}
193
194/*
195 * This function walks through the snaprealm for an inode and returns the
196 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
197 * or max_bytes).  If the root is reached, return the root ceph_snap_realm
198 * instead.
199 *
200 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
201 * returned realm.
202 *
203 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
204 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
205 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
206 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
207 * will be restarted.
208 */
209static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
210					       struct inode *inode, bool retry)
211{
212	struct ceph_inode_info *ci = NULL;
213	struct ceph_snap_realm *realm, *next;
214	struct inode *in;
215	bool has_quota;
216
217	if (ceph_snap(inode) != CEPH_NOSNAP)
218		return NULL;
219
220restart:
221	realm = ceph_inode(inode)->i_snap_realm;
222	if (realm)
223		ceph_get_snap_realm(mdsc, realm);
224	else
225		pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
226				   "null i_snap_realm\n", ceph_vinop(inode));
227	while (realm) {
228		bool has_inode;
229
230		spin_lock(&realm->inodes_with_caps_lock);
231		has_inode = realm->inode;
232		in = has_inode ? igrab(realm->inode) : NULL;
233		spin_unlock(&realm->inodes_with_caps_lock);
234		if (has_inode && !in)
235			break;
236		if (!in) {
237			up_read(&mdsc->snap_rwsem);
238			in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
239			down_read(&mdsc->snap_rwsem);
240			if (IS_ERR_OR_NULL(in))
241				break;
242			ceph_put_snap_realm(mdsc, realm);
243			if (!retry)
244				return ERR_PTR(-EAGAIN);
245			goto restart;
246		}
247
248		ci = ceph_inode(in);
249		has_quota = __ceph_has_any_quota(ci);
250		/* avoid calling iput_final() while holding mdsc->snap_rwsem */
251		ceph_async_iput(in);
252
253		next = realm->parent;
254		if (has_quota || !next)
255		       return realm;
256
257		ceph_get_snap_realm(mdsc, next);
258		ceph_put_snap_realm(mdsc, realm);
259		realm = next;
260	}
261	if (realm)
262		ceph_put_snap_realm(mdsc, realm);
263
264	return NULL;
265}
266
267static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
268{
269	struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
270	struct ceph_snap_realm *old_realm, *new_realm;
271	bool is_same;
272
273restart:
274	/*
275	 * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
276	 * However, get_quota_realm may drop it temporarily.  By setting the
277	 * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
278	 * dropped and we can then restart the whole operation.
279	 */
280	down_read(&mdsc->snap_rwsem);
281	old_realm = get_quota_realm(mdsc, old, true);
282	new_realm = get_quota_realm(mdsc, new, false);
283	if (PTR_ERR(new_realm) == -EAGAIN) {
284		up_read(&mdsc->snap_rwsem);
285		if (old_realm)
286			ceph_put_snap_realm(mdsc, old_realm);
287		goto restart;
288	}
289	is_same = (old_realm == new_realm);
290	up_read(&mdsc->snap_rwsem);
291
292	if (old_realm)
293		ceph_put_snap_realm(mdsc, old_realm);
294	if (new_realm)
295		ceph_put_snap_realm(mdsc, new_realm);
296
297	return is_same;
298}
299
/* Quota check operations accepted by check_quota_exceeded(). */
enum quota_check_op {
	QUOTA_CHECK_MAX_FILES_OP,	/* check quota max_files limit */
	QUOTA_CHECK_MAX_BYTES_OP,	/* check quota max_bytes limit */
	QUOTA_CHECK_MAX_BYTES_APPROACHING_OP	/* check if quota max_bytes
						   limit is approaching */
};
306
307/*
308 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
309 * realm, it will execute quota check operation defined by the 'op' parameter.
310 * The snaprealm walk is interrupted if the quota check detects that the quota
311 * is exceeded or if the root inode is reached.
312 */
313static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
314				 loff_t delta)
315{
316	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
317	struct ceph_inode_info *ci;
318	struct ceph_snap_realm *realm, *next;
319	struct inode *in;
320	u64 max, rvalue;
321	bool exceeded = false;
322
323	if (ceph_snap(inode) != CEPH_NOSNAP)
324		return false;
325
326	down_read(&mdsc->snap_rwsem);
327restart:
328	realm = ceph_inode(inode)->i_snap_realm;
329	if (realm)
330		ceph_get_snap_realm(mdsc, realm);
331	else
332		pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
333				   "null i_snap_realm\n", ceph_vinop(inode));
334	while (realm) {
335		bool has_inode;
336
337		spin_lock(&realm->inodes_with_caps_lock);
338		has_inode = realm->inode;
339		in = has_inode ? igrab(realm->inode) : NULL;
340		spin_unlock(&realm->inodes_with_caps_lock);
341		if (has_inode && !in)
342			break;
343		if (!in) {
344			up_read(&mdsc->snap_rwsem);
345			in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
346			down_read(&mdsc->snap_rwsem);
347			if (IS_ERR_OR_NULL(in))
348				break;
349			ceph_put_snap_realm(mdsc, realm);
350			goto restart;
351		}
352		ci = ceph_inode(in);
353		spin_lock(&ci->i_ceph_lock);
354		if (op == QUOTA_CHECK_MAX_FILES_OP) {
355			max = ci->i_max_files;
356			rvalue = ci->i_rfiles + ci->i_rsubdirs;
357		} else {
358			max = ci->i_max_bytes;
359			rvalue = ci->i_rbytes;
360		}
361		spin_unlock(&ci->i_ceph_lock);
362		switch (op) {
363		case QUOTA_CHECK_MAX_FILES_OP:
 
 
364		case QUOTA_CHECK_MAX_BYTES_OP:
365			exceeded = (max && (rvalue + delta > max));
366			break;
367		case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
368			if (max) {
369				if (rvalue >= max)
370					exceeded = true;
371				else {
372					/*
373					 * when we're writing more that 1/16th
374					 * of the available space
375					 */
376					exceeded =
377						(((max - rvalue) >> 4) < delta);
378				}
379			}
380			break;
381		default:
382			/* Shouldn't happen */
383			pr_warn("Invalid quota check op (%d)\n", op);
384			exceeded = true; /* Just break the loop */
385		}
386		/* avoid calling iput_final() while holding mdsc->snap_rwsem */
387		ceph_async_iput(in);
388
389		next = realm->parent;
390		if (exceeded || !next)
391			break;
392		ceph_get_snap_realm(mdsc, next);
393		ceph_put_snap_realm(mdsc, realm);
394		realm = next;
395	}
396	if (realm)
397		ceph_put_snap_realm(mdsc, realm);
398	up_read(&mdsc->snap_rwsem);
399
400	return exceeded;
401}
402
403/*
404 * ceph_quota_is_max_files_exceeded - check if we can create a new file
405 * @inode:	directory where a new file is being created
406 *
407 * This functions returns true is max_files quota allows a new file to be
408 * created.  It is necessary to walk through the snaprealm hierarchy (until the
409 * FS root) to check all realms with quotas set.
410 */
411bool ceph_quota_is_max_files_exceeded(struct inode *inode)
412{
413	if (!ceph_has_realms_with_quotas(inode))
414		return false;
415
416	WARN_ON(!S_ISDIR(inode->i_mode));
417
418	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1);
419}
420
421/*
422 * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
423 * @inode:	inode being written
424 * @newsize:	new size if write succeeds
425 *
426 * This functions returns true is max_bytes quota allows a file size to reach
427 * @newsize; it returns false otherwise.
428 */
429bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
430{
431	loff_t size = i_size_read(inode);
432
433	if (!ceph_has_realms_with_quotas(inode))
434		return false;
435
436	/* return immediately if we're decreasing file size */
437	if (newsize <= size)
438		return false;
439
440	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
441}
442
443/*
444 * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
445 * @inode:	inode being written
446 * @newsize:	new size if write succeeds
447 *
448 * This function returns true if the new file size @newsize will be consuming
449 * more than 1/16th of the available quota space; it returns false otherwise.
450 */
451bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
452{
453	loff_t size = ceph_inode(inode)->i_reported_size;
454
455	if (!ceph_has_realms_with_quotas(inode))
456		return false;
457
458	/* return immediately if we're decreasing file size */
459	if (newsize <= size)
460		return false;
461
462	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
463				    (newsize - size));
464}
465
466/*
467 * ceph_quota_update_statfs - if root has quota update statfs with quota status
468 * @fsc:	filesystem client instance
469 * @buf:	statfs to update
470 *
471 * If the mounted filesystem root has max_bytes quota set, update the filesystem
472 * statistics with the quota status.
473 *
474 * This function returns true if the stats have been updated, false otherwise.
475 */
476bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
477{
478	struct ceph_mds_client *mdsc = fsc->mdsc;
479	struct ceph_inode_info *ci;
480	struct ceph_snap_realm *realm;
481	struct inode *in;
482	u64 total = 0, used, free;
483	bool is_updated = false;
484
485	down_read(&mdsc->snap_rwsem);
486	realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
487	up_read(&mdsc->snap_rwsem);
488	if (!realm)
489		return false;
490
491	spin_lock(&realm->inodes_with_caps_lock);
492	in = realm->inode ? igrab(realm->inode) : NULL;
493	spin_unlock(&realm->inodes_with_caps_lock);
494	if (in) {
495		ci = ceph_inode(in);
496		spin_lock(&ci->i_ceph_lock);
497		if (ci->i_max_bytes) {
498			total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
499			used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
500			/* It is possible for a quota to be exceeded.
501			 * Report 'zero' in that case
502			 */
503			free = total > used ? total - used : 0;
504		}
505		spin_unlock(&ci->i_ceph_lock);
506		if (total) {
507			buf->f_blocks = total;
508			buf->f_bfree = free;
509			buf->f_bavail = free;
510			is_updated = true;
511		}
512		iput(in);
513	}
514	ceph_put_snap_realm(mdsc, realm);
515
516	return is_updated;
517}
518
519/*
520 * ceph_quota_check_rename - check if a rename can be executed
521 * @mdsc:	MDS client instance
522 * @old:	inode to be copied
523 * @new:	destination inode (directory)
524 *
525 * This function verifies if a rename (e.g. moving a file or directory) can be
526 * executed.  It forces an rstat update in the @new target directory (and in the
527 * source @old as well, if it's a directory).  The actual check is done both for
528 * max_files and max_bytes.
529 *
530 * This function returns 0 if it's OK to do the rename, or, if quotas are
531 * exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
532 */
533int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
534			    struct inode *old, struct inode *new)
535{
536	struct ceph_inode_info *ci_old = ceph_inode(old);
537	int ret = 0;
538
539	if (ceph_quota_is_same_realm(old, new))
540		return 0;
541
542	/*
543	 * Get the latest rstat for target directory (and for source, if a
544	 * directory)
545	 */
546	ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
547	if (ret)
548		return ret;
549
550	if (S_ISDIR(old->i_mode)) {
551		ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
552		if (ret)
553			return ret;
554		ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
555					   ci_old->i_rbytes);
556		if (!ret)
557			ret = check_quota_exceeded(new,
558						   QUOTA_CHECK_MAX_FILES_OP,
559						   ci_old->i_rfiles +
560						   ci_old->i_rsubdirs);
561		if (ret)
562			ret = -EXDEV;
563	} else {
564		ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
565					   i_size_read(old));
566		if (!ret)
567			ret = check_quota_exceeded(new,
568						   QUOTA_CHECK_MAX_FILES_OP, 1);
569		if (ret)
570			ret = -EDQUOT;
571	}
572
573	return ret;
574}
v5.4
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * quota.c - CephFS quota
  4 *
  5 * Copyright (C) 2017-2018 SUSE
  6 */
  7
  8#include <linux/statfs.h>
  9
 10#include "super.h"
 11#include "mds_client.h"
 12
 13void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
 14{
 15	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 16	if (inc)
 17		atomic64_inc(&mdsc->quotarealms_count);
 18	else
 19		atomic64_dec(&mdsc->quotarealms_count);
 20}
 21
 22static inline bool ceph_has_realms_with_quotas(struct inode *inode)
 23{
 24	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 25	struct super_block *sb = mdsc->fsc->sb;
 
 26
 27	if (atomic64_read(&mdsc->quotarealms_count) > 0)
 28		return true;
 29	/* if root is the real CephFS root, we don't have quota realms */
 30	if (sb->s_root->d_inode &&
 31	    (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT))
 32		return false;
 33	/* otherwise, we can't know for sure */
 34	return true;
 35}
 36
 37void ceph_handle_quota(struct ceph_mds_client *mdsc,
 38		       struct ceph_mds_session *session,
 39		       struct ceph_msg *msg)
 40{
 41	struct super_block *sb = mdsc->fsc->sb;
 42	struct ceph_mds_quota *h = msg->front.iov_base;
 43	struct ceph_vino vino;
 44	struct inode *inode;
 45	struct ceph_inode_info *ci;
 46
 47	if (msg->front.iov_len < sizeof(*h)) {
 48		pr_err("%s corrupt message mds%d len %d\n", __func__,
 49		       session->s_mds, (int)msg->front.iov_len);
 50		ceph_msg_dump(msg);
 51		return;
 52	}
 53
 54	/* increment msg sequence number */
 55	mutex_lock(&session->s_mutex);
 56	session->s_seq++;
 57	mutex_unlock(&session->s_mutex);
 58
 59	/* lookup inode */
 60	vino.ino = le64_to_cpu(h->ino);
 61	vino.snap = CEPH_NOSNAP;
 62	inode = ceph_find_inode(sb, vino);
 63	if (!inode) {
 64		pr_warn("Failed to find inode %llu\n", vino.ino);
 65		return;
 66	}
 67	ci = ceph_inode(inode);
 68
 69	spin_lock(&ci->i_ceph_lock);
 70	ci->i_rbytes = le64_to_cpu(h->rbytes);
 71	ci->i_rfiles = le64_to_cpu(h->rfiles);
 72	ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
 73	__ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
 74		            le64_to_cpu(h->max_files));
 75	spin_unlock(&ci->i_ceph_lock);
 76
 77	/* avoid calling iput_final() in dispatch thread */
 78	ceph_async_iput(inode);
 79}
 80
 81static struct ceph_quotarealm_inode *
 82find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
 83{
 84	struct ceph_quotarealm_inode *qri = NULL;
 85	struct rb_node **node, *parent = NULL;
 86
 87	mutex_lock(&mdsc->quotarealms_inodes_mutex);
 88	node = &(mdsc->quotarealms_inodes.rb_node);
 89	while (*node) {
 90		parent = *node;
 91		qri = container_of(*node, struct ceph_quotarealm_inode, node);
 92
 93		if (ino < qri->ino)
 94			node = &((*node)->rb_left);
 95		else if (ino > qri->ino)
 96			node = &((*node)->rb_right);
 97		else
 98			break;
 99	}
100	if (!qri || (qri->ino != ino)) {
101		/* Not found, create a new one and insert it */
102		qri = kmalloc(sizeof(*qri), GFP_KERNEL);
103		if (qri) {
104			qri->ino = ino;
105			qri->inode = NULL;
106			qri->timeout = 0;
107			mutex_init(&qri->mutex);
108			rb_link_node(&qri->node, parent, node);
109			rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
110		} else
111			pr_warn("Failed to alloc quotarealms_inode\n");
112	}
113	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
114
115	return qri;
116}
117
118/*
119 * This function will try to lookup a realm inode which isn't visible in the
120 * filesystem mountpoint.  A list of these kind of inodes (not visible) is
121 * maintained in the mdsc and freed only when the filesystem is umounted.
122 *
123 * Note that these inodes are kept in this list even if the lookup fails, which
124 * allows to prevent useless lookup requests.
125 */
126static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
127					     struct super_block *sb,
128					     struct ceph_snap_realm *realm)
129{
130	struct ceph_quotarealm_inode *qri;
131	struct inode *in;
132
133	qri = find_quotarealm_inode(mdsc, realm->ino);
134	if (!qri)
135		return NULL;
136
137	mutex_lock(&qri->mutex);
138	if (qri->inode && ceph_is_any_caps(qri->inode)) {
139		/* A request has already returned the inode */
140		mutex_unlock(&qri->mutex);
141		return qri->inode;
142	}
143	/* Check if this inode lookup has failed recently */
144	if (qri->timeout &&
145	    time_before_eq(jiffies, qri->timeout)) {
146		mutex_unlock(&qri->mutex);
147		return NULL;
148	}
149	if (qri->inode) {
150		/* get caps */
151		int ret = __ceph_do_getattr(qri->inode, NULL,
152					    CEPH_STAT_CAP_INODE, true);
153		if (ret >= 0)
154			in = qri->inode;
155		else
156			in = ERR_PTR(ret);
157	}  else {
158		in = ceph_lookup_inode(sb, realm->ino);
159	}
160
161	if (IS_ERR(in)) {
162		pr_warn("Can't lookup inode %llx (err: %ld)\n",
163			realm->ino, PTR_ERR(in));
164		qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
165	} else {
166		qri->timeout = 0;
167		qri->inode = in;
168	}
169	mutex_unlock(&qri->mutex);
170
171	return in;
172}
173
174void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
175{
176	struct ceph_quotarealm_inode *qri;
177	struct rb_node *node;
178
179	/*
180	 * It should now be safe to clean quotarealms_inode tree without holding
181	 * mdsc->quotarealms_inodes_mutex...
182	 */
183	mutex_lock(&mdsc->quotarealms_inodes_mutex);
184	while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
185		node = rb_first(&mdsc->quotarealms_inodes);
186		qri = rb_entry(node, struct ceph_quotarealm_inode, node);
187		rb_erase(node, &mdsc->quotarealms_inodes);
188		iput(qri->inode);
189		kfree(qri);
190	}
191	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
192}
193
194/*
195 * This function walks through the snaprealm for an inode and returns the
196 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
197 * or max_bytes).  If the root is reached, return the root ceph_snap_realm
198 * instead.
199 *
200 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
201 * returned realm.
202 *
203 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
204 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
205 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
206 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
207 * will be restarted.
208 */
209static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
210					       struct inode *inode, bool retry)
211{
212	struct ceph_inode_info *ci = NULL;
213	struct ceph_snap_realm *realm, *next;
214	struct inode *in;
215	bool has_quota;
216
217	if (ceph_snap(inode) != CEPH_NOSNAP)
218		return NULL;
219
220restart:
221	realm = ceph_inode(inode)->i_snap_realm;
222	if (realm)
223		ceph_get_snap_realm(mdsc, realm);
224	else
225		pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
226				   "null i_snap_realm\n", ceph_vinop(inode));
227	while (realm) {
228		bool has_inode;
229
230		spin_lock(&realm->inodes_with_caps_lock);
231		has_inode = realm->inode;
232		in = has_inode ? igrab(realm->inode) : NULL;
233		spin_unlock(&realm->inodes_with_caps_lock);
234		if (has_inode && !in)
235			break;
236		if (!in) {
237			up_read(&mdsc->snap_rwsem);
238			in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
239			down_read(&mdsc->snap_rwsem);
240			if (IS_ERR_OR_NULL(in))
241				break;
242			ceph_put_snap_realm(mdsc, realm);
243			if (!retry)
244				return ERR_PTR(-EAGAIN);
245			goto restart;
246		}
247
248		ci = ceph_inode(in);
249		has_quota = __ceph_has_any_quota(ci);
250		/* avoid calling iput_final() while holding mdsc->snap_rwsem */
251		ceph_async_iput(in);
252
253		next = realm->parent;
254		if (has_quota || !next)
255		       return realm;
256
257		ceph_get_snap_realm(mdsc, next);
258		ceph_put_snap_realm(mdsc, realm);
259		realm = next;
260	}
261	if (realm)
262		ceph_put_snap_realm(mdsc, realm);
263
264	return NULL;
265}
266
267bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
268{
269	struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
270	struct ceph_snap_realm *old_realm, *new_realm;
271	bool is_same;
272
273restart:
274	/*
275	 * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
276	 * However, get_quota_realm may drop it temporarily.  By setting the
277	 * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
278	 * dropped and we can then restart the whole operation.
279	 */
280	down_read(&mdsc->snap_rwsem);
281	old_realm = get_quota_realm(mdsc, old, true);
282	new_realm = get_quota_realm(mdsc, new, false);
283	if (PTR_ERR(new_realm) == -EAGAIN) {
284		up_read(&mdsc->snap_rwsem);
285		if (old_realm)
286			ceph_put_snap_realm(mdsc, old_realm);
287		goto restart;
288	}
289	is_same = (old_realm == new_realm);
290	up_read(&mdsc->snap_rwsem);
291
292	if (old_realm)
293		ceph_put_snap_realm(mdsc, old_realm);
294	if (new_realm)
295		ceph_put_snap_realm(mdsc, new_realm);
296
297	return is_same;
298}
299
/* Quota check operations accepted by check_quota_exceeded(). */
enum quota_check_op {
	QUOTA_CHECK_MAX_FILES_OP,	/* check quota max_files limit */
	QUOTA_CHECK_MAX_BYTES_OP,	/* check quota max_bytes limit */
	QUOTA_CHECK_MAX_BYTES_APPROACHING_OP	/* check if quota max_bytes
						   limit is approaching */
};
306
307/*
308 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
309 * realm, it will execute quota check operation defined by the 'op' parameter.
310 * The snaprealm walk is interrupted if the quota check detects that the quota
311 * is exceeded or if the root inode is reached.
312 */
313static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
314				 loff_t delta)
315{
316	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
317	struct ceph_inode_info *ci;
318	struct ceph_snap_realm *realm, *next;
319	struct inode *in;
320	u64 max, rvalue;
321	bool exceeded = false;
322
323	if (ceph_snap(inode) != CEPH_NOSNAP)
324		return false;
325
326	down_read(&mdsc->snap_rwsem);
327restart:
328	realm = ceph_inode(inode)->i_snap_realm;
329	if (realm)
330		ceph_get_snap_realm(mdsc, realm);
331	else
332		pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
333				   "null i_snap_realm\n", ceph_vinop(inode));
334	while (realm) {
335		bool has_inode;
336
337		spin_lock(&realm->inodes_with_caps_lock);
338		has_inode = realm->inode;
339		in = has_inode ? igrab(realm->inode) : NULL;
340		spin_unlock(&realm->inodes_with_caps_lock);
341		if (has_inode && !in)
342			break;
343		if (!in) {
344			up_read(&mdsc->snap_rwsem);
345			in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
346			down_read(&mdsc->snap_rwsem);
347			if (IS_ERR_OR_NULL(in))
348				break;
349			ceph_put_snap_realm(mdsc, realm);
350			goto restart;
351		}
352		ci = ceph_inode(in);
353		spin_lock(&ci->i_ceph_lock);
354		if (op == QUOTA_CHECK_MAX_FILES_OP) {
355			max = ci->i_max_files;
356			rvalue = ci->i_rfiles + ci->i_rsubdirs;
357		} else {
358			max = ci->i_max_bytes;
359			rvalue = ci->i_rbytes;
360		}
361		spin_unlock(&ci->i_ceph_lock);
362		switch (op) {
363		case QUOTA_CHECK_MAX_FILES_OP:
364			exceeded = (max && (rvalue >= max));
365			break;
366		case QUOTA_CHECK_MAX_BYTES_OP:
367			exceeded = (max && (rvalue + delta > max));
368			break;
369		case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
370			if (max) {
371				if (rvalue >= max)
372					exceeded = true;
373				else {
374					/*
375					 * when we're writing more that 1/16th
376					 * of the available space
377					 */
378					exceeded =
379						(((max - rvalue) >> 4) < delta);
380				}
381			}
382			break;
383		default:
384			/* Shouldn't happen */
385			pr_warn("Invalid quota check op (%d)\n", op);
386			exceeded = true; /* Just break the loop */
387		}
388		/* avoid calling iput_final() while holding mdsc->snap_rwsem */
389		ceph_async_iput(in);
390
391		next = realm->parent;
392		if (exceeded || !next)
393			break;
394		ceph_get_snap_realm(mdsc, next);
395		ceph_put_snap_realm(mdsc, realm);
396		realm = next;
397	}
398	if (realm)
399		ceph_put_snap_realm(mdsc, realm);
400	up_read(&mdsc->snap_rwsem);
401
402	return exceeded;
403}
404
405/*
406 * ceph_quota_is_max_files_exceeded - check if we can create a new file
407 * @inode:	directory where a new file is being created
408 *
409 * This functions returns true is max_files quota allows a new file to be
410 * created.  It is necessary to walk through the snaprealm hierarchy (until the
411 * FS root) to check all realms with quotas set.
412 */
413bool ceph_quota_is_max_files_exceeded(struct inode *inode)
414{
415	if (!ceph_has_realms_with_quotas(inode))
416		return false;
417
418	WARN_ON(!S_ISDIR(inode->i_mode));
419
420	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0);
421}
422
423/*
424 * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
425 * @inode:	inode being written
426 * @newsize:	new size if write succeeds
427 *
428 * This functions returns true is max_bytes quota allows a file size to reach
429 * @newsize; it returns false otherwise.
430 */
431bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
432{
433	loff_t size = i_size_read(inode);
434
435	if (!ceph_has_realms_with_quotas(inode))
436		return false;
437
438	/* return immediately if we're decreasing file size */
439	if (newsize <= size)
440		return false;
441
442	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
443}
444
445/*
446 * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
447 * @inode:	inode being written
448 * @newsize:	new size if write succeeds
449 *
450 * This function returns true if the new file size @newsize will be consuming
451 * more than 1/16th of the available quota space; it returns false otherwise.
452 */
453bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
454{
455	loff_t size = ceph_inode(inode)->i_reported_size;
456
457	if (!ceph_has_realms_with_quotas(inode))
458		return false;
459
460	/* return immediately if we're decreasing file size */
461	if (newsize <= size)
462		return false;
463
464	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
465				    (newsize - size));
466}
467
468/*
469 * ceph_quota_update_statfs - if root has quota update statfs with quota status
470 * @fsc:	filesystem client instance
471 * @buf:	statfs to update
472 *
473 * If the mounted filesystem root has max_bytes quota set, update the filesystem
474 * statistics with the quota status.
475 *
476 * This function returns true if the stats have been updated, false otherwise.
477 */
478bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
479{
480	struct ceph_mds_client *mdsc = fsc->mdsc;
481	struct ceph_inode_info *ci;
482	struct ceph_snap_realm *realm;
483	struct inode *in;
484	u64 total = 0, used, free;
485	bool is_updated = false;
486
487	down_read(&mdsc->snap_rwsem);
488	realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
489	up_read(&mdsc->snap_rwsem);
490	if (!realm)
491		return false;
492
493	spin_lock(&realm->inodes_with_caps_lock);
494	in = realm->inode ? igrab(realm->inode) : NULL;
495	spin_unlock(&realm->inodes_with_caps_lock);
496	if (in) {
497		ci = ceph_inode(in);
498		spin_lock(&ci->i_ceph_lock);
499		if (ci->i_max_bytes) {
500			total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
501			used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
502			/* It is possible for a quota to be exceeded.
503			 * Report 'zero' in that case
504			 */
505			free = total > used ? total - used : 0;
506		}
507		spin_unlock(&ci->i_ceph_lock);
508		if (total) {
509			buf->f_blocks = total;
510			buf->f_bfree = free;
511			buf->f_bavail = free;
512			is_updated = true;
513		}
514		iput(in);
515	}
516	ceph_put_snap_realm(mdsc, realm);
517
518	return is_updated;
519}
520