Loading...
1/*
2 * fs/kernfs/dir.c - kernfs directory implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11#include <linux/sched.h>
12#include <linux/fs.h>
13#include <linux/namei.h>
14#include <linux/idr.h>
15#include <linux/slab.h>
16#include <linux/security.h>
17#include <linux/hash.h>
18
19#include "kernfs-internal.h"
20
21DEFINE_MUTEX(kernfs_mutex);
22static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
23static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
24
25#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
26
27static bool kernfs_active(struct kernfs_node *kn)
28{
29 lockdep_assert_held(&kernfs_mutex);
30 return atomic_read(&kn->active) >= 0;
31}
32
/*
 * kernfs_lockdep - does @kn carry lockdep annotations?
 *
 * Without CONFIG_DEBUG_LOCK_ALLOC this is always false; otherwise it
 * reflects the KERNFS_LOCKDEP bit in @kn->flags.
 */
static bool kernfs_lockdep(struct kernfs_node *kn)
{
#ifndef CONFIG_DEBUG_LOCK_ALLOC
	return false;
#else
	return kn->flags & KERNFS_LOCKDEP;
#endif
}
41
42static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
43{
44 return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
45}
46
47/* kernfs_node_depth - compute depth from @from to @to */
48static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
49{
50 size_t depth = 0;
51
52 while (to->parent && to != from) {
53 depth++;
54 to = to->parent;
55 }
56 return depth;
57}
58
59static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
60 struct kernfs_node *b)
61{
62 size_t da, db;
63 struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);
64
65 if (ra != rb)
66 return NULL;
67
68 da = kernfs_depth(ra->kn, a);
69 db = kernfs_depth(rb->kn, b);
70
71 while (da > db) {
72 a = a->parent;
73 da--;
74 }
75 while (db > da) {
76 b = b->parent;
77 db--;
78 }
79
80 /* worst case b and a will be the same at root */
81 while (b != a) {
82 b = b->parent;
83 a = a->parent;
84 }
85
86 return a;
87}
88
89/**
90 * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to,
91 * where kn_from is treated as root of the path.
92 * @kn_from: kernfs node which should be treated as root for the path
93 * @kn_to: kernfs node to which path is needed
94 * @buf: buffer to copy the path into
95 * @buflen: size of @buf
96 *
97 * We need to handle couple of scenarios here:
98 * [1] when @kn_from is an ancestor of @kn_to at some level
99 * kn_from: /n1/n2/n3
100 * kn_to: /n1/n2/n3/n4/n5
101 * result: /n4/n5
102 *
103 * [2] when @kn_from is on a different hierarchy and we need to find common
104 * ancestor between @kn_from and @kn_to.
105 * kn_from: /n1/n2/n3/n4
106 * kn_to: /n1/n2/n5
107 * result: /../../n5
108 * OR
109 * kn_from: /n1/n2/n3/n4/n5 [depth=5]
110 * kn_to: /n1/n2/n3 [depth=3]
111 * result: /../..
112 *
113 * Returns the length of the full path. If the full length is equal to or
114 * greater than @buflen, @buf contains the truncated path with the trailing
115 * '\0'. On error, -errno is returned.
116 */
117static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
118 struct kernfs_node *kn_from,
119 char *buf, size_t buflen)
120{
121 struct kernfs_node *kn, *common;
122 const char parent_str[] = "/..";
123 size_t depth_from, depth_to, len = 0;
124 int i, j;
125
126 if (!kn_from)
127 kn_from = kernfs_root(kn_to)->kn;
128
129 if (kn_from == kn_to)
130 return strlcpy(buf, "/", buflen);
131
132 common = kernfs_common_ancestor(kn_from, kn_to);
133 if (WARN_ON(!common))
134 return -EINVAL;
135
136 depth_to = kernfs_depth(common, kn_to);
137 depth_from = kernfs_depth(common, kn_from);
138
139 if (buf)
140 buf[0] = '\0';
141
142 for (i = 0; i < depth_from; i++)
143 len += strlcpy(buf + len, parent_str,
144 len < buflen ? buflen - len : 0);
145
146 /* Calculate how many bytes we need for the rest */
147 for (i = depth_to - 1; i >= 0; i--) {
148 for (kn = kn_to, j = 0; j < i; j++)
149 kn = kn->parent;
150 len += strlcpy(buf + len, "/",
151 len < buflen ? buflen - len : 0);
152 len += strlcpy(buf + len, kn->name,
153 len < buflen ? buflen - len : 0);
154 }
155
156 return len;
157}
158
159/**
160 * kernfs_name - obtain the name of a given node
161 * @kn: kernfs_node of interest
162 * @buf: buffer to copy @kn's name into
163 * @buflen: size of @buf
164 *
165 * Copies the name of @kn into @buf of @buflen bytes. The behavior is
166 * similar to strlcpy(). It returns the length of @kn's name and if @buf
167 * isn't long enough, it's filled upto @buflen-1 and nul terminated.
168 *
169 * This function can be called from any context.
170 */
171int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
172{
173 unsigned long flags;
174 int ret;
175
176 spin_lock_irqsave(&kernfs_rename_lock, flags);
177 ret = kernfs_name_locked(kn, buf, buflen);
178 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
179 return ret;
180}
181
182/**
183 * kernfs_path_from_node - build path of node @to relative to @from.
184 * @from: parent kernfs_node relative to which we need to build the path
185 * @to: kernfs_node of interest
186 * @buf: buffer to copy @to's path into
187 * @buflen: size of @buf
188 *
189 * Builds @to's path relative to @from in @buf. @from and @to must
190 * be on the same kernfs-root. If @from is not parent of @to, then a relative
191 * path (which includes '..'s) as needed to reach from @from to @to is
192 * returned.
193 *
194 * Returns the length of the full path. If the full length is equal to or
195 * greater than @buflen, @buf contains the truncated path with the trailing
196 * '\0'. On error, -errno is returned.
197 */
198int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
199 char *buf, size_t buflen)
200{
201 unsigned long flags;
202 int ret;
203
204 spin_lock_irqsave(&kernfs_rename_lock, flags);
205 ret = kernfs_path_from_node_locked(to, from, buf, buflen);
206 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
207 return ret;
208}
209EXPORT_SYMBOL_GPL(kernfs_path_from_node);
210
211/**
212 * pr_cont_kernfs_name - pr_cont name of a kernfs_node
213 * @kn: kernfs_node of interest
214 *
215 * This function can be called from any context.
216 */
217void pr_cont_kernfs_name(struct kernfs_node *kn)
218{
219 unsigned long flags;
220
221 spin_lock_irqsave(&kernfs_rename_lock, flags);
222
223 kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
224 pr_cont("%s", kernfs_pr_cont_buf);
225
226 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
227}
228
229/**
230 * pr_cont_kernfs_path - pr_cont path of a kernfs_node
231 * @kn: kernfs_node of interest
232 *
233 * This function can be called from any context.
234 */
235void pr_cont_kernfs_path(struct kernfs_node *kn)
236{
237 unsigned long flags;
238 int sz;
239
240 spin_lock_irqsave(&kernfs_rename_lock, flags);
241
242 sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
243 sizeof(kernfs_pr_cont_buf));
244 if (sz < 0) {
245 pr_cont("(error)");
246 goto out;
247 }
248
249 if (sz >= sizeof(kernfs_pr_cont_buf)) {
250 pr_cont("(name too long)");
251 goto out;
252 }
253
254 pr_cont("%s", kernfs_pr_cont_buf);
255
256out:
257 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
258}
259
260/**
261 * kernfs_get_parent - determine the parent node and pin it
262 * @kn: kernfs_node of interest
263 *
264 * Determines @kn's parent, pins and returns it. This function can be
265 * called from any context.
266 */
267struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
268{
269 struct kernfs_node *parent;
270 unsigned long flags;
271
272 spin_lock_irqsave(&kernfs_rename_lock, flags);
273 parent = kn->parent;
274 kernfs_get(parent);
275 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
276
277 return parent;
278}
279
/**
 * kernfs_name_hash - hash a (namespace tag, name) pair
 * @name: Null terminated string to hash
 * @ns:   Namespace tag to hash
 *
 * Returns 31 bit hash of ns + name (so it fits in an off_t).  The result
 * is always within [2, INT_MAX - 1].
 */
static unsigned int kernfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash(ns);
	unsigned int remaining;

	for (remaining = strlen(name); remaining; remaining--)
		hash = partial_name_hash(*name++, hash);

	hash = end_name_hash(hash);
	hash &= 0x7fffffffU;

	/* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;

	return hash;
}
302
303static int kernfs_name_compare(unsigned int hash, const char *name,
304 const void *ns, const struct kernfs_node *kn)
305{
306 if (hash < kn->hash)
307 return -1;
308 if (hash > kn->hash)
309 return 1;
310 if (ns < kn->ns)
311 return -1;
312 if (ns > kn->ns)
313 return 1;
314 return strcmp(name, kn->name);
315}
316
317static int kernfs_sd_compare(const struct kernfs_node *left,
318 const struct kernfs_node *right)
319{
320 return kernfs_name_compare(left->hash, left->name, left->ns, right);
321}
322
323/**
324 * kernfs_link_sibling - link kernfs_node into sibling rbtree
325 * @kn: kernfs_node of interest
326 *
327 * Link @kn into its sibling rbtree which starts from
328 * @kn->parent->dir.children.
329 *
330 * Locking:
331 * mutex_lock(kernfs_mutex)
332 *
333 * RETURNS:
334 * 0 on susccess -EEXIST on failure.
335 */
336static int kernfs_link_sibling(struct kernfs_node *kn)
337{
338 struct rb_node **node = &kn->parent->dir.children.rb_node;
339 struct rb_node *parent = NULL;
340
341 while (*node) {
342 struct kernfs_node *pos;
343 int result;
344
345 pos = rb_to_kn(*node);
346 parent = *node;
347 result = kernfs_sd_compare(kn, pos);
348 if (result < 0)
349 node = &pos->rb.rb_left;
350 else if (result > 0)
351 node = &pos->rb.rb_right;
352 else
353 return -EEXIST;
354 }
355
356 /* add new node and rebalance the tree */
357 rb_link_node(&kn->rb, parent, node);
358 rb_insert_color(&kn->rb, &kn->parent->dir.children);
359
360 /* successfully added, account subdir number */
361 if (kernfs_type(kn) == KERNFS_DIR)
362 kn->parent->dir.subdirs++;
363
364 return 0;
365}
366
367/**
368 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
369 * @kn: kernfs_node of interest
370 *
371 * Try to unlink @kn from its sibling rbtree which starts from
372 * kn->parent->dir.children. Returns %true if @kn was actually
373 * removed, %false if @kn wasn't on the rbtree.
374 *
375 * Locking:
376 * mutex_lock(kernfs_mutex)
377 */
378static bool kernfs_unlink_sibling(struct kernfs_node *kn)
379{
380 if (RB_EMPTY_NODE(&kn->rb))
381 return false;
382
383 if (kernfs_type(kn) == KERNFS_DIR)
384 kn->parent->dir.subdirs--;
385
386 rb_erase(&kn->rb, &kn->parent->dir.children);
387 RB_CLEAR_NODE(&kn->rb);
388 return true;
389}
390
391/**
392 * kernfs_get_active - get an active reference to kernfs_node
393 * @kn: kernfs_node to get an active reference to
394 *
395 * Get an active reference of @kn. This function is noop if @kn
396 * is NULL.
397 *
398 * RETURNS:
399 * Pointer to @kn on success, NULL on failure.
400 */
401struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
402{
403 if (unlikely(!kn))
404 return NULL;
405
406 if (!atomic_inc_unless_negative(&kn->active))
407 return NULL;
408
409 if (kernfs_lockdep(kn))
410 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
411 return kn;
412}
413
414/**
415 * kernfs_put_active - put an active reference to kernfs_node
416 * @kn: kernfs_node to put an active reference to
417 *
418 * Put an active reference to @kn. This function is noop if @kn
419 * is NULL.
420 */
421void kernfs_put_active(struct kernfs_node *kn)
422{
423 struct kernfs_root *root = kernfs_root(kn);
424 int v;
425
426 if (unlikely(!kn))
427 return;
428
429 if (kernfs_lockdep(kn))
430 rwsem_release(&kn->dep_map, 1, _RET_IP_);
431 v = atomic_dec_return(&kn->active);
432 if (likely(v != KN_DEACTIVATED_BIAS))
433 return;
434
435 wake_up_all(&root->deactivate_waitq);
436}
437
438/**
439 * kernfs_drain - drain kernfs_node
440 * @kn: kernfs_node to drain
441 *
442 * Drain existing usages and nuke all existing mmaps of @kn. Mutiple
443 * removers may invoke this function concurrently on @kn and all will
444 * return after draining is complete.
445 */
446static void kernfs_drain(struct kernfs_node *kn)
447 __releases(&kernfs_mutex) __acquires(&kernfs_mutex)
448{
449 struct kernfs_root *root = kernfs_root(kn);
450
451 lockdep_assert_held(&kernfs_mutex);
452 WARN_ON_ONCE(kernfs_active(kn));
453
454 mutex_unlock(&kernfs_mutex);
455
456 if (kernfs_lockdep(kn)) {
457 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
458 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
459 lock_contended(&kn->dep_map, _RET_IP_);
460 }
461
462 /* but everyone should wait for draining */
463 wait_event(root->deactivate_waitq,
464 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
465
466 if (kernfs_lockdep(kn)) {
467 lock_acquired(&kn->dep_map, _RET_IP_);
468 rwsem_release(&kn->dep_map, 1, _RET_IP_);
469 }
470
471 kernfs_unmap_bin_file(kn);
472
473 mutex_lock(&kernfs_mutex);
474}
475
476/**
477 * kernfs_get - get a reference count on a kernfs_node
478 * @kn: the target kernfs_node
479 */
480void kernfs_get(struct kernfs_node *kn)
481{
482 if (kn) {
483 WARN_ON(!atomic_read(&kn->count));
484 atomic_inc(&kn->count);
485 }
486}
487EXPORT_SYMBOL_GPL(kernfs_get);
488
489/**
490 * kernfs_put - put a reference count on a kernfs_node
491 * @kn: the target kernfs_node
492 *
493 * Put a reference count of @kn and destroy it if it reached zero.
494 */
495void kernfs_put(struct kernfs_node *kn)
496{
497 struct kernfs_node *parent;
498 struct kernfs_root *root;
499
500 if (!kn || !atomic_dec_and_test(&kn->count))
501 return;
502 root = kernfs_root(kn);
503 repeat:
504 /*
505 * Moving/renaming is always done while holding reference.
506 * kn->parent won't change beneath us.
507 */
508 parent = kn->parent;
509
510 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
511 "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
512 parent ? parent->name : "", kn->name, atomic_read(&kn->active));
513
514 if (kernfs_type(kn) == KERNFS_LINK)
515 kernfs_put(kn->symlink.target_kn);
516
517 kfree_const(kn->name);
518
519 if (kn->iattr) {
520 if (kn->iattr->ia_secdata)
521 security_release_secctx(kn->iattr->ia_secdata,
522 kn->iattr->ia_secdata_len);
523 simple_xattrs_free(&kn->iattr->xattrs);
524 }
525 kfree(kn->iattr);
526 ida_simple_remove(&root->ino_ida, kn->ino);
527 kmem_cache_free(kernfs_node_cache, kn);
528
529 kn = parent;
530 if (kn) {
531 if (atomic_dec_and_test(&kn->count))
532 goto repeat;
533 } else {
534 /* just released the root kn, free @root too */
535 ida_destroy(&root->ino_ida);
536 kfree(root);
537 }
538}
539EXPORT_SYMBOL_GPL(kernfs_put);
540
541static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
542{
543 struct kernfs_node *kn;
544
545 if (flags & LOOKUP_RCU)
546 return -ECHILD;
547
548 /* Always perform fresh lookup for negatives */
549 if (d_really_is_negative(dentry))
550 goto out_bad_unlocked;
551
552 kn = dentry->d_fsdata;
553 mutex_lock(&kernfs_mutex);
554
555 /* The kernfs node has been deactivated */
556 if (!kernfs_active(kn))
557 goto out_bad;
558
559 /* The kernfs node has been moved? */
560 if (dentry->d_parent->d_fsdata != kn->parent)
561 goto out_bad;
562
563 /* The kernfs node has been renamed */
564 if (strcmp(dentry->d_name.name, kn->name) != 0)
565 goto out_bad;
566
567 /* The kernfs node has been moved to a different namespace */
568 if (kn->parent && kernfs_ns_enabled(kn->parent) &&
569 kernfs_info(dentry->d_sb)->ns != kn->ns)
570 goto out_bad;
571
572 mutex_unlock(&kernfs_mutex);
573 return 1;
574out_bad:
575 mutex_unlock(&kernfs_mutex);
576out_bad_unlocked:
577 return 0;
578}
579
580static void kernfs_dop_release(struct dentry *dentry)
581{
582 kernfs_put(dentry->d_fsdata);
583}
584
585const struct dentry_operations kernfs_dops = {
586 .d_revalidate = kernfs_dop_revalidate,
587 .d_release = kernfs_dop_release,
588};
589
590/**
591 * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
592 * @dentry: the dentry in question
593 *
594 * Return the kernfs_node associated with @dentry. If @dentry is not a
595 * kernfs one, %NULL is returned.
596 *
597 * While the returned kernfs_node will stay accessible as long as @dentry
598 * is accessible, the returned node can be in any state and the caller is
599 * fully responsible for determining what's accessible.
600 */
601struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
602{
603 if (dentry->d_sb->s_op == &kernfs_sops)
604 return dentry->d_fsdata;
605 return NULL;
606}
607
608static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
609 const char *name, umode_t mode,
610 unsigned flags)
611{
612 struct kernfs_node *kn;
613 int ret;
614
615 name = kstrdup_const(name, GFP_KERNEL);
616 if (!name)
617 return NULL;
618
619 kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
620 if (!kn)
621 goto err_out1;
622
623 ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
624 if (ret < 0)
625 goto err_out2;
626 kn->ino = ret;
627
628 atomic_set(&kn->count, 1);
629 atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
630 RB_CLEAR_NODE(&kn->rb);
631
632 kn->name = name;
633 kn->mode = mode;
634 kn->flags = flags;
635
636 return kn;
637
638 err_out2:
639 kmem_cache_free(kernfs_node_cache, kn);
640 err_out1:
641 kfree_const(name);
642 return NULL;
643}
644
645struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
646 const char *name, umode_t mode,
647 unsigned flags)
648{
649 struct kernfs_node *kn;
650
651 kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
652 if (kn) {
653 kernfs_get(parent);
654 kn->parent = parent;
655 }
656 return kn;
657}
658
659/**
660 * kernfs_add_one - add kernfs_node to parent without warning
661 * @kn: kernfs_node to be added
662 *
663 * The caller must already have initialized @kn->parent. This
664 * function increments nlink of the parent's inode if @kn is a
665 * directory and link into the children list of the parent.
666 *
667 * RETURNS:
668 * 0 on success, -EEXIST if entry with the given name already
669 * exists.
670 */
671int kernfs_add_one(struct kernfs_node *kn)
672{
673 struct kernfs_node *parent = kn->parent;
674 struct kernfs_iattrs *ps_iattr;
675 bool has_ns;
676 int ret;
677
678 mutex_lock(&kernfs_mutex);
679
680 ret = -EINVAL;
681 has_ns = kernfs_ns_enabled(parent);
682 if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
683 has_ns ? "required" : "invalid", parent->name, kn->name))
684 goto out_unlock;
685
686 if (kernfs_type(parent) != KERNFS_DIR)
687 goto out_unlock;
688
689 ret = -ENOENT;
690 if (parent->flags & KERNFS_EMPTY_DIR)
691 goto out_unlock;
692
693 if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
694 goto out_unlock;
695
696 kn->hash = kernfs_name_hash(kn->name, kn->ns);
697
698 ret = kernfs_link_sibling(kn);
699 if (ret)
700 goto out_unlock;
701
702 /* Update timestamps on the parent */
703 ps_iattr = parent->iattr;
704 if (ps_iattr) {
705 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
706 ktime_get_real_ts(&ps_iattrs->ia_ctime);
707 ps_iattrs->ia_mtime = ps_iattrs->ia_ctime;
708 }
709
710 mutex_unlock(&kernfs_mutex);
711
712 /*
713 * Activate the new node unless CREATE_DEACTIVATED is requested.
714 * If not activated here, the kernfs user is responsible for
715 * activating the node with kernfs_activate(). A node which hasn't
716 * been activated is not visible to userland and its removal won't
717 * trigger deactivation.
718 */
719 if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
720 kernfs_activate(kn);
721 return 0;
722
723out_unlock:
724 mutex_unlock(&kernfs_mutex);
725 return ret;
726}
727
728/**
729 * kernfs_find_ns - find kernfs_node with the given name
730 * @parent: kernfs_node to search under
731 * @name: name to look for
732 * @ns: the namespace tag to use
733 *
734 * Look for kernfs_node with name @name under @parent. Returns pointer to
735 * the found kernfs_node on success, %NULL on failure.
736 */
737static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
738 const unsigned char *name,
739 const void *ns)
740{
741 struct rb_node *node = parent->dir.children.rb_node;
742 bool has_ns = kernfs_ns_enabled(parent);
743 unsigned int hash;
744
745 lockdep_assert_held(&kernfs_mutex);
746
747 if (has_ns != (bool)ns) {
748 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
749 has_ns ? "required" : "invalid", parent->name, name);
750 return NULL;
751 }
752
753 hash = kernfs_name_hash(name, ns);
754 while (node) {
755 struct kernfs_node *kn;
756 int result;
757
758 kn = rb_to_kn(node);
759 result = kernfs_name_compare(hash, name, ns, kn);
760 if (result < 0)
761 node = node->rb_left;
762 else if (result > 0)
763 node = node->rb_right;
764 else
765 return kn;
766 }
767 return NULL;
768}
769
770static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
771 const unsigned char *path,
772 const void *ns)
773{
774 size_t len;
775 char *p, *name;
776
777 lockdep_assert_held(&kernfs_mutex);
778
779 /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
780 spin_lock_irq(&kernfs_rename_lock);
781
782 len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
783
784 if (len >= sizeof(kernfs_pr_cont_buf)) {
785 spin_unlock_irq(&kernfs_rename_lock);
786 return NULL;
787 }
788
789 p = kernfs_pr_cont_buf;
790
791 while ((name = strsep(&p, "/")) && parent) {
792 if (*name == '\0')
793 continue;
794 parent = kernfs_find_ns(parent, name, ns);
795 }
796
797 spin_unlock_irq(&kernfs_rename_lock);
798
799 return parent;
800}
801
802/**
803 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
804 * @parent: kernfs_node to search under
805 * @name: name to look for
806 * @ns: the namespace tag to use
807 *
808 * Look for kernfs_node with name @name under @parent and get a reference
809 * if found. This function may sleep and returns pointer to the found
810 * kernfs_node on success, %NULL on failure.
811 */
812struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
813 const char *name, const void *ns)
814{
815 struct kernfs_node *kn;
816
817 mutex_lock(&kernfs_mutex);
818 kn = kernfs_find_ns(parent, name, ns);
819 kernfs_get(kn);
820 mutex_unlock(&kernfs_mutex);
821
822 return kn;
823}
824EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
825
826/**
827 * kernfs_walk_and_get_ns - find and get kernfs_node with the given path
828 * @parent: kernfs_node to search under
829 * @path: path to look for
830 * @ns: the namespace tag to use
831 *
832 * Look for kernfs_node with path @path under @parent and get a reference
833 * if found. This function may sleep and returns pointer to the found
834 * kernfs_node on success, %NULL on failure.
835 */
836struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
837 const char *path, const void *ns)
838{
839 struct kernfs_node *kn;
840
841 mutex_lock(&kernfs_mutex);
842 kn = kernfs_walk_ns(parent, path, ns);
843 kernfs_get(kn);
844 mutex_unlock(&kernfs_mutex);
845
846 return kn;
847}
848
849/**
850 * kernfs_create_root - create a new kernfs hierarchy
851 * @scops: optional syscall operations for the hierarchy
852 * @flags: KERNFS_ROOT_* flags
853 * @priv: opaque data associated with the new directory
854 *
855 * Returns the root of the new hierarchy on success, ERR_PTR() value on
856 * failure.
857 */
858struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
859 unsigned int flags, void *priv)
860{
861 struct kernfs_root *root;
862 struct kernfs_node *kn;
863
864 root = kzalloc(sizeof(*root), GFP_KERNEL);
865 if (!root)
866 return ERR_PTR(-ENOMEM);
867
868 ida_init(&root->ino_ida);
869 INIT_LIST_HEAD(&root->supers);
870
871 kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
872 KERNFS_DIR);
873 if (!kn) {
874 ida_destroy(&root->ino_ida);
875 kfree(root);
876 return ERR_PTR(-ENOMEM);
877 }
878
879 kn->priv = priv;
880 kn->dir.root = root;
881
882 root->syscall_ops = scops;
883 root->flags = flags;
884 root->kn = kn;
885 init_waitqueue_head(&root->deactivate_waitq);
886
887 if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
888 kernfs_activate(kn);
889
890 return root;
891}
892
893/**
894 * kernfs_destroy_root - destroy a kernfs hierarchy
895 * @root: root of the hierarchy to destroy
896 *
897 * Destroy the hierarchy anchored at @root by removing all existing
898 * directories and destroying @root.
899 */
900void kernfs_destroy_root(struct kernfs_root *root)
901{
902 kernfs_remove(root->kn); /* will also free @root */
903}
904
905/**
906 * kernfs_create_dir_ns - create a directory
907 * @parent: parent in which to create a new directory
908 * @name: name of the new directory
909 * @mode: mode of the new directory
910 * @priv: opaque data associated with the new directory
911 * @ns: optional namespace tag of the directory
912 *
913 * Returns the created node on success, ERR_PTR() value on failure.
914 */
915struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
916 const char *name, umode_t mode,
917 void *priv, const void *ns)
918{
919 struct kernfs_node *kn;
920 int rc;
921
922 /* allocate */
923 kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
924 if (!kn)
925 return ERR_PTR(-ENOMEM);
926
927 kn->dir.root = parent->dir.root;
928 kn->ns = ns;
929 kn->priv = priv;
930
931 /* link in */
932 rc = kernfs_add_one(kn);
933 if (!rc)
934 return kn;
935
936 kernfs_put(kn);
937 return ERR_PTR(rc);
938}
939
940/**
941 * kernfs_create_empty_dir - create an always empty directory
942 * @parent: parent in which to create a new directory
943 * @name: name of the new directory
944 *
945 * Returns the created node on success, ERR_PTR() value on failure.
946 */
947struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
948 const char *name)
949{
950 struct kernfs_node *kn;
951 int rc;
952
953 /* allocate */
954 kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
955 if (!kn)
956 return ERR_PTR(-ENOMEM);
957
958 kn->flags |= KERNFS_EMPTY_DIR;
959 kn->dir.root = parent->dir.root;
960 kn->ns = NULL;
961 kn->priv = NULL;
962
963 /* link in */
964 rc = kernfs_add_one(kn);
965 if (!rc)
966 return kn;
967
968 kernfs_put(kn);
969 return ERR_PTR(rc);
970}
971
972static struct dentry *kernfs_iop_lookup(struct inode *dir,
973 struct dentry *dentry,
974 unsigned int flags)
975{
976 struct dentry *ret;
977 struct kernfs_node *parent = dentry->d_parent->d_fsdata;
978 struct kernfs_node *kn;
979 struct inode *inode;
980 const void *ns = NULL;
981
982 mutex_lock(&kernfs_mutex);
983
984 if (kernfs_ns_enabled(parent))
985 ns = kernfs_info(dir->i_sb)->ns;
986
987 kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
988
989 /* no such entry */
990 if (!kn || !kernfs_active(kn)) {
991 ret = NULL;
992 goto out_unlock;
993 }
994 kernfs_get(kn);
995 dentry->d_fsdata = kn;
996
997 /* attach dentry and inode */
998 inode = kernfs_get_inode(dir->i_sb, kn);
999 if (!inode) {
1000 ret = ERR_PTR(-ENOMEM);
1001 goto out_unlock;
1002 }
1003
1004 /* instantiate and hash dentry */
1005 ret = d_splice_alias(inode, dentry);
1006 out_unlock:
1007 mutex_unlock(&kernfs_mutex);
1008 return ret;
1009}
1010
1011static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
1012 umode_t mode)
1013{
1014 struct kernfs_node *parent = dir->i_private;
1015 struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
1016 int ret;
1017
1018 if (!scops || !scops->mkdir)
1019 return -EPERM;
1020
1021 if (!kernfs_get_active(parent))
1022 return -ENODEV;
1023
1024 ret = scops->mkdir(parent, dentry->d_name.name, mode);
1025
1026 kernfs_put_active(parent);
1027 return ret;
1028}
1029
1030static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
1031{
1032 struct kernfs_node *kn = dentry->d_fsdata;
1033 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1034 int ret;
1035
1036 if (!scops || !scops->rmdir)
1037 return -EPERM;
1038
1039 if (!kernfs_get_active(kn))
1040 return -ENODEV;
1041
1042 ret = scops->rmdir(kn);
1043
1044 kernfs_put_active(kn);
1045 return ret;
1046}
1047
1048static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
1049 struct inode *new_dir, struct dentry *new_dentry,
1050 unsigned int flags)
1051{
1052 struct kernfs_node *kn = old_dentry->d_fsdata;
1053 struct kernfs_node *new_parent = new_dir->i_private;
1054 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1055 int ret;
1056
1057 if (flags)
1058 return -EINVAL;
1059
1060 if (!scops || !scops->rename)
1061 return -EPERM;
1062
1063 if (!kernfs_get_active(kn))
1064 return -ENODEV;
1065
1066 if (!kernfs_get_active(new_parent)) {
1067 kernfs_put_active(kn);
1068 return -ENODEV;
1069 }
1070
1071 ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
1072
1073 kernfs_put_active(new_parent);
1074 kernfs_put_active(kn);
1075 return ret;
1076}
1077
1078const struct inode_operations kernfs_dir_iops = {
1079 .lookup = kernfs_iop_lookup,
1080 .permission = kernfs_iop_permission,
1081 .setattr = kernfs_iop_setattr,
1082 .getattr = kernfs_iop_getattr,
1083 .listxattr = kernfs_iop_listxattr,
1084
1085 .mkdir = kernfs_iop_mkdir,
1086 .rmdir = kernfs_iop_rmdir,
1087 .rename = kernfs_iop_rename,
1088};
1089
1090static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
1091{
1092 struct kernfs_node *last;
1093
1094 while (true) {
1095 struct rb_node *rbn;
1096
1097 last = pos;
1098
1099 if (kernfs_type(pos) != KERNFS_DIR)
1100 break;
1101
1102 rbn = rb_first(&pos->dir.children);
1103 if (!rbn)
1104 break;
1105
1106 pos = rb_to_kn(rbn);
1107 }
1108
1109 return last;
1110}
1111
1112/**
1113 * kernfs_next_descendant_post - find the next descendant for post-order walk
1114 * @pos: the current position (%NULL to initiate traversal)
1115 * @root: kernfs_node whose descendants to walk
1116 *
1117 * Find the next descendant to visit for post-order traversal of @root's
1118 * descendants. @root is included in the iteration and the last node to be
1119 * visited.
1120 */
1121static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
1122 struct kernfs_node *root)
1123{
1124 struct rb_node *rbn;
1125
1126 lockdep_assert_held(&kernfs_mutex);
1127
1128 /* if first iteration, visit leftmost descendant which may be root */
1129 if (!pos)
1130 return kernfs_leftmost_descendant(root);
1131
1132 /* if we visited @root, we're done */
1133 if (pos == root)
1134 return NULL;
1135
1136 /* if there's an unvisited sibling, visit its leftmost descendant */
1137 rbn = rb_next(&pos->rb);
1138 if (rbn)
1139 return kernfs_leftmost_descendant(rb_to_kn(rbn));
1140
1141 /* no sibling left, visit parent */
1142 return pos->parent;
1143}
1144
1145/**
1146 * kernfs_activate - activate a node which started deactivated
1147 * @kn: kernfs_node whose subtree is to be activated
1148 *
1149 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
1150 * needs to be explicitly activated. A node which hasn't been activated
1151 * isn't visible to userland and deactivation is skipped during its
1152 * removal. This is useful to construct atomic init sequences where
1153 * creation of multiple nodes should either succeed or fail atomically.
1154 *
1155 * The caller is responsible for ensuring that this function is not called
1156 * after kernfs_remove*() is invoked on @kn.
1157 */
1158void kernfs_activate(struct kernfs_node *kn)
1159{
1160 struct kernfs_node *pos;
1161
1162 mutex_lock(&kernfs_mutex);
1163
1164 pos = NULL;
1165 while ((pos = kernfs_next_descendant_post(pos, kn))) {
1166 if (!pos || (pos->flags & KERNFS_ACTIVATED))
1167 continue;
1168
1169 WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
1170 WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
1171
1172 atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
1173 pos->flags |= KERNFS_ACTIVATED;
1174 }
1175
1176 mutex_unlock(&kernfs_mutex);
1177}
1178
1179static void __kernfs_remove(struct kernfs_node *kn)
1180{
1181 struct kernfs_node *pos;
1182
1183 lockdep_assert_held(&kernfs_mutex);
1184
1185 /*
1186 * Short-circuit if non-root @kn has already finished removal.
1187 * This is for kernfs_remove_self() which plays with active ref
1188 * after removal.
1189 */
1190 if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
1191 return;
1192
1193 pr_debug("kernfs %s: removing\n", kn->name);
1194
1195 /* prevent any new usage under @kn by deactivating all nodes */
1196 pos = NULL;
1197 while ((pos = kernfs_next_descendant_post(pos, kn)))
1198 if (kernfs_active(pos))
1199 atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
1200
1201 /* deactivate and unlink the subtree node-by-node */
1202 do {
1203 pos = kernfs_leftmost_descendant(kn);
1204
1205 /*
1206 * kernfs_drain() drops kernfs_mutex temporarily and @pos's
1207 * base ref could have been put by someone else by the time
1208 * the function returns. Make sure it doesn't go away
1209 * underneath us.
1210 */
1211 kernfs_get(pos);
1212
1213 /*
1214 * Drain iff @kn was activated. This avoids draining and
1215 * its lockdep annotations for nodes which have never been
1216 * activated and allows embedding kernfs_remove() in create
1217 * error paths without worrying about draining.
1218 */
1219 if (kn->flags & KERNFS_ACTIVATED)
1220 kernfs_drain(pos);
1221 else
1222 WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
1223
1224 /*
1225 * kernfs_unlink_sibling() succeeds once per node. Use it
1226 * to decide who's responsible for cleanups.
1227 */
1228 if (!pos->parent || kernfs_unlink_sibling(pos)) {
1229 struct kernfs_iattrs *ps_iattr =
1230 pos->parent ? pos->parent->iattr : NULL;
1231
1232 /* update timestamps on the parent */
1233 if (ps_iattr) {
1234 ktime_get_real_ts(&ps_iattr->ia_iattr.ia_ctime);
1235 ps_iattr->ia_iattr.ia_mtime =
1236 ps_iattr->ia_iattr.ia_ctime;
1237 }
1238
1239 kernfs_put(pos);
1240 }
1241
1242 kernfs_put(pos);
1243 } while (pos != kn);
1244}
1245
1246/**
1247 * kernfs_remove - remove a kernfs_node recursively
1248 * @kn: the kernfs_node to remove
1249 *
1250 * Remove @kn along with all its subdirectories and files.
1251 */
1252void kernfs_remove(struct kernfs_node *kn)
1253{
1254 mutex_lock(&kernfs_mutex);
1255 __kernfs_remove(kn);
1256 mutex_unlock(&kernfs_mutex);
1257}
1258
/**
 * kernfs_break_active_protection - break out of active protection
 * @kn: the self kernfs_node
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops.  Every call must be
 * paired with a later call to kernfs_unbreak_active_protection().
 *
 * The active reference the caller holds on @kn is released.  From this
 * point on @kn may be removed at any time and it is entirely up to the
 * caller to make sure that whatever it dereferences stays accessible.
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
	/*
	 * Remove ourselves from the active ref dependency chain.  lockdep
	 * will complain if the caller doesn't actually hold an active ref.
	 */
	kernfs_put_active(kn);
}
1281
1282/**
1283 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
1284 * @kn: the self kernfs_node
1285 *
1286 * If kernfs_break_active_protection() was called, this function must be
1287 * invoked before finishing the kernfs operation. Note that while this
1288 * function restores the active reference, it doesn't and can't actually
1289 * restore the active protection - @kn may already or be in the process of
1290 * being removed. Once kernfs_break_active_protection() is invoked, that
1291 * protection is irreversibly gone for the kernfs operation instance.
1292 *
1293 * While this function may be called at any point after
1294 * kernfs_break_active_protection() is invoked, its most useful location
1295 * would be right before the enclosing kernfs operation returns.
1296 */
1297void kernfs_unbreak_active_protection(struct kernfs_node *kn)
1298{
1299 /*
1300 * @kn->active could be in any state; however, the increment we do
1301 * here will be undone as soon as the enclosing kernfs operation
1302 * finishes and this temporary bump can't break anything. If @kn
1303 * is alive, nothing changes. If @kn is being deactivated, the
1304 * soon-to-follow put will either finish deactivation or restore
1305 * deactivated state. If @kn is already removed, the temporary
1306 * bump is guaranteed to be gone before @kn is released.
1307 */
1308 atomic_inc(&kn->active);
1309 if (kernfs_lockdep(kn))
1310 rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
1311}
1312
1313/**
1314 * kernfs_remove_self - remove a kernfs_node from its own method
1315 * @kn: the self kernfs_node to remove
1316 *
1317 * The caller must be running off of a kernfs operation which is invoked
1318 * with an active reference - e.g. one of kernfs_ops. This can be used to
1319 * implement a file operation which deletes itself.
1320 *
1321 * For example, the "delete" file for a sysfs device directory can be
1322 * implemented by invoking kernfs_remove_self() on the "delete" file
1323 * itself. This function breaks the circular dependency of trying to
1324 * deactivate self while holding an active ref itself. It isn't necessary
1325 * to modify the usual removal path to use kernfs_remove_self(). The
1326 * "delete" implementation can simply invoke kernfs_remove_self() on self
1327 * before proceeding with the usual removal path. kernfs will ignore later
1328 * kernfs_remove() on self.
1329 *
1330 * kernfs_remove_self() can be called multiple times concurrently on the
1331 * same kernfs_node. Only the first one actually performs removal and
1332 * returns %true. All others will wait until the kernfs operation which
1333 * won self-removal finishes and return %false. Note that the losers wait
1334 * for the completion of not only the winning kernfs_remove_self() but also
1335 * the whole kernfs_ops which won the arbitration. This can be used to
1336 * guarantee, for example, all concurrent writes to a "delete" file to
1337 * finish only after the whole operation is complete.
1338 */
1339bool kernfs_remove_self(struct kernfs_node *kn)
1340{
1341 bool ret;
1342
1343 mutex_lock(&kernfs_mutex);
1344 kernfs_break_active_protection(kn);
1345
1346 /*
1347 * SUICIDAL is used to arbitrate among competing invocations. Only
1348 * the first one will actually perform removal. When the removal
1349 * is complete, SUICIDED is set and the active ref is restored
1350 * while holding kernfs_mutex. The ones which lost arbitration
1351 * waits for SUICDED && drained which can happen only after the
1352 * enclosing kernfs operation which executed the winning instance
1353 * of kernfs_remove_self() finished.
1354 */
1355 if (!(kn->flags & KERNFS_SUICIDAL)) {
1356 kn->flags |= KERNFS_SUICIDAL;
1357 __kernfs_remove(kn);
1358 kn->flags |= KERNFS_SUICIDED;
1359 ret = true;
1360 } else {
1361 wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
1362 DEFINE_WAIT(wait);
1363
1364 while (true) {
1365 prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
1366
1367 if ((kn->flags & KERNFS_SUICIDED) &&
1368 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
1369 break;
1370
1371 mutex_unlock(&kernfs_mutex);
1372 schedule();
1373 mutex_lock(&kernfs_mutex);
1374 }
1375 finish_wait(waitq, &wait);
1376 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
1377 ret = false;
1378 }
1379
1380 /*
1381 * This must be done while holding kernfs_mutex; otherwise, waiting
1382 * for SUICIDED && deactivated could finish prematurely.
1383 */
1384 kernfs_unbreak_active_protection(kn);
1385
1386 mutex_unlock(&kernfs_mutex);
1387 return ret;
1388}
1389
1390/**
1391 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
1392 * @parent: parent of the target
1393 * @name: name of the kernfs_node to remove
1394 * @ns: namespace tag of the kernfs_node to remove
1395 *
1396 * Look for the kernfs_node with @name and @ns under @parent and remove it.
1397 * Returns 0 on success, -ENOENT if such entry doesn't exist.
1398 */
1399int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1400 const void *ns)
1401{
1402 struct kernfs_node *kn;
1403
1404 if (!parent) {
1405 WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1406 name);
1407 return -ENOENT;
1408 }
1409
1410 mutex_lock(&kernfs_mutex);
1411
1412 kn = kernfs_find_ns(parent, name, ns);
1413 if (kn)
1414 __kernfs_remove(kn);
1415
1416 mutex_unlock(&kernfs_mutex);
1417
1418 if (kn)
1419 return 0;
1420 else
1421 return -ENOENT;
1422}
1423
1424/**
1425 * kernfs_rename_ns - move and rename a kernfs_node
1426 * @kn: target node
1427 * @new_parent: new parent to put @sd under
1428 * @new_name: new name
1429 * @new_ns: new namespace tag
1430 */
1431int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
1432 const char *new_name, const void *new_ns)
1433{
1434 struct kernfs_node *old_parent;
1435 const char *old_name = NULL;
1436 int error;
1437
1438 /* can't move or rename root */
1439 if (!kn->parent)
1440 return -EINVAL;
1441
1442 mutex_lock(&kernfs_mutex);
1443
1444 error = -ENOENT;
1445 if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
1446 (new_parent->flags & KERNFS_EMPTY_DIR))
1447 goto out;
1448
1449 error = 0;
1450 if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
1451 (strcmp(kn->name, new_name) == 0))
1452 goto out; /* nothing to rename */
1453
1454 error = -EEXIST;
1455 if (kernfs_find_ns(new_parent, new_name, new_ns))
1456 goto out;
1457
1458 /* rename kernfs_node */
1459 if (strcmp(kn->name, new_name) != 0) {
1460 error = -ENOMEM;
1461 new_name = kstrdup_const(new_name, GFP_KERNEL);
1462 if (!new_name)
1463 goto out;
1464 } else {
1465 new_name = NULL;
1466 }
1467
1468 /*
1469 * Move to the appropriate place in the appropriate directories rbtree.
1470 */
1471 kernfs_unlink_sibling(kn);
1472 kernfs_get(new_parent);
1473
1474 /* rename_lock protects ->parent and ->name accessors */
1475 spin_lock_irq(&kernfs_rename_lock);
1476
1477 old_parent = kn->parent;
1478 kn->parent = new_parent;
1479
1480 kn->ns = new_ns;
1481 if (new_name) {
1482 old_name = kn->name;
1483 kn->name = new_name;
1484 }
1485
1486 spin_unlock_irq(&kernfs_rename_lock);
1487
1488 kn->hash = kernfs_name_hash(kn->name, kn->ns);
1489 kernfs_link_sibling(kn);
1490
1491 kernfs_put(old_parent);
1492 kfree_const(old_name);
1493
1494 error = 0;
1495 out:
1496 mutex_unlock(&kernfs_mutex);
1497 return error;
1498}
1499
1500/* Relationship between s_mode and the DT_xxx types */
1501static inline unsigned char dt_type(struct kernfs_node *kn)
1502{
1503 return (kn->mode >> 12) & 15;
1504}
1505
1506static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1507{
1508 kernfs_put(filp->private_data);
1509 return 0;
1510}
1511
1512static struct kernfs_node *kernfs_dir_pos(const void *ns,
1513 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1514{
1515 if (pos) {
1516 int valid = kernfs_active(pos) &&
1517 pos->parent == parent && hash == pos->hash;
1518 kernfs_put(pos);
1519 if (!valid)
1520 pos = NULL;
1521 }
1522 if (!pos && (hash > 1) && (hash < INT_MAX)) {
1523 struct rb_node *node = parent->dir.children.rb_node;
1524 while (node) {
1525 pos = rb_to_kn(node);
1526
1527 if (hash < pos->hash)
1528 node = node->rb_left;
1529 else if (hash > pos->hash)
1530 node = node->rb_right;
1531 else
1532 break;
1533 }
1534 }
1535 /* Skip over entries which are dying/dead or in the wrong namespace */
1536 while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
1537 struct rb_node *node = rb_next(&pos->rb);
1538 if (!node)
1539 pos = NULL;
1540 else
1541 pos = rb_to_kn(node);
1542 }
1543 return pos;
1544}
1545
1546static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1547 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1548{
1549 pos = kernfs_dir_pos(ns, parent, ino, pos);
1550 if (pos) {
1551 do {
1552 struct rb_node *node = rb_next(&pos->rb);
1553 if (!node)
1554 pos = NULL;
1555 else
1556 pos = rb_to_kn(node);
1557 } while (pos && (!kernfs_active(pos) || pos->ns != ns));
1558 }
1559 return pos;
1560}
1561
1562static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1563{
1564 struct dentry *dentry = file->f_path.dentry;
1565 struct kernfs_node *parent = dentry->d_fsdata;
1566 struct kernfs_node *pos = file->private_data;
1567 const void *ns = NULL;
1568
1569 if (!dir_emit_dots(file, ctx))
1570 return 0;
1571 mutex_lock(&kernfs_mutex);
1572
1573 if (kernfs_ns_enabled(parent))
1574 ns = kernfs_info(dentry->d_sb)->ns;
1575
1576 for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1577 pos;
1578 pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1579 const char *name = pos->name;
1580 unsigned int type = dt_type(pos);
1581 int len = strlen(name);
1582 ino_t ino = pos->ino;
1583
1584 ctx->pos = pos->hash;
1585 file->private_data = pos;
1586 kernfs_get(pos);
1587
1588 mutex_unlock(&kernfs_mutex);
1589 if (!dir_emit(ctx, name, len, ino, type))
1590 return 0;
1591 mutex_lock(&kernfs_mutex);
1592 }
1593 mutex_unlock(&kernfs_mutex);
1594 file->private_data = NULL;
1595 ctx->pos = INT_MAX;
1596 return 0;
1597}
1598
1599const struct file_operations kernfs_dir_fops = {
1600 .read = generic_read_dir,
1601 .iterate_shared = kernfs_fop_readdir,
1602 .release = kernfs_dir_fop_release,
1603 .llseek = generic_file_llseek,
1604};
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * fs/kernfs/dir.c - kernfs directory implementation
4 *
5 * Copyright (c) 2001-3 Patrick Mochel
6 * Copyright (c) 2007 SUSE Linux Products GmbH
7 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
8 */
9
10#include <linux/sched.h>
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/idr.h>
14#include <linux/slab.h>
15#include <linux/security.h>
16#include <linux/hash.h>
17
18#include "kernfs-internal.h"
19
20DEFINE_MUTEX(kernfs_mutex);
21static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
22static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
23static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
24
25#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
26
27static bool kernfs_active(struct kernfs_node *kn)
28{
29 lockdep_assert_held(&kernfs_mutex);
30 return atomic_read(&kn->active) >= 0;
31}
32
33static bool kernfs_lockdep(struct kernfs_node *kn)
34{
35#ifdef CONFIG_DEBUG_LOCK_ALLOC
36 return kn->flags & KERNFS_LOCKDEP;
37#else
38 return false;
39#endif
40}
41
42static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
43{
44 if (!kn)
45 return strlcpy(buf, "(null)", buflen);
46
47 return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
48}
49
50/* kernfs_node_depth - compute depth from @from to @to */
51static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
52{
53 size_t depth = 0;
54
55 while (to->parent && to != from) {
56 depth++;
57 to = to->parent;
58 }
59 return depth;
60}
61
62static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
63 struct kernfs_node *b)
64{
65 size_t da, db;
66 struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);
67
68 if (ra != rb)
69 return NULL;
70
71 da = kernfs_depth(ra->kn, a);
72 db = kernfs_depth(rb->kn, b);
73
74 while (da > db) {
75 a = a->parent;
76 da--;
77 }
78 while (db > da) {
79 b = b->parent;
80 db--;
81 }
82
83 /* worst case b and a will be the same at root */
84 while (b != a) {
85 b = b->parent;
86 a = a->parent;
87 }
88
89 return a;
90}
91
92/**
93 * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to,
94 * where kn_from is treated as root of the path.
95 * @kn_from: kernfs node which should be treated as root for the path
96 * @kn_to: kernfs node to which path is needed
97 * @buf: buffer to copy the path into
98 * @buflen: size of @buf
99 *
100 * We need to handle couple of scenarios here:
101 * [1] when @kn_from is an ancestor of @kn_to at some level
102 * kn_from: /n1/n2/n3
103 * kn_to: /n1/n2/n3/n4/n5
104 * result: /n4/n5
105 *
106 * [2] when @kn_from is on a different hierarchy and we need to find common
107 * ancestor between @kn_from and @kn_to.
108 * kn_from: /n1/n2/n3/n4
109 * kn_to: /n1/n2/n5
110 * result: /../../n5
111 * OR
112 * kn_from: /n1/n2/n3/n4/n5 [depth=5]
113 * kn_to: /n1/n2/n3 [depth=3]
114 * result: /../..
115 *
116 * [3] when @kn_to is NULL result will be "(null)"
117 *
118 * Returns the length of the full path. If the full length is equal to or
119 * greater than @buflen, @buf contains the truncated path with the trailing
120 * '\0'. On error, -errno is returned.
121 */
122static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
123 struct kernfs_node *kn_from,
124 char *buf, size_t buflen)
125{
126 struct kernfs_node *kn, *common;
127 const char parent_str[] = "/..";
128 size_t depth_from, depth_to, len = 0;
129 int i, j;
130
131 if (!kn_to)
132 return strlcpy(buf, "(null)", buflen);
133
134 if (!kn_from)
135 kn_from = kernfs_root(kn_to)->kn;
136
137 if (kn_from == kn_to)
138 return strlcpy(buf, "/", buflen);
139
140 if (!buf)
141 return -EINVAL;
142
143 common = kernfs_common_ancestor(kn_from, kn_to);
144 if (WARN_ON(!common))
145 return -EINVAL;
146
147 depth_to = kernfs_depth(common, kn_to);
148 depth_from = kernfs_depth(common, kn_from);
149
150 buf[0] = '\0';
151
152 for (i = 0; i < depth_from; i++)
153 len += strlcpy(buf + len, parent_str,
154 len < buflen ? buflen - len : 0);
155
156 /* Calculate how many bytes we need for the rest */
157 for (i = depth_to - 1; i >= 0; i--) {
158 for (kn = kn_to, j = 0; j < i; j++)
159 kn = kn->parent;
160 len += strlcpy(buf + len, "/",
161 len < buflen ? buflen - len : 0);
162 len += strlcpy(buf + len, kn->name,
163 len < buflen ? buflen - len : 0);
164 }
165
166 return len;
167}
168
169/**
170 * kernfs_name - obtain the name of a given node
171 * @kn: kernfs_node of interest
172 * @buf: buffer to copy @kn's name into
173 * @buflen: size of @buf
174 *
175 * Copies the name of @kn into @buf of @buflen bytes. The behavior is
176 * similar to strlcpy(). It returns the length of @kn's name and if @buf
177 * isn't long enough, it's filled upto @buflen-1 and nul terminated.
178 *
179 * Fills buffer with "(null)" if @kn is NULL.
180 *
181 * This function can be called from any context.
182 */
183int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
184{
185 unsigned long flags;
186 int ret;
187
188 spin_lock_irqsave(&kernfs_rename_lock, flags);
189 ret = kernfs_name_locked(kn, buf, buflen);
190 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
191 return ret;
192}
193
194/**
195 * kernfs_path_from_node - build path of node @to relative to @from.
196 * @from: parent kernfs_node relative to which we need to build the path
197 * @to: kernfs_node of interest
198 * @buf: buffer to copy @to's path into
199 * @buflen: size of @buf
200 *
201 * Builds @to's path relative to @from in @buf. @from and @to must
202 * be on the same kernfs-root. If @from is not parent of @to, then a relative
203 * path (which includes '..'s) as needed to reach from @from to @to is
204 * returned.
205 *
206 * Returns the length of the full path. If the full length is equal to or
207 * greater than @buflen, @buf contains the truncated path with the trailing
208 * '\0'. On error, -errno is returned.
209 */
210int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
211 char *buf, size_t buflen)
212{
213 unsigned long flags;
214 int ret;
215
216 spin_lock_irqsave(&kernfs_rename_lock, flags);
217 ret = kernfs_path_from_node_locked(to, from, buf, buflen);
218 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
219 return ret;
220}
221EXPORT_SYMBOL_GPL(kernfs_path_from_node);
222
223/**
224 * pr_cont_kernfs_name - pr_cont name of a kernfs_node
225 * @kn: kernfs_node of interest
226 *
227 * This function can be called from any context.
228 */
229void pr_cont_kernfs_name(struct kernfs_node *kn)
230{
231 unsigned long flags;
232
233 spin_lock_irqsave(&kernfs_rename_lock, flags);
234
235 kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
236 pr_cont("%s", kernfs_pr_cont_buf);
237
238 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
239}
240
241/**
242 * pr_cont_kernfs_path - pr_cont path of a kernfs_node
243 * @kn: kernfs_node of interest
244 *
245 * This function can be called from any context.
246 */
247void pr_cont_kernfs_path(struct kernfs_node *kn)
248{
249 unsigned long flags;
250 int sz;
251
252 spin_lock_irqsave(&kernfs_rename_lock, flags);
253
254 sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
255 sizeof(kernfs_pr_cont_buf));
256 if (sz < 0) {
257 pr_cont("(error)");
258 goto out;
259 }
260
261 if (sz >= sizeof(kernfs_pr_cont_buf)) {
262 pr_cont("(name too long)");
263 goto out;
264 }
265
266 pr_cont("%s", kernfs_pr_cont_buf);
267
268out:
269 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
270}
271
272/**
273 * kernfs_get_parent - determine the parent node and pin it
274 * @kn: kernfs_node of interest
275 *
276 * Determines @kn's parent, pins and returns it. This function can be
277 * called from any context.
278 */
279struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
280{
281 struct kernfs_node *parent;
282 unsigned long flags;
283
284 spin_lock_irqsave(&kernfs_rename_lock, flags);
285 parent = kn->parent;
286 kernfs_get(parent);
287 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
288
289 return parent;
290}
291
292/**
293 * kernfs_name_hash
294 * @name: Null terminated string to hash
295 * @ns: Namespace tag to hash
296 *
297 * Returns 31 bit hash of ns + name (so it fits in an off_t )
298 */
299static unsigned int kernfs_name_hash(const char *name, const void *ns)
300{
301 unsigned long hash = init_name_hash(ns);
302 unsigned int len = strlen(name);
303 while (len--)
304 hash = partial_name_hash(*name++, hash);
305 hash = end_name_hash(hash);
306 hash &= 0x7fffffffU;
307 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
308 if (hash < 2)
309 hash += 2;
310 if (hash >= INT_MAX)
311 hash = INT_MAX - 1;
312 return hash;
313}
314
315static int kernfs_name_compare(unsigned int hash, const char *name,
316 const void *ns, const struct kernfs_node *kn)
317{
318 if (hash < kn->hash)
319 return -1;
320 if (hash > kn->hash)
321 return 1;
322 if (ns < kn->ns)
323 return -1;
324 if (ns > kn->ns)
325 return 1;
326 return strcmp(name, kn->name);
327}
328
329static int kernfs_sd_compare(const struct kernfs_node *left,
330 const struct kernfs_node *right)
331{
332 return kernfs_name_compare(left->hash, left->name, left->ns, right);
333}
334
335/**
336 * kernfs_link_sibling - link kernfs_node into sibling rbtree
337 * @kn: kernfs_node of interest
338 *
339 * Link @kn into its sibling rbtree which starts from
340 * @kn->parent->dir.children.
341 *
342 * Locking:
343 * mutex_lock(kernfs_mutex)
344 *
345 * RETURNS:
346 * 0 on susccess -EEXIST on failure.
347 */
348static int kernfs_link_sibling(struct kernfs_node *kn)
349{
350 struct rb_node **node = &kn->parent->dir.children.rb_node;
351 struct rb_node *parent = NULL;
352
353 while (*node) {
354 struct kernfs_node *pos;
355 int result;
356
357 pos = rb_to_kn(*node);
358 parent = *node;
359 result = kernfs_sd_compare(kn, pos);
360 if (result < 0)
361 node = &pos->rb.rb_left;
362 else if (result > 0)
363 node = &pos->rb.rb_right;
364 else
365 return -EEXIST;
366 }
367
368 /* add new node and rebalance the tree */
369 rb_link_node(&kn->rb, parent, node);
370 rb_insert_color(&kn->rb, &kn->parent->dir.children);
371
372 /* successfully added, account subdir number */
373 if (kernfs_type(kn) == KERNFS_DIR)
374 kn->parent->dir.subdirs++;
375
376 return 0;
377}
378
379/**
380 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
381 * @kn: kernfs_node of interest
382 *
383 * Try to unlink @kn from its sibling rbtree which starts from
384 * kn->parent->dir.children. Returns %true if @kn was actually
385 * removed, %false if @kn wasn't on the rbtree.
386 *
387 * Locking:
388 * mutex_lock(kernfs_mutex)
389 */
390static bool kernfs_unlink_sibling(struct kernfs_node *kn)
391{
392 if (RB_EMPTY_NODE(&kn->rb))
393 return false;
394
395 if (kernfs_type(kn) == KERNFS_DIR)
396 kn->parent->dir.subdirs--;
397
398 rb_erase(&kn->rb, &kn->parent->dir.children);
399 RB_CLEAR_NODE(&kn->rb);
400 return true;
401}
402
403/**
404 * kernfs_get_active - get an active reference to kernfs_node
405 * @kn: kernfs_node to get an active reference to
406 *
407 * Get an active reference of @kn. This function is noop if @kn
408 * is NULL.
409 *
410 * RETURNS:
411 * Pointer to @kn on success, NULL on failure.
412 */
413struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
414{
415 if (unlikely(!kn))
416 return NULL;
417
418 if (!atomic_inc_unless_negative(&kn->active))
419 return NULL;
420
421 if (kernfs_lockdep(kn))
422 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
423 return kn;
424}
425
426/**
427 * kernfs_put_active - put an active reference to kernfs_node
428 * @kn: kernfs_node to put an active reference to
429 *
430 * Put an active reference to @kn. This function is noop if @kn
431 * is NULL.
432 */
433void kernfs_put_active(struct kernfs_node *kn)
434{
435 int v;
436
437 if (unlikely(!kn))
438 return;
439
440 if (kernfs_lockdep(kn))
441 rwsem_release(&kn->dep_map, _RET_IP_);
442 v = atomic_dec_return(&kn->active);
443 if (likely(v != KN_DEACTIVATED_BIAS))
444 return;
445
446 wake_up_all(&kernfs_root(kn)->deactivate_waitq);
447}
448
449/**
450 * kernfs_drain - drain kernfs_node
451 * @kn: kernfs_node to drain
452 *
453 * Drain existing usages and nuke all existing mmaps of @kn. Mutiple
454 * removers may invoke this function concurrently on @kn and all will
455 * return after draining is complete.
456 */
457static void kernfs_drain(struct kernfs_node *kn)
458 __releases(&kernfs_mutex) __acquires(&kernfs_mutex)
459{
460 struct kernfs_root *root = kernfs_root(kn);
461
462 lockdep_assert_held(&kernfs_mutex);
463 WARN_ON_ONCE(kernfs_active(kn));
464
465 mutex_unlock(&kernfs_mutex);
466
467 if (kernfs_lockdep(kn)) {
468 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
469 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
470 lock_contended(&kn->dep_map, _RET_IP_);
471 }
472
473 /* but everyone should wait for draining */
474 wait_event(root->deactivate_waitq,
475 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
476
477 if (kernfs_lockdep(kn)) {
478 lock_acquired(&kn->dep_map, _RET_IP_);
479 rwsem_release(&kn->dep_map, _RET_IP_);
480 }
481
482 kernfs_drain_open_files(kn);
483
484 mutex_lock(&kernfs_mutex);
485}
486
487/**
488 * kernfs_get - get a reference count on a kernfs_node
489 * @kn: the target kernfs_node
490 */
491void kernfs_get(struct kernfs_node *kn)
492{
493 if (kn) {
494 WARN_ON(!atomic_read(&kn->count));
495 atomic_inc(&kn->count);
496 }
497}
498EXPORT_SYMBOL_GPL(kernfs_get);
499
500/**
501 * kernfs_put - put a reference count on a kernfs_node
502 * @kn: the target kernfs_node
503 *
504 * Put a reference count of @kn and destroy it if it reached zero.
505 */
506void kernfs_put(struct kernfs_node *kn)
507{
508 struct kernfs_node *parent;
509 struct kernfs_root *root;
510
511 if (!kn || !atomic_dec_and_test(&kn->count))
512 return;
513 root = kernfs_root(kn);
514 repeat:
515 /*
516 * Moving/renaming is always done while holding reference.
517 * kn->parent won't change beneath us.
518 */
519 parent = kn->parent;
520
521 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
522 "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
523 parent ? parent->name : "", kn->name, atomic_read(&kn->active));
524
525 if (kernfs_type(kn) == KERNFS_LINK)
526 kernfs_put(kn->symlink.target_kn);
527
528 kfree_const(kn->name);
529
530 if (kn->iattr) {
531 simple_xattrs_free(&kn->iattr->xattrs);
532 kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
533 }
534 spin_lock(&kernfs_idr_lock);
535 idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
536 spin_unlock(&kernfs_idr_lock);
537 kmem_cache_free(kernfs_node_cache, kn);
538
539 kn = parent;
540 if (kn) {
541 if (atomic_dec_and_test(&kn->count))
542 goto repeat;
543 } else {
544 /* just released the root kn, free @root too */
545 idr_destroy(&root->ino_idr);
546 kfree(root);
547 }
548}
549EXPORT_SYMBOL_GPL(kernfs_put);
550
551/**
552 * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
553 * @dentry: the dentry in question
554 *
555 * Return the kernfs_node associated with @dentry. If @dentry is not a
556 * kernfs one, %NULL is returned.
557 *
558 * While the returned kernfs_node will stay accessible as long as @dentry
559 * is accessible, the returned node can be in any state and the caller is
560 * fully responsible for determining what's accessible.
561 */
562struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
563{
564 if (dentry->d_sb->s_op == &kernfs_sops)
565 return kernfs_dentry_node(dentry);
566 return NULL;
567}
568
569static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
570 struct kernfs_node *parent,
571 const char *name, umode_t mode,
572 kuid_t uid, kgid_t gid,
573 unsigned flags)
574{
575 struct kernfs_node *kn;
576 u32 id_highbits;
577 int ret;
578
579 name = kstrdup_const(name, GFP_KERNEL);
580 if (!name)
581 return NULL;
582
583 kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
584 if (!kn)
585 goto err_out1;
586
587 idr_preload(GFP_KERNEL);
588 spin_lock(&kernfs_idr_lock);
589 ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
590 if (ret >= 0 && ret < root->last_id_lowbits)
591 root->id_highbits++;
592 id_highbits = root->id_highbits;
593 root->last_id_lowbits = ret;
594 spin_unlock(&kernfs_idr_lock);
595 idr_preload_end();
596 if (ret < 0)
597 goto err_out2;
598
599 kn->id = (u64)id_highbits << 32 | ret;
600
601 atomic_set(&kn->count, 1);
602 atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
603 RB_CLEAR_NODE(&kn->rb);
604
605 kn->name = name;
606 kn->mode = mode;
607 kn->flags = flags;
608
609 if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
610 struct iattr iattr = {
611 .ia_valid = ATTR_UID | ATTR_GID,
612 .ia_uid = uid,
613 .ia_gid = gid,
614 };
615
616 ret = __kernfs_setattr(kn, &iattr);
617 if (ret < 0)
618 goto err_out3;
619 }
620
621 if (parent) {
622 ret = security_kernfs_init_security(parent, kn);
623 if (ret)
624 goto err_out3;
625 }
626
627 return kn;
628
629 err_out3:
630 idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
631 err_out2:
632 kmem_cache_free(kernfs_node_cache, kn);
633 err_out1:
634 kfree_const(name);
635 return NULL;
636}
637
638struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
639 const char *name, umode_t mode,
640 kuid_t uid, kgid_t gid,
641 unsigned flags)
642{
643 struct kernfs_node *kn;
644
645 kn = __kernfs_new_node(kernfs_root(parent), parent,
646 name, mode, uid, gid, flags);
647 if (kn) {
648 kernfs_get(parent);
649 kn->parent = parent;
650 }
651 return kn;
652}
653
654/*
655 * kernfs_find_and_get_node_by_id - get kernfs_node from node id
656 * @root: the kernfs root
657 * @id: the target node id
658 *
659 * @id's lower 32bits encode ino and upper gen. If the gen portion is
660 * zero, all generations are matched.
661 *
662 * RETURNS:
663 * NULL on failure. Return a kernfs node with reference counter incremented
664 */
665struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
666 u64 id)
667{
668 struct kernfs_node *kn;
669 ino_t ino = kernfs_id_ino(id);
670 u32 gen = kernfs_id_gen(id);
671
672 spin_lock(&kernfs_idr_lock);
673
674 kn = idr_find(&root->ino_idr, (u32)ino);
675 if (!kn)
676 goto err_unlock;
677
678 if (sizeof(ino_t) >= sizeof(u64)) {
679 /* we looked up with the low 32bits, compare the whole */
680 if (kernfs_ino(kn) != ino)
681 goto err_unlock;
682 } else {
683 /* 0 matches all generations */
684 if (unlikely(gen && kernfs_gen(kn) != gen))
685 goto err_unlock;
686 }
687
688 /*
689 * ACTIVATED is protected with kernfs_mutex but it was clear when
690 * @kn was added to idr and we just wanna see it set. No need to
691 * grab kernfs_mutex.
692 */
693 if (unlikely(!(kn->flags & KERNFS_ACTIVATED) ||
694 !atomic_inc_not_zero(&kn->count)))
695 goto err_unlock;
696
697 spin_unlock(&kernfs_idr_lock);
698 return kn;
699err_unlock:
700 spin_unlock(&kernfs_idr_lock);
701 return NULL;
702}
703
704/**
705 * kernfs_add_one - add kernfs_node to parent without warning
706 * @kn: kernfs_node to be added
707 *
708 * The caller must already have initialized @kn->parent. This
709 * function increments nlink of the parent's inode if @kn is a
710 * directory and link into the children list of the parent.
711 *
712 * RETURNS:
713 * 0 on success, -EEXIST if entry with the given name already
714 * exists.
715 */
716int kernfs_add_one(struct kernfs_node *kn)
717{
718 struct kernfs_node *parent = kn->parent;
719 struct kernfs_iattrs *ps_iattr;
720 bool has_ns;
721 int ret;
722
723 mutex_lock(&kernfs_mutex);
724
725 ret = -EINVAL;
726 has_ns = kernfs_ns_enabled(parent);
727 if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
728 has_ns ? "required" : "invalid", parent->name, kn->name))
729 goto out_unlock;
730
731 if (kernfs_type(parent) != KERNFS_DIR)
732 goto out_unlock;
733
734 ret = -ENOENT;
735 if (parent->flags & KERNFS_EMPTY_DIR)
736 goto out_unlock;
737
738 if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
739 goto out_unlock;
740
741 kn->hash = kernfs_name_hash(kn->name, kn->ns);
742
743 ret = kernfs_link_sibling(kn);
744 if (ret)
745 goto out_unlock;
746
747 /* Update timestamps on the parent */
748 ps_iattr = parent->iattr;
749 if (ps_iattr) {
750 ktime_get_real_ts64(&ps_iattr->ia_ctime);
751 ps_iattr->ia_mtime = ps_iattr->ia_ctime;
752 }
753
754 mutex_unlock(&kernfs_mutex);
755
756 /*
757 * Activate the new node unless CREATE_DEACTIVATED is requested.
758 * If not activated here, the kernfs user is responsible for
759 * activating the node with kernfs_activate(). A node which hasn't
760 * been activated is not visible to userland and its removal won't
761 * trigger deactivation.
762 */
763 if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
764 kernfs_activate(kn);
765 return 0;
766
767out_unlock:
768 mutex_unlock(&kernfs_mutex);
769 return ret;
770}
771
772/**
773 * kernfs_find_ns - find kernfs_node with the given name
774 * @parent: kernfs_node to search under
775 * @name: name to look for
776 * @ns: the namespace tag to use
777 *
778 * Look for kernfs_node with name @name under @parent. Returns pointer to
779 * the found kernfs_node on success, %NULL on failure.
780 */
781static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
782 const unsigned char *name,
783 const void *ns)
784{
785 struct rb_node *node = parent->dir.children.rb_node;
786 bool has_ns = kernfs_ns_enabled(parent);
787 unsigned int hash;
788
789 lockdep_assert_held(&kernfs_mutex);
790
791 if (has_ns != (bool)ns) {
792 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
793 has_ns ? "required" : "invalid", parent->name, name);
794 return NULL;
795 }
796
797 hash = kernfs_name_hash(name, ns);
798 while (node) {
799 struct kernfs_node *kn;
800 int result;
801
802 kn = rb_to_kn(node);
803 result = kernfs_name_compare(hash, name, ns, kn);
804 if (result < 0)
805 node = node->rb_left;
806 else if (result > 0)
807 node = node->rb_right;
808 else
809 return kn;
810 }
811 return NULL;
812}
813
814static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
815 const unsigned char *path,
816 const void *ns)
817{
818 size_t len;
819 char *p, *name;
820
821 lockdep_assert_held(&kernfs_mutex);
822
823 /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
824 spin_lock_irq(&kernfs_rename_lock);
825
826 len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
827
828 if (len >= sizeof(kernfs_pr_cont_buf)) {
829 spin_unlock_irq(&kernfs_rename_lock);
830 return NULL;
831 }
832
833 p = kernfs_pr_cont_buf;
834
835 while ((name = strsep(&p, "/")) && parent) {
836 if (*name == '\0')
837 continue;
838 parent = kernfs_find_ns(parent, name, ns);
839 }
840
841 spin_unlock_irq(&kernfs_rename_lock);
842
843 return parent;
844}
845
846/**
847 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
848 * @parent: kernfs_node to search under
849 * @name: name to look for
850 * @ns: the namespace tag to use
851 *
852 * Look for kernfs_node with name @name under @parent and get a reference
853 * if found. This function may sleep and returns pointer to the found
854 * kernfs_node on success, %NULL on failure.
855 */
856struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
857 const char *name, const void *ns)
858{
859 struct kernfs_node *kn;
860
861 mutex_lock(&kernfs_mutex);
862 kn = kernfs_find_ns(parent, name, ns);
863 kernfs_get(kn);
864 mutex_unlock(&kernfs_mutex);
865
866 return kn;
867}
868EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
869
870/**
871 * kernfs_walk_and_get_ns - find and get kernfs_node with the given path
872 * @parent: kernfs_node to search under
873 * @path: path to look for
874 * @ns: the namespace tag to use
875 *
876 * Look for kernfs_node with path @path under @parent and get a reference
877 * if found. This function may sleep and returns pointer to the found
878 * kernfs_node on success, %NULL on failure.
879 */
880struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
881 const char *path, const void *ns)
882{
883 struct kernfs_node *kn;
884
885 mutex_lock(&kernfs_mutex);
886 kn = kernfs_walk_ns(parent, path, ns);
887 kernfs_get(kn);
888 mutex_unlock(&kernfs_mutex);
889
890 return kn;
891}
892
893/**
894 * kernfs_create_root - create a new kernfs hierarchy
895 * @scops: optional syscall operations for the hierarchy
896 * @flags: KERNFS_ROOT_* flags
897 * @priv: opaque data associated with the new directory
898 *
899 * Returns the root of the new hierarchy on success, ERR_PTR() value on
900 * failure.
901 */
902struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
903 unsigned int flags, void *priv)
904{
905 struct kernfs_root *root;
906 struct kernfs_node *kn;
907
908 root = kzalloc(sizeof(*root), GFP_KERNEL);
909 if (!root)
910 return ERR_PTR(-ENOMEM);
911
912 idr_init(&root->ino_idr);
913 INIT_LIST_HEAD(&root->supers);
914
915 /*
916 * On 64bit ino setups, id is ino. On 32bit, low 32bits are ino.
917 * High bits generation. The starting value for both ino and
918 * genenration is 1. Initialize upper 32bit allocation
919 * accordingly.
920 */
921 if (sizeof(ino_t) >= sizeof(u64))
922 root->id_highbits = 0;
923 else
924 root->id_highbits = 1;
925
926 kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO,
927 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
928 KERNFS_DIR);
929 if (!kn) {
930 idr_destroy(&root->ino_idr);
931 kfree(root);
932 return ERR_PTR(-ENOMEM);
933 }
934
935 kn->priv = priv;
936 kn->dir.root = root;
937
938 root->syscall_ops = scops;
939 root->flags = flags;
940 root->kn = kn;
941 init_waitqueue_head(&root->deactivate_waitq);
942
943 if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
944 kernfs_activate(kn);
945
946 return root;
947}
948
949/**
950 * kernfs_destroy_root - destroy a kernfs hierarchy
951 * @root: root of the hierarchy to destroy
952 *
953 * Destroy the hierarchy anchored at @root by removing all existing
954 * directories and destroying @root.
955 */
956void kernfs_destroy_root(struct kernfs_root *root)
957{
958 kernfs_remove(root->kn); /* will also free @root */
959}
960
961/**
962 * kernfs_create_dir_ns - create a directory
963 * @parent: parent in which to create a new directory
964 * @name: name of the new directory
965 * @mode: mode of the new directory
966 * @uid: uid of the new directory
967 * @gid: gid of the new directory
968 * @priv: opaque data associated with the new directory
969 * @ns: optional namespace tag of the directory
970 *
971 * Returns the created node on success, ERR_PTR() value on failure.
972 */
973struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
974 const char *name, umode_t mode,
975 kuid_t uid, kgid_t gid,
976 void *priv, const void *ns)
977{
978 struct kernfs_node *kn;
979 int rc;
980
981 /* allocate */
982 kn = kernfs_new_node(parent, name, mode | S_IFDIR,
983 uid, gid, KERNFS_DIR);
984 if (!kn)
985 return ERR_PTR(-ENOMEM);
986
987 kn->dir.root = parent->dir.root;
988 kn->ns = ns;
989 kn->priv = priv;
990
991 /* link in */
992 rc = kernfs_add_one(kn);
993 if (!rc)
994 return kn;
995
996 kernfs_put(kn);
997 return ERR_PTR(rc);
998}
999
1000/**
1001 * kernfs_create_empty_dir - create an always empty directory
1002 * @parent: parent in which to create a new directory
1003 * @name: name of the new directory
1004 *
1005 * Returns the created node on success, ERR_PTR() value on failure.
1006 */
1007struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
1008 const char *name)
1009{
1010 struct kernfs_node *kn;
1011 int rc;
1012
1013 /* allocate */
1014 kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR,
1015 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
1016 if (!kn)
1017 return ERR_PTR(-ENOMEM);
1018
1019 kn->flags |= KERNFS_EMPTY_DIR;
1020 kn->dir.root = parent->dir.root;
1021 kn->ns = NULL;
1022 kn->priv = NULL;
1023
1024 /* link in */
1025 rc = kernfs_add_one(kn);
1026 if (!rc)
1027 return kn;
1028
1029 kernfs_put(kn);
1030 return ERR_PTR(rc);
1031}
1032
1033static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
1034{
1035 struct kernfs_node *kn;
1036
1037 if (flags & LOOKUP_RCU)
1038 return -ECHILD;
1039
1040 /* Always perform fresh lookup for negatives */
1041 if (d_really_is_negative(dentry))
1042 goto out_bad_unlocked;
1043
1044 kn = kernfs_dentry_node(dentry);
1045 mutex_lock(&kernfs_mutex);
1046
1047 /* The kernfs node has been deactivated */
1048 if (!kernfs_active(kn))
1049 goto out_bad;
1050
1051 /* The kernfs node has been moved? */
1052 if (kernfs_dentry_node(dentry->d_parent) != kn->parent)
1053 goto out_bad;
1054
1055 /* The kernfs node has been renamed */
1056 if (strcmp(dentry->d_name.name, kn->name) != 0)
1057 goto out_bad;
1058
1059 /* The kernfs node has been moved to a different namespace */
1060 if (kn->parent && kernfs_ns_enabled(kn->parent) &&
1061 kernfs_info(dentry->d_sb)->ns != kn->ns)
1062 goto out_bad;
1063
1064 mutex_unlock(&kernfs_mutex);
1065 return 1;
1066out_bad:
1067 mutex_unlock(&kernfs_mutex);
1068out_bad_unlocked:
1069 return 0;
1070}
1071
1072const struct dentry_operations kernfs_dops = {
1073 .d_revalidate = kernfs_dop_revalidate,
1074};
1075
1076static struct dentry *kernfs_iop_lookup(struct inode *dir,
1077 struct dentry *dentry,
1078 unsigned int flags)
1079{
1080 struct dentry *ret;
1081 struct kernfs_node *parent = dir->i_private;
1082 struct kernfs_node *kn;
1083 struct inode *inode;
1084 const void *ns = NULL;
1085
1086 mutex_lock(&kernfs_mutex);
1087
1088 if (kernfs_ns_enabled(parent))
1089 ns = kernfs_info(dir->i_sb)->ns;
1090
1091 kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
1092
1093 /* no such entry */
1094 if (!kn || !kernfs_active(kn)) {
1095 ret = NULL;
1096 goto out_unlock;
1097 }
1098
1099 /* attach dentry and inode */
1100 inode = kernfs_get_inode(dir->i_sb, kn);
1101 if (!inode) {
1102 ret = ERR_PTR(-ENOMEM);
1103 goto out_unlock;
1104 }
1105
1106 /* instantiate and hash dentry */
1107 ret = d_splice_alias(inode, dentry);
1108 out_unlock:
1109 mutex_unlock(&kernfs_mutex);
1110 return ret;
1111}
1112
1113static int kernfs_iop_mkdir(struct user_namespace *mnt_userns,
1114 struct inode *dir, struct dentry *dentry,
1115 umode_t mode)
1116{
1117 struct kernfs_node *parent = dir->i_private;
1118 struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
1119 int ret;
1120
1121 if (!scops || !scops->mkdir)
1122 return -EPERM;
1123
1124 if (!kernfs_get_active(parent))
1125 return -ENODEV;
1126
1127 ret = scops->mkdir(parent, dentry->d_name.name, mode);
1128
1129 kernfs_put_active(parent);
1130 return ret;
1131}
1132
1133static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
1134{
1135 struct kernfs_node *kn = kernfs_dentry_node(dentry);
1136 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1137 int ret;
1138
1139 if (!scops || !scops->rmdir)
1140 return -EPERM;
1141
1142 if (!kernfs_get_active(kn))
1143 return -ENODEV;
1144
1145 ret = scops->rmdir(kn);
1146
1147 kernfs_put_active(kn);
1148 return ret;
1149}
1150
1151static int kernfs_iop_rename(struct user_namespace *mnt_userns,
1152 struct inode *old_dir, struct dentry *old_dentry,
1153 struct inode *new_dir, struct dentry *new_dentry,
1154 unsigned int flags)
1155{
1156 struct kernfs_node *kn = kernfs_dentry_node(old_dentry);
1157 struct kernfs_node *new_parent = new_dir->i_private;
1158 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1159 int ret;
1160
1161 if (flags)
1162 return -EINVAL;
1163
1164 if (!scops || !scops->rename)
1165 return -EPERM;
1166
1167 if (!kernfs_get_active(kn))
1168 return -ENODEV;
1169
1170 if (!kernfs_get_active(new_parent)) {
1171 kernfs_put_active(kn);
1172 return -ENODEV;
1173 }
1174
1175 ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
1176
1177 kernfs_put_active(new_parent);
1178 kernfs_put_active(kn);
1179 return ret;
1180}
1181
1182const struct inode_operations kernfs_dir_iops = {
1183 .lookup = kernfs_iop_lookup,
1184 .permission = kernfs_iop_permission,
1185 .setattr = kernfs_iop_setattr,
1186 .getattr = kernfs_iop_getattr,
1187 .listxattr = kernfs_iop_listxattr,
1188
1189 .mkdir = kernfs_iop_mkdir,
1190 .rmdir = kernfs_iop_rmdir,
1191 .rename = kernfs_iop_rename,
1192};
1193
1194static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
1195{
1196 struct kernfs_node *last;
1197
1198 while (true) {
1199 struct rb_node *rbn;
1200
1201 last = pos;
1202
1203 if (kernfs_type(pos) != KERNFS_DIR)
1204 break;
1205
1206 rbn = rb_first(&pos->dir.children);
1207 if (!rbn)
1208 break;
1209
1210 pos = rb_to_kn(rbn);
1211 }
1212
1213 return last;
1214}
1215
1216/**
1217 * kernfs_next_descendant_post - find the next descendant for post-order walk
1218 * @pos: the current position (%NULL to initiate traversal)
1219 * @root: kernfs_node whose descendants to walk
1220 *
1221 * Find the next descendant to visit for post-order traversal of @root's
1222 * descendants. @root is included in the iteration and the last node to be
1223 * visited.
1224 */
1225static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
1226 struct kernfs_node *root)
1227{
1228 struct rb_node *rbn;
1229
1230 lockdep_assert_held(&kernfs_mutex);
1231
1232 /* if first iteration, visit leftmost descendant which may be root */
1233 if (!pos)
1234 return kernfs_leftmost_descendant(root);
1235
1236 /* if we visited @root, we're done */
1237 if (pos == root)
1238 return NULL;
1239
1240 /* if there's an unvisited sibling, visit its leftmost descendant */
1241 rbn = rb_next(&pos->rb);
1242 if (rbn)
1243 return kernfs_leftmost_descendant(rb_to_kn(rbn));
1244
1245 /* no sibling left, visit parent */
1246 return pos->parent;
1247}
1248
1249/**
1250 * kernfs_activate - activate a node which started deactivated
1251 * @kn: kernfs_node whose subtree is to be activated
1252 *
1253 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
1254 * needs to be explicitly activated. A node which hasn't been activated
1255 * isn't visible to userland and deactivation is skipped during its
1256 * removal. This is useful to construct atomic init sequences where
1257 * creation of multiple nodes should either succeed or fail atomically.
1258 *
1259 * The caller is responsible for ensuring that this function is not called
1260 * after kernfs_remove*() is invoked on @kn.
1261 */
1262void kernfs_activate(struct kernfs_node *kn)
1263{
1264 struct kernfs_node *pos;
1265
1266 mutex_lock(&kernfs_mutex);
1267
1268 pos = NULL;
1269 while ((pos = kernfs_next_descendant_post(pos, kn))) {
1270 if (pos->flags & KERNFS_ACTIVATED)
1271 continue;
1272
1273 WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
1274 WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
1275
1276 atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
1277 pos->flags |= KERNFS_ACTIVATED;
1278 }
1279
1280 mutex_unlock(&kernfs_mutex);
1281}
1282
1283static void __kernfs_remove(struct kernfs_node *kn)
1284{
1285 struct kernfs_node *pos;
1286
1287 lockdep_assert_held(&kernfs_mutex);
1288
1289 /*
1290 * Short-circuit if non-root @kn has already finished removal.
1291 * This is for kernfs_remove_self() which plays with active ref
1292 * after removal.
1293 */
1294 if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
1295 return;
1296
1297 pr_debug("kernfs %s: removing\n", kn->name);
1298
1299 /* prevent any new usage under @kn by deactivating all nodes */
1300 pos = NULL;
1301 while ((pos = kernfs_next_descendant_post(pos, kn)))
1302 if (kernfs_active(pos))
1303 atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
1304
1305 /* deactivate and unlink the subtree node-by-node */
1306 do {
1307 pos = kernfs_leftmost_descendant(kn);
1308
1309 /*
1310 * kernfs_drain() drops kernfs_mutex temporarily and @pos's
1311 * base ref could have been put by someone else by the time
1312 * the function returns. Make sure it doesn't go away
1313 * underneath us.
1314 */
1315 kernfs_get(pos);
1316
1317 /*
1318 * Drain iff @kn was activated. This avoids draining and
1319 * its lockdep annotations for nodes which have never been
1320 * activated and allows embedding kernfs_remove() in create
1321 * error paths without worrying about draining.
1322 */
1323 if (kn->flags & KERNFS_ACTIVATED)
1324 kernfs_drain(pos);
1325 else
1326 WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
1327
1328 /*
1329 * kernfs_unlink_sibling() succeeds once per node. Use it
1330 * to decide who's responsible for cleanups.
1331 */
1332 if (!pos->parent || kernfs_unlink_sibling(pos)) {
1333 struct kernfs_iattrs *ps_iattr =
1334 pos->parent ? pos->parent->iattr : NULL;
1335
1336 /* update timestamps on the parent */
1337 if (ps_iattr) {
1338 ktime_get_real_ts64(&ps_iattr->ia_ctime);
1339 ps_iattr->ia_mtime = ps_iattr->ia_ctime;
1340 }
1341
1342 kernfs_put(pos);
1343 }
1344
1345 kernfs_put(pos);
1346 } while (pos != kn);
1347}
1348
1349/**
1350 * kernfs_remove - remove a kernfs_node recursively
1351 * @kn: the kernfs_node to remove
1352 *
1353 * Remove @kn along with all its subdirectories and files.
1354 */
1355void kernfs_remove(struct kernfs_node *kn)
1356{
1357 mutex_lock(&kernfs_mutex);
1358 __kernfs_remove(kn);
1359 mutex_unlock(&kernfs_mutex);
1360}
1361
/**
 * kernfs_break_active_protection - give up active protection of @kn
 * @kn: the self kernfs_node
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops.  Every call must be
 * paired with a call to kernfs_unbreak_active_protection().
 *
 * The active reference on @kn held by the caller is released.  From then
 * on @kn may be removed at any point, and the caller alone is responsible
 * for making sure that whatever it dereferences stays accessible.
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
	/*
	 * Take ourselves out of the active ref dependency chain.  If we're
	 * called without an active ref, lockdep will complain.
	 */
	kernfs_put_active(kn);
}
1384
1385/**
1386 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
1387 * @kn: the self kernfs_node
1388 *
1389 * If kernfs_break_active_protection() was called, this function must be
1390 * invoked before finishing the kernfs operation. Note that while this
1391 * function restores the active reference, it doesn't and can't actually
1392 * restore the active protection - @kn may already or be in the process of
1393 * being removed. Once kernfs_break_active_protection() is invoked, that
1394 * protection is irreversibly gone for the kernfs operation instance.
1395 *
1396 * While this function may be called at any point after
1397 * kernfs_break_active_protection() is invoked, its most useful location
1398 * would be right before the enclosing kernfs operation returns.
1399 */
1400void kernfs_unbreak_active_protection(struct kernfs_node *kn)
1401{
1402 /*
1403 * @kn->active could be in any state; however, the increment we do
1404 * here will be undone as soon as the enclosing kernfs operation
1405 * finishes and this temporary bump can't break anything. If @kn
1406 * is alive, nothing changes. If @kn is being deactivated, the
1407 * soon-to-follow put will either finish deactivation or restore
1408 * deactivated state. If @kn is already removed, the temporary
1409 * bump is guaranteed to be gone before @kn is released.
1410 */
1411 atomic_inc(&kn->active);
1412 if (kernfs_lockdep(kn))
1413 rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
1414}
1415
1416/**
1417 * kernfs_remove_self - remove a kernfs_node from its own method
1418 * @kn: the self kernfs_node to remove
1419 *
1420 * The caller must be running off of a kernfs operation which is invoked
1421 * with an active reference - e.g. one of kernfs_ops. This can be used to
1422 * implement a file operation which deletes itself.
1423 *
1424 * For example, the "delete" file for a sysfs device directory can be
1425 * implemented by invoking kernfs_remove_self() on the "delete" file
1426 * itself. This function breaks the circular dependency of trying to
1427 * deactivate self while holding an active ref itself. It isn't necessary
1428 * to modify the usual removal path to use kernfs_remove_self(). The
1429 * "delete" implementation can simply invoke kernfs_remove_self() on self
1430 * before proceeding with the usual removal path. kernfs will ignore later
1431 * kernfs_remove() on self.
1432 *
1433 * kernfs_remove_self() can be called multiple times concurrently on the
1434 * same kernfs_node. Only the first one actually performs removal and
1435 * returns %true. All others will wait until the kernfs operation which
1436 * won self-removal finishes and return %false. Note that the losers wait
1437 * for the completion of not only the winning kernfs_remove_self() but also
1438 * the whole kernfs_ops which won the arbitration. This can be used to
1439 * guarantee, for example, all concurrent writes to a "delete" file to
1440 * finish only after the whole operation is complete.
1441 */
1442bool kernfs_remove_self(struct kernfs_node *kn)
1443{
1444 bool ret;
1445
1446 mutex_lock(&kernfs_mutex);
1447 kernfs_break_active_protection(kn);
1448
1449 /*
1450 * SUICIDAL is used to arbitrate among competing invocations. Only
1451 * the first one will actually perform removal. When the removal
1452 * is complete, SUICIDED is set and the active ref is restored
1453 * while holding kernfs_mutex. The ones which lost arbitration
1454 * waits for SUICDED && drained which can happen only after the
1455 * enclosing kernfs operation which executed the winning instance
1456 * of kernfs_remove_self() finished.
1457 */
1458 if (!(kn->flags & KERNFS_SUICIDAL)) {
1459 kn->flags |= KERNFS_SUICIDAL;
1460 __kernfs_remove(kn);
1461 kn->flags |= KERNFS_SUICIDED;
1462 ret = true;
1463 } else {
1464 wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
1465 DEFINE_WAIT(wait);
1466
1467 while (true) {
1468 prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
1469
1470 if ((kn->flags & KERNFS_SUICIDED) &&
1471 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
1472 break;
1473
1474 mutex_unlock(&kernfs_mutex);
1475 schedule();
1476 mutex_lock(&kernfs_mutex);
1477 }
1478 finish_wait(waitq, &wait);
1479 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
1480 ret = false;
1481 }
1482
1483 /*
1484 * This must be done while holding kernfs_mutex; otherwise, waiting
1485 * for SUICIDED && deactivated could finish prematurely.
1486 */
1487 kernfs_unbreak_active_protection(kn);
1488
1489 mutex_unlock(&kernfs_mutex);
1490 return ret;
1491}
1492
1493/**
1494 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
1495 * @parent: parent of the target
1496 * @name: name of the kernfs_node to remove
1497 * @ns: namespace tag of the kernfs_node to remove
1498 *
1499 * Look for the kernfs_node with @name and @ns under @parent and remove it.
1500 * Returns 0 on success, -ENOENT if such entry doesn't exist.
1501 */
1502int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1503 const void *ns)
1504{
1505 struct kernfs_node *kn;
1506
1507 if (!parent) {
1508 WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1509 name);
1510 return -ENOENT;
1511 }
1512
1513 mutex_lock(&kernfs_mutex);
1514
1515 kn = kernfs_find_ns(parent, name, ns);
1516 if (kn)
1517 __kernfs_remove(kn);
1518
1519 mutex_unlock(&kernfs_mutex);
1520
1521 if (kn)
1522 return 0;
1523 else
1524 return -ENOENT;
1525}
1526
1527/**
1528 * kernfs_rename_ns - move and rename a kernfs_node
1529 * @kn: target node
1530 * @new_parent: new parent to put @sd under
1531 * @new_name: new name
1532 * @new_ns: new namespace tag
1533 */
1534int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
1535 const char *new_name, const void *new_ns)
1536{
1537 struct kernfs_node *old_parent;
1538 const char *old_name = NULL;
1539 int error;
1540
1541 /* can't move or rename root */
1542 if (!kn->parent)
1543 return -EINVAL;
1544
1545 mutex_lock(&kernfs_mutex);
1546
1547 error = -ENOENT;
1548 if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
1549 (new_parent->flags & KERNFS_EMPTY_DIR))
1550 goto out;
1551
1552 error = 0;
1553 if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
1554 (strcmp(kn->name, new_name) == 0))
1555 goto out; /* nothing to rename */
1556
1557 error = -EEXIST;
1558 if (kernfs_find_ns(new_parent, new_name, new_ns))
1559 goto out;
1560
1561 /* rename kernfs_node */
1562 if (strcmp(kn->name, new_name) != 0) {
1563 error = -ENOMEM;
1564 new_name = kstrdup_const(new_name, GFP_KERNEL);
1565 if (!new_name)
1566 goto out;
1567 } else {
1568 new_name = NULL;
1569 }
1570
1571 /*
1572 * Move to the appropriate place in the appropriate directories rbtree.
1573 */
1574 kernfs_unlink_sibling(kn);
1575 kernfs_get(new_parent);
1576
1577 /* rename_lock protects ->parent and ->name accessors */
1578 spin_lock_irq(&kernfs_rename_lock);
1579
1580 old_parent = kn->parent;
1581 kn->parent = new_parent;
1582
1583 kn->ns = new_ns;
1584 if (new_name) {
1585 old_name = kn->name;
1586 kn->name = new_name;
1587 }
1588
1589 spin_unlock_irq(&kernfs_rename_lock);
1590
1591 kn->hash = kernfs_name_hash(kn->name, kn->ns);
1592 kernfs_link_sibling(kn);
1593
1594 kernfs_put(old_parent);
1595 kfree_const(old_name);
1596
1597 error = 0;
1598 out:
1599 mutex_unlock(&kernfs_mutex);
1600 return error;
1601}
1602
1603/* Relationship between mode and the DT_xxx types */
1604static inline unsigned char dt_type(struct kernfs_node *kn)
1605{
1606 return (kn->mode >> 12) & 15;
1607}
1608
1609static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1610{
1611 kernfs_put(filp->private_data);
1612 return 0;
1613}
1614
1615static struct kernfs_node *kernfs_dir_pos(const void *ns,
1616 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1617{
1618 if (pos) {
1619 int valid = kernfs_active(pos) &&
1620 pos->parent == parent && hash == pos->hash;
1621 kernfs_put(pos);
1622 if (!valid)
1623 pos = NULL;
1624 }
1625 if (!pos && (hash > 1) && (hash < INT_MAX)) {
1626 struct rb_node *node = parent->dir.children.rb_node;
1627 while (node) {
1628 pos = rb_to_kn(node);
1629
1630 if (hash < pos->hash)
1631 node = node->rb_left;
1632 else if (hash > pos->hash)
1633 node = node->rb_right;
1634 else
1635 break;
1636 }
1637 }
1638 /* Skip over entries which are dying/dead or in the wrong namespace */
1639 while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
1640 struct rb_node *node = rb_next(&pos->rb);
1641 if (!node)
1642 pos = NULL;
1643 else
1644 pos = rb_to_kn(node);
1645 }
1646 return pos;
1647}
1648
1649static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1650 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1651{
1652 pos = kernfs_dir_pos(ns, parent, ino, pos);
1653 if (pos) {
1654 do {
1655 struct rb_node *node = rb_next(&pos->rb);
1656 if (!node)
1657 pos = NULL;
1658 else
1659 pos = rb_to_kn(node);
1660 } while (pos && (!kernfs_active(pos) || pos->ns != ns));
1661 }
1662 return pos;
1663}
1664
1665static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1666{
1667 struct dentry *dentry = file->f_path.dentry;
1668 struct kernfs_node *parent = kernfs_dentry_node(dentry);
1669 struct kernfs_node *pos = file->private_data;
1670 const void *ns = NULL;
1671
1672 if (!dir_emit_dots(file, ctx))
1673 return 0;
1674 mutex_lock(&kernfs_mutex);
1675
1676 if (kernfs_ns_enabled(parent))
1677 ns = kernfs_info(dentry->d_sb)->ns;
1678
1679 for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1680 pos;
1681 pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1682 const char *name = pos->name;
1683 unsigned int type = dt_type(pos);
1684 int len = strlen(name);
1685 ino_t ino = kernfs_ino(pos);
1686
1687 ctx->pos = pos->hash;
1688 file->private_data = pos;
1689 kernfs_get(pos);
1690
1691 mutex_unlock(&kernfs_mutex);
1692 if (!dir_emit(ctx, name, len, ino, type))
1693 return 0;
1694 mutex_lock(&kernfs_mutex);
1695 }
1696 mutex_unlock(&kernfs_mutex);
1697 file->private_data = NULL;
1698 ctx->pos = INT_MAX;
1699 return 0;
1700}
1701
1702const struct file_operations kernfs_dir_fops = {
1703 .read = generic_read_dir,
1704 .iterate_shared = kernfs_fop_readdir,
1705 .release = kernfs_dir_fop_release,
1706 .llseek = generic_file_llseek,
1707};