// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/kernfs/file.c - kernfs file implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 */

#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include <linux/uio.h>

#include "kernfs-internal.h"

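/*
 * struct kernfs_open_node tracks the open files of a kernfs_node.  It is
 * created on first open and freed (RCU-deferred) when the last open file
 * goes away - see kernfs_get_open_node() and kernfs_unlink_open_file().
 */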
struct kernfs_open_node {
	struct rcu_head		rcu_head;
	atomic_t		event;
	wait_queue_head_t	poll;
	struct list_head	files; /* goes through kernfs_open_file.list */
};

/*
 * kernfs_notify() may be called from any context and bounces notifications
 * through a work item.  To minimize space overhead in kernfs_node, the
 * pending queue is implemented as a singly linked list of kernfs_nodes.
 * The list is terminated with the self pointer so that whether a
 * kernfs_node is on the list or not can be determined by testing the next
 * pointer for NULL.
 */
#define KERNFS_NOTIFY_EOL			((void *)&kernfs_notify_list)

static DEFINE_SPINLOCK(kernfs_notify_lock);
static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;

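/*
 * Example (illustrative): with two nodes A and B pending, the chain is
 * kernfs_notify_list -> A -> B -> KERNFS_NOTIFY_EOL, while a node that
 * is not queued has a NULL ->attr.notify_next.
 */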
static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn)
{
	int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);

	return &kernfs_locks->open_file_mutex[idx];
}

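/*
 * Look up the hashed mutex protecting @kn's open-file state and return
 * it locked.
 */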
static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
{
	struct mutex *lock;

	lock = kernfs_open_file_mutex_ptr(kn);

	mutex_lock(lock);

	return lock;
}

/**
 * kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn.
 *
 * @of:	associated kernfs_open_file instance.
 * @kn:	target kernfs_node.
 *
 * Fetch and return ->attr.open of @kn if @of->list is non-empty.
 * If @of->list is not empty we can safely assume that @of is on
 * @kn->attr.open->files list and this guarantees that @kn->attr.open
 * will not vanish, i.e. dereferencing outside an RCU read-side critical
 * section is safe here.
 *
 * The caller needs to make sure that @of->list is not empty.
 */
static struct kernfs_open_node *
kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn)
{
	struct kernfs_open_node *on;

	on = rcu_dereference_check(kn->attr.open, !list_empty(&of->list));

	return on;
}

/**
 * kernfs_deref_open_node_protected - Get kernfs_open_node corresponding to @kn
 *
 * @kn: target kernfs_node.
 *
 * Fetch and return ->attr.open of @kn when the caller holds
 * kernfs_open_file_mutex_ptr(kn).
 *
 * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn).  So when
 * the caller guarantees that this mutex is being held, other updaters can't
 * change ->attr.open and this means that we can safely deref ->attr.open
 * outside the RCU read-side critical section.
 *
 * The caller needs to make sure that kernfs_open_file_mutex is held.
 */
static struct kernfs_open_node *
kernfs_deref_open_node_protected(struct kernfs_node *kn)
{
	return rcu_dereference_protected(kn->attr.open,
				lockdep_is_held(kernfs_open_file_mutex_ptr(kn)));
}

static struct kernfs_open_file *kernfs_of(struct file *file)
{
	return ((struct seq_file *)file->private_data)->private;
}

/*
 * Determine the kernfs_ops for the given kernfs_node.  This function must
 * be called while holding an active reference.
 */
static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
{
	if (kn->flags & KERNFS_LOCKDEP)
		lockdep_assert_held(kn);
	return kn->attr.ops;
}

/*
 * As kernfs_seq_stop() is also called after kernfs_seq_start() or
 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
 * a seq_file iteration which is fully initialized with an active reference
 * or an aborted kernfs_seq_start() due to get_active failure.  The
 * position pointer is the only context for each seq_file iteration and
 * thus the stop condition should be encoded in it.  As the return value is
 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
 * choice to indicate get_active failure.
 *
 * Unfortunately, this is complicated due to the optional custom seq_file
 * operations which may return ERR_PTR(-ENODEV) too.  kernfs_seq_stop()
 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
 * custom seq_file operations and thus can't decide whether put_active
 * should be performed or not only on ERR_PTR(-ENODEV).
 *
 * This is worked around by factoring out the custom seq_stop() and
 * put_active part into kernfs_seq_stop_active(), skipping it from
 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
 * that kernfs_seq_stop_active() is skipped only after get_active failure.
 */
static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	if (ops->seq_stop)
		ops->seq_stop(sf, v);
	kernfs_put_active(of->kn);
}

static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops;

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn))
		return ERR_PTR(-ENODEV);

	ops = kernfs_ops(of->kn);
	if (ops->seq_start) {
		void *next = ops->seq_start(sf, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	}
	return single_start(sf, ppos);
}

static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	if (ops->seq_next) {
		void *next = ops->seq_next(sf, v, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	} else {
		/*
		 * The same behavior and code as single_open(), always
		 * terminate after the initial read.
		 */
		++*ppos;
		return NULL;
	}
}

static void kernfs_seq_stop(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;

	if (v != ERR_PTR(-ENODEV))
		kernfs_seq_stop_active(sf, v);
	mutex_unlock(&of->mutex);
}

static int kernfs_seq_show(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;
	struct kernfs_open_node *on = kernfs_deref_open_node(of, of->kn);

	if (!on)
		return -EINVAL;

	of->event = atomic_read(&on->event);

	return of->kn->attr.ops->seq_show(sf, v);
}

static const struct seq_operations kernfs_seq_ops = {
	.start = kernfs_seq_start,
	.next = kernfs_seq_next,
	.stop = kernfs_seq_stop,
	.show = kernfs_seq_show,
};

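/*
 * Example (illustrative): a minimal read-only attribute implemented via
 * ->seq_show, which is dispatched through kernfs_seq_ops above.  The
 * names "foo_show" and "foo_ops" are hypothetical.
 *
 *	static int foo_show(struct seq_file *sf, void *v)
 *	{
 *		seq_puts(sf, "hello\n");
 *		return 0;
 *	}
 *
 *	static const struct kernfs_ops foo_ops = {
 *		.seq_show	= foo_show,
 *	};
 */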
/*
 * As reading a bin file can have side-effects, the exact offset and bytes
 * specified in the read(2) call should be passed to the read callback,
 * making it difficult to use seq_file.  Implement simplistic custom
 * buffering for bin files.
 */
static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
	ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
	const struct kernfs_ops *ops;
	struct kernfs_open_node *on;
	char *buf;

	buf = of->prealloc_buf;
	if (buf)
		mutex_lock(&of->prealloc_mutex);
	else
		buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * @of->mutex nests outside active ref and is used to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn)) {
		len = -ENODEV;
		mutex_unlock(&of->mutex);
		goto out_free;
	}

	on = kernfs_deref_open_node(of, of->kn);
	if (!on) {
		len = -EINVAL;
		mutex_unlock(&of->mutex);
		goto out_free;
	}

	of->event = atomic_read(&on->event);

	ops = kernfs_ops(of->kn);
	if (ops->read)
		len = ops->read(of, buf, len, iocb->ki_pos);
	else
		len = -EINVAL;

	kernfs_put_active(of->kn);
	mutex_unlock(&of->mutex);

	if (len < 0)
		goto out_free;

	if (copy_to_iter(buf, len, iter) != len) {
		len = -EFAULT;
		goto out_free;
	}

	iocb->ki_pos += len;

 out_free:
	if (buf == of->prealloc_buf)
		mutex_unlock(&of->prealloc_mutex);
	else
		kfree(buf);
	return len;
}

static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW)
		return seq_read_iter(iocb, iter);
	return kernfs_file_read_iter(iocb, iter);
}

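/*
 * Example (illustrative): a ->read callback, as dispatched above for
 * nodes without ->seq_show.  "foo_read" is a hypothetical name; @buf is
 * the kernel buffer set up by kernfs_file_read_iter() and holds at most
 * PAGE_SIZE bytes.
 *
 *	static ssize_t foo_read(struct kernfs_open_file *of, char *buf,
 *				size_t bytes, loff_t off)
 *	{
 *		static const char data[] = "hello\n";
 *		size_t size = sizeof(data) - 1;
 *
 *		if (off >= size)
 *			return 0;
 *		bytes = min(bytes, size - (size_t)off);
 *		memcpy(buf, data + off, bytes);
 *		return bytes;
 *	}
 */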
/*
 * Copy data in from userland and pass it to the matching kernfs write
 * operation.
 *
 * There is no easy way for us to know if userspace is only doing a partial
 * write, so we don't support them.  We expect the entire buffer to come on
 * the first write.  Hint: if you're writing a value, first read the file,
 * modify only the value you're changing, then write the entire buffer
 * back.
 */
static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
	ssize_t len = iov_iter_count(iter);
	const struct kernfs_ops *ops;
	char *buf;

	if (of->atomic_write_len) {
		if (len > of->atomic_write_len)
			return -E2BIG;
	} else {
		len = min_t(size_t, len, PAGE_SIZE);
	}

	buf = of->prealloc_buf;
	if (buf)
		mutex_lock(&of->prealloc_mutex);
	else
		buf = kmalloc(len + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_iter(buf, len, iter) != len) {
		len = -EFAULT;
		goto out_free;
	}
	buf[len] = '\0';	/* guarantee string termination */

	/*
	 * @of->mutex nests outside active ref and is used to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn)) {
		mutex_unlock(&of->mutex);
		len = -ENODEV;
		goto out_free;
	}

	ops = kernfs_ops(of->kn);
	if (ops->write)
		len = ops->write(of, buf, len, iocb->ki_pos);
	else
		len = -EINVAL;

	kernfs_put_active(of->kn);
	mutex_unlock(&of->mutex);

	if (len > 0)
		iocb->ki_pos += len;

out_free:
	if (buf == of->prealloc_buf)
		mutex_unlock(&of->prealloc_mutex);
	else
		kfree(buf);
	return len;
}

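/*
 * Example (illustrative): a ->write callback receiving the
 * NUL-terminated kernel copy made above.  "foo_write" and
 * "foo_store_val" are hypothetical names.
 *
 *	static ssize_t foo_write(struct kernfs_open_file *of, char *buf,
 *				 size_t bytes, loff_t off)
 *	{
 *		int val, ret;
 *
 *		ret = kstrtoint(strim(buf), 0, &val);
 *		if (ret)
 *			return ret;
 *		WRITE_ONCE(foo_store_val, val);
 *		return bytes;
 *	}
 */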
static void kernfs_vma_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);

	if (!of->vm_ops)
		return;

	if (!kernfs_get_active(of->kn))
		return;

	if (of->vm_ops->open)
		of->vm_ops->open(vma);

	kernfs_put_active(of->kn);
}

static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	vm_fault_t ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!kernfs_get_active(of->kn))
		return VM_FAULT_SIGBUS;

	ret = VM_FAULT_SIGBUS;
	if (of->vm_ops->fault)
		ret = of->vm_ops->fault(vmf);

	kernfs_put_active(of->kn);
	return ret;
}

static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	vm_fault_t ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!kernfs_get_active(of->kn))
		return VM_FAULT_SIGBUS;

	ret = 0;
	if (of->vm_ops->page_mkwrite)
		ret = of->vm_ops->page_mkwrite(vmf);
	else
		file_update_time(file);

	kernfs_put_active(of->kn);
	return ret;
}

static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
			     void *buf, int len, int write)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	int ret;

	if (!of->vm_ops)
		return -EINVAL;

	if (!kernfs_get_active(of->kn))
		return -EINVAL;

	ret = -EINVAL;
	if (of->vm_ops->access)
		ret = of->vm_ops->access(vma, addr, buf, len, write);

	kernfs_put_active(of->kn);
	return ret;
}

#ifdef CONFIG_NUMA
static int kernfs_vma_set_policy(struct vm_area_struct *vma,
				 struct mempolicy *new)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	int ret;

	if (!of->vm_ops)
		return 0;

	if (!kernfs_get_active(of->kn))
		return -EINVAL;

	ret = 0;
	if (of->vm_ops->set_policy)
		ret = of->vm_ops->set_policy(vma, new);

	kernfs_put_active(of->kn);
	return ret;
}

static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
					       unsigned long addr)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	struct mempolicy *pol;

	if (!of->vm_ops)
		return vma->vm_policy;

	if (!kernfs_get_active(of->kn))
		return vma->vm_policy;

	pol = vma->vm_policy;
	if (of->vm_ops->get_policy)
		pol = of->vm_ops->get_policy(vma, addr);

	kernfs_put_active(of->kn);
	return pol;
}
#endif

static const struct vm_operations_struct kernfs_vm_ops = {
	.open		= kernfs_vma_open,
	.fault		= kernfs_vma_fault,
	.page_mkwrite	= kernfs_vma_page_mkwrite,
	.access		= kernfs_vma_access,
#ifdef CONFIG_NUMA
	.set_policy	= kernfs_vma_set_policy,
	.get_policy	= kernfs_vma_get_policy,
#endif
};

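/*
 * kernfs_fop_mmap() below stashes the implementation's vm_operations in
 * of->vm_ops and installs the wrappers above instead, so that every VM
 * callback runs (or bails out) under an active reference to the node.
 */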
static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct kernfs_open_file *of = kernfs_of(file);
	const struct kernfs_ops *ops;
	int rc;

	/*
	 * mmap path and of->mutex are prone to triggering spurious lockdep
	 * warnings and we don't want to add spurious locking dependency
	 * between the two.  Check whether mmap is actually implemented
	 * without grabbing @of->mutex by testing HAS_MMAP flag.  See the
	 * comment in kernfs_fop_open() for more details.
	 */
	if (!(of->kn->flags & KERNFS_HAS_MMAP))
		return -ENODEV;

	mutex_lock(&of->mutex);

	rc = -ENODEV;
	if (!kernfs_get_active(of->kn))
		goto out_unlock;

	ops = kernfs_ops(of->kn);
	rc = ops->mmap(of, vma);
	if (rc)
		goto out_put;

	/*
	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
	 * to satisfy versions of X which crash if the mmap fails: that
	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
	 */
	if (vma->vm_file != file)
		goto out_put;

	rc = -EINVAL;
	if (of->mmapped && of->vm_ops != vma->vm_ops)
		goto out_put;

	/*
	 * It is not possible to successfully wrap close.
	 * So error if someone is trying to use close.
	 */
	rc = -EINVAL;
	if (vma->vm_ops && vma->vm_ops->close)
		goto out_put;

	rc = 0;
	of->mmapped = true;
	of->vm_ops = vma->vm_ops;
	vma->vm_ops = &kernfs_vm_ops;
out_put:
	kernfs_put_active(of->kn);
out_unlock:
	mutex_unlock(&of->mutex);

	return rc;
}

/**
 * kernfs_get_open_node - get or create kernfs_open_node
 * @kn: target kernfs_node
 * @of: kernfs_open_file for this instance of open
 *
 * If @kn->attr.open exists, chain @of to its files list; otherwise,
 * create a kernfs_open_node and chain @of to it.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int kernfs_get_open_node(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	struct kernfs_open_node *on, *new_on = NULL;
	struct mutex *mutex = NULL;

	mutex = kernfs_open_file_mutex_lock(kn);
	on = kernfs_deref_open_node_protected(kn);

	if (on) {
		list_add_tail(&of->list, &on->files);
		mutex_unlock(mutex);
		return 0;
	}

	/* not there, initialize a new one */
	new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
	if (!new_on) {
		mutex_unlock(mutex);
		return -ENOMEM;
	}
	atomic_set(&new_on->event, 1);
	init_waitqueue_head(&new_on->poll);
	INIT_LIST_HEAD(&new_on->files);
	list_add_tail(&of->list, &new_on->files);
	rcu_assign_pointer(kn->attr.open, new_on);

	mutex_unlock(mutex);
	return 0;
}

/*
 * kernfs_unlink_open_file - Unlink @of from @kn.
 *
 * @kn: target kernfs_node
 * @of: associated kernfs_open_file
 *
 * Unlink @of from the list of @kn's associated open files.  If the list of
 * associated open files becomes empty, disassociate and free the
 * kernfs_open_node.
 *
 * LOCKING:
 * None.
 */
static void kernfs_unlink_open_file(struct kernfs_node *kn,
				    struct kernfs_open_file *of)
{
	struct kernfs_open_node *on;
	struct mutex *mutex = NULL;

	mutex = kernfs_open_file_mutex_lock(kn);

	on = kernfs_deref_open_node_protected(kn);
	if (!on) {
		mutex_unlock(mutex);
		return;
	}

	list_del(&of->list);

	if (list_empty(&on->files)) {
		rcu_assign_pointer(kn->attr.open, NULL);
		kfree_rcu(on, rcu_head);
	}

	mutex_unlock(mutex);
}

static int kernfs_fop_open(struct inode *inode, struct file *file)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_root *root = kernfs_root(kn);
	const struct kernfs_ops *ops;
	struct kernfs_open_file *of;
	bool has_read, has_write, has_mmap;
	int error = -EACCES;

	if (!kernfs_get_active(kn))
		return -ENODEV;

	ops = kernfs_ops(kn);

	has_read = ops->seq_show || ops->read || ops->mmap;
	has_write = ops->write || ops->mmap;
	has_mmap = ops->mmap;

	/* see the flag definition for details */
	if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
		if ((file->f_mode & FMODE_WRITE) &&
		    (!(inode->i_mode & S_IWUGO) || !has_write))
			goto err_out;

		if ((file->f_mode & FMODE_READ) &&
		    (!(inode->i_mode & S_IRUGO) || !has_read))
			goto err_out;
	}

	/* allocate a kernfs_open_file for the file */
	error = -ENOMEM;
	of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
	if (!of)
		goto err_out;

	/*
	 * The following is done to give a different lockdep key to
	 * @of->mutex for files which implement mmap.  This is a rather
	 * crude way to avoid false positive lockdep warning around
	 * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and
	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
	 * which mm->mmap_lock nests, while holding @of->mutex.  As each
	 * open file has a separate mutex, it's okay as long as those don't
	 * happen on the same file.  At this point, we can't easily give
	 * each file a separate locking class.  Let's differentiate on
	 * whether the file has mmap or not for now.
	 *
	 * Both paths of the branch look the same.  They're supposed to
	 * look that way and give @of->mutex different static lockdep keys.
	 */
	if (has_mmap)
		mutex_init(&of->mutex);
	else
		mutex_init(&of->mutex);

	of->kn = kn;
	of->file = file;

	/*
	 * Write path needs to know atomic_write_len outside the active
	 * reference.  Cache it in open_file.  See kernfs_fop_write_iter()
	 * for details.
	 */
	of->atomic_write_len = ops->atomic_write_len;

	error = -EINVAL;
	/*
	 * ->seq_show is incompatible with ->prealloc,
	 * as seq_read does its own allocation.
	 * ->read must be used instead.
	 */
	if (ops->prealloc && ops->seq_show)
		goto err_free;
	if (ops->prealloc) {
		int len = of->atomic_write_len ?: PAGE_SIZE;

		of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
		error = -ENOMEM;
		if (!of->prealloc_buf)
			goto err_free;
		mutex_init(&of->prealloc_mutex);
	}

	/*
	 * Always instantiate seq_file even if read access doesn't use
	 * seq_file or is not requested.  This unifies private data access
	 * and readable regular files are the vast majority anyway.
	 */
	if (ops->seq_show)
		error = seq_open(file, &kernfs_seq_ops);
	else
		error = seq_open(file, NULL);
	if (error)
		goto err_free;

	of->seq_file = file->private_data;
	of->seq_file->private = of;

	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
	if (file->f_mode & FMODE_WRITE)
		file->f_mode |= FMODE_PWRITE;

	/* make sure we have open node struct */
	error = kernfs_get_open_node(kn, of);
	if (error)
		goto err_seq_release;

	if (ops->open) {
		/* nobody has access to @of yet, skip @of->mutex */
		error = ops->open(of);
		if (error)
			goto err_put_node;
	}

	/* open succeeded, put active references */
	kernfs_put_active(kn);
	return 0;

err_put_node:
	kernfs_unlink_open_file(kn, of);
err_seq_release:
	seq_release(inode, file);
err_free:
	kfree(of->prealloc_buf);
	kfree(of);
err_out:
	kernfs_put_active(kn);
	return error;
}

/* used from release/drain to ensure that ->release() is called exactly once */
static void kernfs_release_file(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	/*
	 * @of is guaranteed to have no other file operations in flight and
	 * we just want to synchronize release and drain paths.
	 * @kernfs_open_file_mutex_ptr(kn) is enough.  @of->mutex can't be used
	 * here because the drain path may be called from places which can
	 * cause circular dependency.
	 */
	lockdep_assert_held(kernfs_open_file_mutex_ptr(kn));

	if (!of->released) {
		/*
		 * A file is never detached without being released and we
		 * need to be able to release files which are deactivated
		 * and being drained.  Don't use kernfs_ops().
		 */
		kn->attr.ops->release(of);
		of->released = true;
	}
}

static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_open_file *of = kernfs_of(filp);
	struct mutex *mutex = NULL;

	if (kn->flags & KERNFS_HAS_RELEASE) {
		mutex = kernfs_open_file_mutex_lock(kn);
		kernfs_release_file(kn, of);
		mutex_unlock(mutex);
	}

	kernfs_unlink_open_file(kn, of);
	seq_release(inode, filp);
	kfree(of->prealloc_buf);
	kfree(of);

	return 0;
}

void kernfs_drain_open_files(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	struct kernfs_open_file *of;
	struct mutex *mutex = NULL;

	if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
		return;

	/*
	 * A lockless opportunistic check is safe below because no one is
	 * adding to ->attr.open at this point in time.  This check allows
	 * an early bail out if ->attr.open is already NULL.
	 * kernfs_unlink_open_file() makes ->attr.open NULL only while
	 * holding kernfs_open_file_mutex, so the check below under
	 * kernfs_open_file_mutex_ptr(kn) ensures bailing out if
	 * ->attr.open became NULL while waiting for the mutex.
	 */
	if (!rcu_access_pointer(kn->attr.open))
		return;

	mutex = kernfs_open_file_mutex_lock(kn);
	on = kernfs_deref_open_node_protected(kn);
	if (!on) {
		mutex_unlock(mutex);
		return;
	}

	list_for_each_entry(of, &on->files, list) {
		struct inode *inode = file_inode(of->file);

		if (kn->flags & KERNFS_HAS_MMAP)
			unmap_mapping_range(inode->i_mapping, 0, 0, 1);

		if (kn->flags & KERNFS_HAS_RELEASE)
			kernfs_release_file(kn, of);
	}

	mutex_unlock(mutex);
}

/*
 * Kernfs attribute files are pollable.  The idea is that you read
 * the content and then you use 'poll' or 'select' to wait for
 * the content to change.  When the content changes (assuming the
 * manager for the kobject supports notification), poll will
 * return EPOLLERR|EPOLLPRI, and select will return the fd whether
 * it is waiting for read, write, or exceptions.
 * Once poll/select indicates that the value has changed, you
 * need to close and re-open the file, or seek to 0 and read again.
 * Reminder: this only works for attributes which actively support
 * it, and it is not possible to test an attribute from userspace
 * to see if it supports poll (neither 'poll' nor 'select' returns
 * an appropriate error code).  When in doubt, set a suitable timeout value.
 */
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
	struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
	struct kernfs_open_node *on = kernfs_deref_open_node(of, kn);

	if (!on)
		return EPOLLERR;

	poll_wait(of->file, &on->poll, wait);

	if (of->event != atomic_read(&on->event))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	return DEFAULT_POLLMASK;
}

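/*
 * Example (illustrative): an implementation-supplied ->poll can layer
 * extra readiness bits on top of the generic behavior.  "foo_poll" and
 * "foo_ready" are hypothetical.
 *
 *	static __poll_t foo_poll(struct kernfs_open_file *of,
 *				 struct poll_table_struct *pt)
 *	{
 *		__poll_t ret = kernfs_generic_poll(of, pt);
 *
 *		if (foo_ready(of->kn->priv))
 *			ret |= EPOLLIN | EPOLLRDNORM;
 *		return ret;
 *	}
 */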
static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
{
	struct kernfs_open_file *of = kernfs_of(filp);
	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
	__poll_t ret;

	if (!kernfs_get_active(kn))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	if (kn->attr.ops->poll)
		ret = kn->attr.ops->poll(of, wait);
	else
		ret = kernfs_generic_poll(of, wait);

	kernfs_put_active(kn);
	return ret;
}

static void kernfs_notify_workfn(struct work_struct *work)
{
	struct kernfs_node *kn;
	struct kernfs_super_info *info;
	struct kernfs_root *root;
repeat:
	/* pop one off the notify_list */
	spin_lock_irq(&kernfs_notify_lock);
	kn = kernfs_notify_list;
	if (kn == KERNFS_NOTIFY_EOL) {
		spin_unlock_irq(&kernfs_notify_lock);
		return;
	}
	kernfs_notify_list = kn->attr.notify_next;
	kn->attr.notify_next = NULL;
	spin_unlock_irq(&kernfs_notify_lock);

	root = kernfs_root(kn);
	/* kick fsnotify */
	down_write(&root->kernfs_rwsem);

	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
		struct kernfs_node *parent;
		struct inode *p_inode = NULL;
		struct inode *inode;
		struct qstr name;

		/*
		 * We want fsnotify_modify() on @kn but as the
		 * modifications aren't originating from userland don't
		 * have the matching @file available.  Look up the inodes
		 * and generate the events manually.
		 */
		inode = ilookup(info->sb, kernfs_ino(kn));
		if (!inode)
			continue;

		name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name));
		parent = kernfs_get_parent(kn);
		if (parent) {
			p_inode = ilookup(info->sb, kernfs_ino(parent));
			if (p_inode) {
				fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
					 inode, FSNOTIFY_EVENT_INODE,
					 p_inode, &name, inode, 0);
				iput(p_inode);
			}

			kernfs_put(parent);
		}

		if (!p_inode)
			fsnotify_inode(inode, FS_MODIFY);

		iput(inode);
	}

	up_write(&root->kernfs_rwsem);
	kernfs_put(kn);
	goto repeat;
}

/**
 * kernfs_notify - notify a kernfs file
 * @kn: file to notify
 *
 * Notify @kn such that poll(2) on @kn wakes up.  May be called from any
 * context.
 */
void kernfs_notify(struct kernfs_node *kn)
{
	static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
	unsigned long flags;
	struct kernfs_open_node *on;

	if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
		return;

	/* kick poll immediately */
	rcu_read_lock();
	on = rcu_dereference(kn->attr.open);
	if (on) {
		atomic_inc(&on->event);
		wake_up_interruptible(&on->poll);
	}
	rcu_read_unlock();

	/* schedule work to kick fsnotify */
	spin_lock_irqsave(&kernfs_notify_lock, flags);
	if (!kn->attr.notify_next) {
		kernfs_get(kn);
		kn->attr.notify_next = kernfs_notify_list;
		kernfs_notify_list = kn;
		schedule_work(&kernfs_notify_work);
	}
	spin_unlock_irqrestore(&kernfs_notify_lock, flags);
}
EXPORT_SYMBOL_GPL(kernfs_notify);

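/*
 * Example (illustrative): a driver that changes the state behind a
 * pollable attribute calls kernfs_notify() on that attribute's node
 * afterwards; sysfs_notify() is a thin wrapper around this for sysfs
 * attributes.
 */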
const struct file_operations kernfs_file_fops = {
	.read_iter	= kernfs_fop_read_iter,
	.write_iter	= kernfs_fop_write_iter,
	.llseek		= generic_file_llseek,
	.mmap		= kernfs_fop_mmap,
	.open		= kernfs_fop_open,
	.release	= kernfs_fop_release,
	.poll		= kernfs_fop_poll,
	.fsync		= noop_fsync,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
};

/**
 * __kernfs_create_file - kernfs internal function to create a file
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @uid: uid of the file
 * @gid: gid of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
 *
 * Returns the created node on success, ERR_PTR() value on error.
 */
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
					 const char *name,
					 umode_t mode, kuid_t uid, kgid_t gid,
					 loff_t size,
					 const struct kernfs_ops *ops,
					 void *priv, const void *ns,
					 struct lock_class_key *key)
{
	struct kernfs_node *kn;
	unsigned int flags;
	int rc;

	flags = KERNFS_FILE;

	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
			     uid, gid, flags);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	kn->attr.ops = ops;
	kn->attr.size = size;
	kn->ns = ns;
	kn->priv = priv;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (key) {
		lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
		kn->flags |= KERNFS_LOCKDEP;
	}
#endif

	/*
	 * kn->attr.ops is accessible only while holding active ref.  We
	 * need to know whether some ops are implemented outside active
	 * ref.  Cache their existence in flags.
	 */
	if (ops->seq_show)
		kn->flags |= KERNFS_HAS_SEQ_SHOW;
	if (ops->mmap)
		kn->flags |= KERNFS_HAS_MMAP;
	if (ops->release)
		kn->flags |= KERNFS_HAS_RELEASE;

	rc = kernfs_add_one(kn);
	if (rc) {
		kernfs_put(kn);
		return ERR_PTR(rc);
	}
	return kn;
}
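/*
 * Example (illustrative): sysfs and cgroup reach this function through
 * wrappers such as kernfs_create_file_ns().  A minimal direct call,
 * reusing the hypothetical "foo_ops" sketched earlier, might be:
 *
 *	kn = __kernfs_create_file(parent, "foo", 0644,
 *				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 *				  0, &foo_ops, NULL, NULL, NULL);
 *	if (IS_ERR(kn))
 *		return PTR_ERR(kn);
 */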