}
pagefault_disable();
- ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+ ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops, NULL);
pagefault_enable();
if (ret > 0)
written += ret;
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
+ .fop_flags = FOP_ASYNC_LOCK,
};
const struct file_operations gfs2_dir_fops = {
.lock = gfs2_lock,
.flock = gfs2_flock,
.llseek = default_llseek,
+ .fop_flags = FOP_ASYNC_LOCK,
};
#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
.prepare = nfsd4_cb_notify_lock_prepare,
.done = nfsd4_cb_notify_lock_done,
.release = nfsd4_cb_notify_lock_release,
+ .opcode = OP_CB_NOTIFY_LOCK,
};
/*
* When a delegation is recalled, the filehandle is stored in the "new"
* filter.
* Every 30 seconds we swap the filters and clear the "new" one,
- * unless both are empty of course.
+ * unless both are empty of course. This results in delegations for a
+ * given filehandle being blocked for between 30 and 60 seconds.
*
* Each filter is 256 bits. We hash the filehandle to 32bit and use the
* low 3 bytes as hash-table indices.
if (ktime_get_seconds() - bd->swap_time > 30) {
bd->entries -= bd->old_entries;
bd->old_entries = bd->entries;
+ bd->new = 1-bd->new;
memset(bd->set[bd->new], 0,
sizeof(bd->set[0]));
- bd->new = 1-bd->new;
bd->swap_time = ktime_get_seconds();
}
spin_unlock(&blocked_delegations_lock);
{
struct nfs4_ol_stateid *stp;
struct nfs4_client *clp = oo->oo_owner.so_client;
- struct list_head reaplist;
-
- INIT_LIST_HEAD(&reaplist);
+ LIST_HEAD(reaplist);
spin_lock(&clp->cl_lock);
unhash_openowner_locked(oo);
int i;
struct nfs4_openowner *oo;
struct nfs4_delegation *dp;
- struct list_head reaplist;
+ LIST_HEAD(reaplist);
- INIT_LIST_HEAD(&reaplist);
spin_lock(&state_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
}
seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
- seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
+ seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
seq_printf(m, "admin-revoked states: %d\n",
atomic_read(&clp->cl_admin_revoked));
drop_client(clp);
{
struct nfs4_cb_fattr *ncf =
container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+ trace_nfsd_cb_getattr_done(&dp->dl_stid.sc_stateid, task);
ncf->ncf_cb_status = task->tk_status;
switch (task->tk_status) {
case -NFS4ERR_DELAY:
struct nfs4_delegation *dp =
container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
- clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
- wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
+ clear_and_wake_up_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
nfs4_put_stid(&dp->dl_stid);
}
static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
.done = nfsd4_cb_recall_any_done,
.release = nfsd4_cb_recall_any_release,
+ .opcode = OP_CB_RECALL_ANY,
};
static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
.done = nfsd4_cb_getattr_done,
.release = nfsd4_cb_getattr_release,
+ .opcode = OP_CB_GETATTR,
};
static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
if (so != NULL) {
cstate->replay_owner = NULL;
atomic_set(&so->so_replay.rp_locked, RP_UNLOCKED);
+ smp_mb__after_atomic();
wake_up_var(&so->so_replay.rp_locked);
nfs4_put_stateowner(so);
}
* so tell them to stop waiting.
*/
atomic_set(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
+ smp_mb__after_atomic();
wake_up_var(&oo->oo_owner.so_replay.rp_locked);
wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
.prepare = nfsd4_cb_recall_prepare,
.done = nfsd4_cb_recall_done,
.release = nfsd4_cb_recall_release,
+ .opcode = OP_CB_RECALL,
};
static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
struct svc_rqst *rqst;
struct nfs4_client *clp;
- if (!i_am_nfsd())
- return false;
- rqst = kthread_data(current);
- /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */
- if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4)
+ rqst = nfsd_current_rqst();
+ if (!nfsd_v4client(rqst))
return false;
clp = *(rqst->rq_lease_breaker);
return dl->dl_stid.sc_client == clp;
/*
* Now that the deleg is set, check again to ensure that nothing
- * raced in and changed the mode while we weren't lookng.
+ * raced in and changed the mode while we weren't looking.
*/
status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file);
if (status)
}
}
+static bool
+nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh,
+ struct kstat *stat)
+{
+ struct nfsd_file *nf = find_rw_file(dp->dl_stid.sc_file);
+ struct path path;
+ int rc;
+
+ if (!nf)
+ return false;
+
+ path.mnt = currentfh->fh_export->ex_path.mnt;
+ path.dentry = file_dentry(nf->nf_file);
+
+ rc = vfs_getattr(&path, stat,
+ (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
+ AT_STATX_SYNC_AS_STAT);
+
+ nfsd_file_put(nf);
+ return rc == 0;
+}
+
/*
* The Linux NFS server does not offer write delegations to NFSv4.0
* clients in order to avoid conflicts between write delegations and
int cb_up;
int status = 0;
struct kstat stat;
- struct path path;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = false;
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
- trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
- path.mnt = currentfh->fh_export->ex_path.mnt;
- path.dentry = currentfh->fh_dentry;
- if (vfs_getattr(&path, &stat,
- (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
- AT_STATX_SYNC_AS_STAT)) {
+ if (!nfs4_delegation_stat(dp, currentfh, &stat)) {
nfs4_put_stid(&dp->dl_stid);
destroy_delegation(dp);
goto out_no_deleg;
}
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
dp->dl_cb_fattr.ncf_initial_cinfo =
nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
+ trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
} else {
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list);
init_waitqueue_head(&nn->nfsd_ssc_waitq);
}
-EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
/*
* This is called when nfsd is being shutdown, after all inter_ssc
{
struct list_head *pos, *next;
struct nfs4_client *clp;
- struct list_head cblist;
+ LIST_HEAD(cblist);
- INIT_LIST_HEAD(&cblist);
spin_lock(&nn->client_lock);
list_for_each_safe(pos, next, &nn->client_lru) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
cl_ra_cblist);
list_del_init(&clp->cl_ra_cblist);
clp->cl_ra->ra_keep = 0;
- clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG);
clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
BIT(RCA4_TYPE_MASK_WDATA_DLG);
trace_nfsd_cb_recall_any(clp->cl_ra);
nf = nfs4_find_file(s, flags);
if (nf) {
- status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ status = nfsd_permission(&rqstp->rq_cred,
+ fhp->fh_export, fhp->fh_dentry,
acc | NFSD_MAY_OWNER_OVERRIDE);
if (status) {
nfsd_file_put(nf);
*nfp = NULL;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
- if (cstid)
- status = nfserr_bad_stateid;
- else
- status = check_special_stateids(net, fhp, stateid,
- flags);
+ status = check_special_stateids(net, fhp, stateid, flags);
goto done;
}
goto put_stateid;
trace_nfsd_deleg_return(stateid);
- wake_up_var(d_inode(cstate->current_fh.fh_dentry));
destroy_delegation(dp);
+ smp_mb__after_atomic();
+ wake_up_var(d_inode(cstate->current_fh.fh_dentry));
put_stateid:
nfs4_put_stid(&dp->dl_stid);
out:
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate) ||
- exportfs_lock_op_is_async(sb->s_export_op))
- flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate) ||
- exportfs_lock_op_is_async(sb->s_export_op))
- flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
goto out;
}
- /*
- * Most filesystems with their own ->lock operations will block
- * the nfsd thread waiting to acquire the lock. That leads to
- * deadlocks (we don't want every nfsd thread tied up waiting
- * for file locks), so don't attempt blocking lock notifications
- * on those filesystems:
- */
- if (!exportfs_lock_op_is_async(sb->s_export_op))
- flags &= ~FL_SLEEP;
+ if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) &&
+ nfsd4_has_session(cstate) &&
+ locks_can_async_lock(nf->nf_file->f_op))
+ flags |= FL_SLEEP;
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
if (!nbl) {
* @cstate: NFSv4 COMPOUND state
* @u: RELEASE_LOCKOWNER arguments
*
- * Check if theree are any locks still held and if not - free the lockowner
+ * Check if there are any locks still held and if not, free the lockowner
* and any lock state that is owned.
*
* Return values:
spin_lock_init(&nn->client_lock);
spin_lock_init(&nn->s2s_cp_lock);
idr_init(&nn->s2s_cp_stateids);
+ atomic_set(&nn->pending_async_copies, 0);
spin_lock_init(&nn->blocked_locks_lock);
INIT_LIST_HEAD(&nn->blocked_locks_lru);
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file_lock_context *ctx;
+ struct nfs4_delegation *dp = NULL;
struct file_lease *fl;
struct iattr attrs;
struct nfs4_cb_fattr *ncf;
ctx = locks_inode_context(inode);
if (!ctx)
return 0;
+
+#define NON_NFSD_LEASE ((void *)1)
+
spin_lock(&ctx->flc_lock);
for_each_file_lock(fl, &ctx->flc_lease) {
- unsigned char type = fl->c.flc_type;
-
if (fl->c.flc_flags == FL_LAYOUT)
continue;
- if (fl->fl_lmops != &nfsd_lease_mng_ops) {
- /*
- * non-nfs lease, if it's a lease with F_RDLCK then
- * we are done; there isn't any write delegation
- * on this inode
- */
- if (type == F_RDLCK)
- break;
-
- nfsd_stats_wdeleg_getattr_inc(nn);
- spin_unlock(&ctx->flc_lock);
-
- status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (fl->c.flc_type == F_WRLCK) {
+ if (fl->fl_lmops == &nfsd_lease_mng_ops)
+ dp = fl->c.flc_owner;
+ else
+ dp = NON_NFSD_LEASE;
+ }
+ break;
+ }
+ if (dp == NULL || dp == NON_NFSD_LEASE ||
+ dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
+ spin_unlock(&ctx->flc_lock);
+ if (dp == NON_NFSD_LEASE) {
+ status = nfserrno(nfsd_open_break_lease(inode,
+ NFSD_MAY_READ));
if (status != nfserr_jukebox ||
!nfsd_wait_for_delegreturn(rqstp, inode))
return status;
- return 0;
}
- if (type == F_WRLCK) {
- struct nfs4_delegation *dp = fl->c.flc_owner;
+ return 0;
+ }
- if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
- spin_unlock(&ctx->flc_lock);
- return 0;
- }
- nfsd_stats_wdeleg_getattr_inc(nn);
- dp = fl->c.flc_owner;
- refcount_inc(&dp->dl_stid.sc_count);
- ncf = &dp->dl_cb_fattr;
- nfs4_cb_getattr(&dp->dl_cb_fattr);
- spin_unlock(&ctx->flc_lock);
- wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY,
- TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
- if (ncf->ncf_cb_status) {
- /* Recall delegation only if client didn't respond */
- status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
- if (status != nfserr_jukebox ||
- !nfsd_wait_for_delegreturn(rqstp, inode)) {
- nfs4_put_stid(&dp->dl_stid);
- return status;
- }
- }
- if (!ncf->ncf_file_modified &&
- (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
- ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
- ncf->ncf_file_modified = true;
- if (ncf->ncf_file_modified) {
- int err;
-
- /*
- * Per section 10.4.3 of RFC 8881, the server would
- * not update the file's metadata with the client's
- * modified size
- */
- attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
- attrs.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_DELEG;
- inode_lock(inode);
- err = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
- inode_unlock(inode);
- if (err) {
- nfs4_put_stid(&dp->dl_stid);
- return nfserrno(err);
- }
- ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
- *size = ncf->ncf_cur_fsize;
- *modified = true;
- }
- nfs4_put_stid(&dp->dl_stid);
- return 0;
+ nfsd_stats_wdeleg_getattr_inc(nn);
+ refcount_inc(&dp->dl_stid.sc_count);
+ ncf = &dp->dl_cb_fattr;
+ nfs4_cb_getattr(&dp->dl_cb_fattr);
+ spin_unlock(&ctx->flc_lock);
+
+ wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY,
+ TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
+ if (ncf->ncf_cb_status) {
+ /* Recall delegation only if client didn't respond */
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ goto out_status;
+ }
+ if (!ncf->ncf_file_modified &&
+ (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
+ ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
+ ncf->ncf_file_modified = true;
+ if (ncf->ncf_file_modified) {
+ int err;
+
+ /*
+ * Per section 10.4.3 of RFC 8881, the server would
+ * not update the file's metadata with the client's
+ * modified size
+ */
+ attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
+ attrs.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_DELEG;
+ inode_lock(inode);
+ err = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
+ inode_unlock(inode);
+ if (err) {
+ status = nfserrno(err);
+ goto out_status;
}
- break;
+ ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
+ *size = ncf->ncf_cur_fsize;
+ *modified = true;
}
- spin_unlock(&ctx->flc_lock);
- return 0;
+ status = 0;
+out_status:
+ nfs4_put_stid(&dp->dl_stid);
+ return status;
}
u64 abs_to, struct buffer_head *di_bh)
{
struct address_space *mapping = inode->i_mapping;
- struct page *page;
+ struct folio *folio;
unsigned long index = abs_from >> PAGE_SHIFT;
handle_t *handle;
int ret = 0;
goto out;
}
- page = find_or_create_page(mapping, index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
mlog_errno(ret);
goto out_commit_trans;
}
* __block_write_begin and block_commit_write to zero the
* whole block.
*/
- ret = __block_write_begin(page, block_start + 1, 0,
+ ret = __block_write_begin(folio, block_start + 1, 0,
ocfs2_get_block);
if (ret < 0) {
mlog_errno(ret);
/* must not update i_size! */
- block_commit_write(page, block_start + 1, block_start + 1);
+ block_commit_write(&folio->page, block_start + 1, block_start + 1);
}
/*
}
out_unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
out_commit_trans:
if (handle)
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
return remapped > 0 ? remapped : ret;
}
+static loff_t ocfs2_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct ocfs2_file_private *fp = file->private_data;
+
+ return generic_llseek_cookie(file, offset, whence, &fp->cookie);
+}
+
const struct inode_operations ocfs2_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
.remap_file_range = ocfs2_remap_file_range,
+ .fop_flags = FOP_ASYNC_LOCK,
};
WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
const struct file_operations ocfs2_dops = {
- .llseek = generic_file_llseek,
+ .llseek = ocfs2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
+ .fop_flags = FOP_ASYNC_LOCK,
};
/*
};
const struct file_operations ocfs2_dops_no_plocks = {
- .llseek = generic_file_llseek,
+ .llseek = ocfs2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
wake_up(&fl->c.flc_wait);
}
+ static inline bool locks_can_async_lock(const struct file_operations *fops)
+ {
+ return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK;
+ }
+
/* fs/locks.c */
void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);
#ifdef CONFIG_FILE_LOCKING
static inline int break_lease(struct inode *inode, unsigned int mode)
{
+ struct file_lock_context *flctx;
+
/*
* Since this check is lockless, we must ensure that any refcounts
* taken are done before checking i_flctx->flc_lease. Otherwise, we
* could end up racing with tasks trying to set a new lease on this
* file.
*/
+ flctx = READ_ONCE(inode->i_flctx);
+ if (!flctx)
+ return 0;
smp_mb();
- if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
+ if (!list_empty_careful(&flctx->flc_lease))
return __break_lease(inode, mode, FL_LEASE);
return 0;
}
static inline int break_deleg(struct inode *inode, unsigned int mode)
{
+ struct file_lock_context *flctx;
+
/*
* Since this check is lockless, we must ensure that any refcounts
* taken are done before checking i_flctx->flc_lease. Otherwise, we
* could end up racing with tasks trying to set a new lease on this
* file.
*/
+ flctx = READ_ONCE(inode->i_flctx);
+ if (!flctx)
+ return 0;
smp_mb();
- if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
+ if (!list_empty_careful(&flctx->flc_lease))
return __break_lease(inode, mode, FL_DELEG);
return 0;
}
/* Expect random access pattern */
#define FMODE_RANDOM ((__force fmode_t)(1 << 12))
-/* File is huge (eg. /dev/mem): treat loff_t as unsigned */
-#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13))
+/* FMODE_* bit 13 */
/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH ((__force fmode_t)(1 << 14))
int (*write_begin)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata);
+ struct folio **foliop, void **fsdata);
int (*write_end)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata);
+ struct folio *folio, void *fsdata);
/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
sector_t (*bmap)(struct address_space *, sector_t);
#endif
/* Misc */
- unsigned long i_state;
+ u32 i_state;
+ /* 32-bit hole */
struct rw_semaphore i_rwsem;
unsigned long dirtied_when; /* jiffies of first dirtying */
void *i_private; /* fs or device private pointer */
} __randomize_layout;
+/*
+ * Get bit address from inode->i_state to use with wait_var_event()
+ * infrastructre.
+ */
+#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit))
+
+struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
+ struct inode *inode, u32 bit);
+
+static inline void inode_wake_up_bit(struct inode *inode, u32 bit)
+{
+ /* Caller is responsible for correct memory barriers. */
+ wake_up_var(inode_state_wait_address(inode, bit));
+}
+
struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
static inline unsigned int i_blocksize(const struct inode *node)
}
struct fown_struct {
+ struct file *file; /* backpointer for security modules */
rwlock_t lock; /* protects pid, uid, euid fields */
struct pid *pid; /* pid or -pgrp where SIGIO should be sent */
enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */
index < ra->start + ra->size);
}
-/*
- * f_{lock,count,pos_lock} members can be highly contended and share
- * the same cacheline. f_{lock,mode} are very frequently used together
- * and so share the same cacheline as well. The read-mostly
- * f_{path,inode,op} are kept on a separate cacheline.
+/**
+ * struct file - Represents a file
+ * @f_count: reference count
+ * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
+ * @f_mode: FMODE_* flags often used in hotpaths
+ * @f_op: file operations
+ * @f_mapping: Contents of a cacheable, mappable object.
+ * @private_data: filesystem or driver specific data
+ * @f_inode: cached inode
+ * @f_flags: file flags
+ * @f_iocb_flags: iocb flags
+ * @f_cred: stashed credentials of creator/opener
+ * @f_path: path of the file
+ * @f_pos_lock: lock protecting file position
+ * @f_pipe: specific to pipes
+ * @f_pos: file position
+ * @f_security: LSM security context of this file
+ * @f_owner: file owner
+ * @f_wb_err: writeback error
+ * @f_sb_err: per sb writeback errors
+ * @f_ep: link of all epoll hooks for this file
+ * @f_task_work: task work entry point
+ * @f_llist: work queue entrypoint
+ * @f_ra: file's readahead state
+ * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
*/
struct file {
+ atomic_long_t f_count;
+ spinlock_t f_lock;
+ fmode_t f_mode;
+ const struct file_operations *f_op;
+ struct address_space *f_mapping;
+ void *private_data;
+ struct inode *f_inode;
+ unsigned int f_flags;
+ unsigned int f_iocb_flags;
+ const struct cred *f_cred;
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct path f_path;
union {
- /* fput() uses task work when closing and freeing file (default). */
- struct callback_head f_task_work;
- /* fput() must use workqueue (most kernel threads). */
- struct llist_node f_llist;
- unsigned int f_iocb_flags;
+ /* regular files (with FMODE_ATOMIC_POS) and directories */
+ struct mutex f_pos_lock;
+ /* pipes */
+ u64 f_pipe;
};
-
- /*
- * Protects f_ep, f_flags.
- * Must not be taken from IRQ context.
- */
- spinlock_t f_lock;
- fmode_t f_mode;
- atomic_long_t f_count;
- struct mutex f_pos_lock;
- loff_t f_pos;
- unsigned int f_flags;
- struct fown_struct f_owner;
- const struct cred *f_cred;
- struct file_ra_state f_ra;
- struct path f_path;
- struct inode *f_inode; /* cached value */
- const struct file_operations *f_op;
-
- u64 f_version;
+ loff_t f_pos;
#ifdef CONFIG_SECURITY
- void *f_security;
+ void *f_security;
#endif
- /* needed for tty driver, and maybe others */
- void *private_data;
-
+ /* --- cacheline 2 boundary (128 bytes) --- */
+ struct fown_struct *f_owner;
+ errseq_t f_wb_err;
+ errseq_t f_sb_err;
#ifdef CONFIG_EPOLL
- /* Used by fs/eventpoll.c to link all the hooks to this file */
- struct hlist_head *f_ep;
-#endif /* #ifdef CONFIG_EPOLL */
- struct address_space *f_mapping;
- errseq_t f_wb_err;
- errseq_t f_sb_err; /* for syncfs */
+ struct hlist_head *f_ep;
+#endif
+ union {
+ struct callback_head f_task_work;
+ struct llist_node f_llist;
+ struct file_ra_state f_ra;
+ freeptr_t f_freeptr;
+ };
+ /* --- cacheline 3 boundary (192 bytes) --- */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
#define OFFT_OFFSET_MAX type_max(off_t)
#endif
+int file_f_owner_allocate(struct file *file);
+static inline struct fown_struct *file_f_owner(const struct file *file)
+{
+ return READ_ONCE(file->f_owner);
+}
+
extern void send_sigio(struct fown_struct *fown, int fd, int band);
static inline struct inode *file_inode(const struct file *f)
extern int f_setown(struct file *filp, int who, int force);
extern void f_delown(struct file *filp);
extern pid_t f_getown(struct file *filp);
-extern int send_sigurg(struct fown_struct *fown);
+extern int send_sigurg(struct file *file);
/*
* sb->s_flags. Note that these mirror the equivalent MS_* flags where
#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */
#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */
+#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */
/* Possible states of 'frozen' field */
enum {
time64_t s_time_min;
time64_t s_time_max;
#ifdef CONFIG_FSNOTIFY
- __u32 s_fsnotify_mask;
+ u32 s_fsnotify_mask;
struct fsnotify_sb_info *s_fsnotify_info;
#endif
#define __sb_writers_acquired(sb, lev) \
percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
#define __sb_writers_release(sb, lev) \
- percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_)
/**
* __sb_write_started - check if sb freeze level is held
#define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3))
/* Contains huge pages */
#define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4))
-#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 5))
+/* Treat loff_t as unsigned (e.g., /dev/mem) */
+#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5))
+ /* Supports asynchronous lock callbacks */
++#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
*
* I_REFERENCED Marks the inode as recently references on the LRU list.
*
- * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit().
- *
* I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to
* synchronize competing switching instances and to tell
* wb stat updates to grab the i_pages lock. See
* i_count.
*
* Q: What is the difference between I_WILL_FREE and I_FREEING?
+ *
+ * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait
+ * upon. There's one free address left.
*/
-#define I_DIRTY_SYNC (1 << 0)
-#define I_DIRTY_DATASYNC (1 << 1)
-#define I_DIRTY_PAGES (1 << 2)
-#define __I_NEW 3
+#define __I_NEW 0
#define I_NEW (1 << __I_NEW)
-#define I_WILL_FREE (1 << 4)
-#define I_FREEING (1 << 5)
-#define I_CLEAR (1 << 6)
-#define __I_SYNC 7
+#define __I_SYNC 1
#define I_SYNC (1 << __I_SYNC)
-#define I_REFERENCED (1 << 8)
-#define __I_DIO_WAKEUP 9
-#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP)
+#define __I_LRU_ISOLATING 2
+#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
+
+#define I_DIRTY_SYNC (1 << 3)
+#define I_DIRTY_DATASYNC (1 << 4)
+#define I_DIRTY_PAGES (1 << 5)
+#define I_WILL_FREE (1 << 6)
+#define I_FREEING (1 << 7)
+#define I_CLEAR (1 << 8)
+#define I_REFERENCED (1 << 9)
#define I_LINKABLE (1 << 10)
#define I_DIRTY_TIME (1 << 11)
-#define I_WB_SWITCH (1 << 13)
-#define I_OVL_INUSE (1 << 14)
-#define I_CREATING (1 << 15)
-#define I_DONTCACHE (1 << 16)
-#define I_SYNC_QUEUED (1 << 17)
-#define I_PINNING_NETFS_WB (1 << 18)
-#define __I_LRU_ISOLATING 19
-#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
+#define I_WB_SWITCH (1 << 12)
+#define I_OVL_INUSE (1 << 13)
+#define I_CREATING (1 << 14)
+#define I_DONTCACHE (1 << 15)
+#define I_SYNC_QUEUED (1 << 16)
+#define I_PINNING_NETFS_WB (1 << 17)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
struct super_block *sget_dev(struct fs_context *fc, dev_t dev);
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
-#define fops_get(fops) \
- (((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
-#define fops_put(fops) \
- do { if (fops) module_put((fops)->owner); } while(0)
+#define fops_get(fops) ({ \
+ const struct file_operations *_fops = (fops); \
+ (((_fops) && try_module_get((_fops)->owner) ? (_fops) : NULL)); \
+})
+
+#define fops_put(fops) ({ \
+ const struct file_operations *_fops = (fops); \
+ if (_fops) \
+ module_put((_fops)->owner); \
+})
+
/*
* This one is to be used *ONLY* from ->open() instances.
* fops must be non-NULL, pinned down *and* module dependencies
return (u32)ino == 0;
}
-extern void __iget(struct inode * inode);
+/*
+ * inode->i_lock must be held
+ */
+static inline void __iget(struct inode *inode)
+{
+ atomic_inc(&inode->i_count);
+}
+
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
extern void __destroy_inode(struct inode *);
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
-#define no_llseek NULL
extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
int whence, loff_t maxsize, loff_t eof);
+loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence,
+ u64 *cookie);
extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
int whence, loff_t size);
extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
}
#endif
+bool inode_dio_finished(const struct inode *inode);
void inode_dio_wait(struct inode *inode);
+void inode_dio_wait_interruptible(struct inode *inode);
/**
* inode_dio_begin - signal start of a direct I/O requests
static inline void inode_dio_end(struct inode *inode)
{
if (atomic_dec_and_test(&inode->i_dio_count))
- wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
+ wake_up_var(&inode->i_dio_count);
}
extern void inode_set_flags(struct inode *inode, unsigned int flags,
extern int simple_empty(struct dentry *);
extern int simple_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata);
+ struct folio **foliop, void **fsdata);
extern const struct address_space_operations ram_aops;
extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
if (flags & RWF_NOWAIT) {
if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
return -EOPNOTSUPP;
- kiocb_flags |= IOCB_NOIO;
}
if (flags & RWF_ATOMIC) {
if (rw_type != WRITE)