// SPDX-License-Identifier: LGPL-2.1
/*
 * vfs operations that deal with files
 *
 * Copyright (C) International Business Machines Corp., 2002,2010
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
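/*
 * Map O_CREAT/O_EXCL/O_TRUNC combinations in the open flags to an
 * NT create disposition (see the mapping table in cifs_nt_open()).
 */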
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}
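/*
 * Open a file via the SMB1 unix extensions' POSIX create call and, if
 * the caller asked for it, instantiate or refresh the inode from the
 * returned FILE_UNIX_BASIC_INFO.
 */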
int cifs_posix_open(const char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
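/*
 * Open a file using the standard NT/SMB2 create path: convert the
 * POSIX open flags to a desired access mask and create disposition,
 * issue the open, then refresh the inode metadata for the new handle.
 */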
static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);

	return has_locks;
}
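/*
 * Acquire lock_sem for write by polling rather than sleeping in the
 * rwsem wait queue; a queued writer would block later readers.
 */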
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);
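/*
 * Allocate and initialize the per-open bookkeeping (cifsFileInfo),
 * link it into the tcon and inode open-file lists, and store the
 * server file id returned by the open.
 */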
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->deferred_close_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
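/* Take an extra reference on an open file handle. */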
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	cifs_fscache_release_inode_cookie(inode);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}
/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}
/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload: if true, queue the final release on a workqueue instead of
 *	     doing it inline; false on the close and oplock break paths
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close because it may cause an error when we open this file
		 * again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			file->private_data = cfile;
			spin_lock(&CIFS_I(inode)->deferred_lock);
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto out;
		} else {
			_cifsFileInfo_put(cfile, true, false);
		}
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->ctx->file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
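/*
 * Reopen a file handle that was invalidated, e.g. by reconnect after a
 * session loss; if @can_flush is set, write back dirty pages and
 * revalidate the inode before reusing the handle.
 */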
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	free_dentry_path(page);
	free_xid(xid);
	return rc;
}
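/*
 * Work handler for a deferred close: drop the deferred-close record
 * and release the reference that kept the handle open.
 */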
void smb2_deferred_work_close(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work,
			struct cifsFileInfo, deferred.work);

	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	cifs_del_deferred_close(cfile);
	cfile->deferred_close_scheduled = false;
	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	_cifsFileInfo_put(cfile, true, false);
}
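/*
 * ->release() for regular files. If the inode holds a read/write/handle
 * caching lease, the SMB close is deferred (for up to acregmax) so the
 * handle can be reused by a subsequent open; otherwise the reference
 * is put, and the handle closed, immediately.
 */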
int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
		    cinode->lease_granted &&
		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
		    dclose) {
			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
				inode->i_ctime = inode->i_mtime = current_time(inode);
				cifs_fscache_update_inode_cookie(inode);
			}
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_close_scheduled &&
			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there is no pending work, mod_delayed_work queues new work.
				 * So, increase the ref count to avoid use-after-free.
				 */
				if (!mod_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->acregmax))
					cifsFileInfo_get(cfile);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->acregmax);
				cfile->deferred_close_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}
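/*
 * After a reconnect, reopen any persistent handles that were
 * invalidated so that their durable state is re-established.
 */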
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2
/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
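/*
 * Hash the lock owner pointer with a boot-time secret so it can be
 * sent to the server as a lock "pid" without leaking kernel addresses.
 */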
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
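/*
 * Decode a struct file_lock into the lock type, lock/unlock operation
 * and wait flag used by the getlk/setlk paths below.
 */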
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	if (!(fl->fl_flags & FL_FLOCK)) {
		free_xid(xid);
		return -ENOLCK;
	}

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);
	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
/*
 * Update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held.
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}
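/*
 * Send @write_size bytes at @offset to the server using synchronous
 * writes, retrying on -EAGAIN and reopening an invalidated handle as
 * needed. Returns the number of bytes written or an error.
 */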
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written,
						     iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size) {
			i_size_write(d_inode(dentry), *offset);
			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
		}
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}
/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;

	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of an oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}
struct cifsFileInfo *
find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
{
	struct cifsFileInfo *cfile;
	int rc;

	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
	if (rc)
		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);

	return cfile;
}
int
cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
		       int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *cfile;
	void *page = alloc_dentry_path();

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
		struct cifsInodeInfo *cinode;
		const char *full_path = build_path_from_dentry(cfile->dentry, page);
		if (IS_ERR(full_path)) {
			spin_unlock(&tcon->open_file_lock);
			free_dentry_path(page);
			return PTR_ERR(full_path);
		}
		if (strcmp(full_path, name))
			continue;

		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		free_dentry_path(page);
		return cifs_get_writable_file(cinode, flags, ret_file);
	}

	spin_unlock(&tcon->open_file_lock);
	free_dentry_path(page);
	return -ENOENT;
}
int
cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *cfile;
	void *page = alloc_dentry_path();

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
		struct cifsInodeInfo *cinode;
		const char *full_path = build_path_from_dentry(cfile->dentry, page);
		if (IS_ERR(full_path)) {
			spin_unlock(&tcon->open_file_lock);
			free_dentry_path(page);
			return PTR_ERR(full_path);
		}
		if (strcmp(full_path, name))
			continue;

		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		free_dentry_path(page);
		*ret_file = find_readable_file(cinode, 0);
		return *ret_file ? 0 : -ENOENT;
	}

	spin_unlock(&tcon->open_file_lock);
	free_dentry_path(page);
	return -ENOENT;
}
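/*
 * Write the byte range [from, to) of a page back to the server using
 * any writable handle for the inode.
 */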
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
				    &open_file);
	if (!rc) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
		else
			rc = -EFAULT;
	} else {
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		if (!is_retryable_error(rc))
			rc = -EIO;
	}

	kunmap(page);
	return rc;
}
static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
			  pgoff_t end, pgoff_t *index,
			  unsigned int *found_pages)
{
	struct cifs_writedata *wdata;

	wdata = cifs_writedata_alloc((unsigned int)tofind,
				     cifs_writev_complete);
	if (!wdata)
		return NULL;

	*found_pages = find_get_pages_range_tag(mapping, index, end,
				PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
	return wdata;
}
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2356 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2357 struct address_space *mapping, struct writeback_control *wbc)
2361 wdata->sync_mode = wbc->sync_mode;
2362 wdata->nr_pages = nr_pages;
2363 wdata->offset = page_offset(wdata->pages[0]);
2364 wdata->pagesz = PAGE_SIZE;
2365 wdata->tailsz = min(i_size_read(mapping->host) -
2366 page_offset(wdata->pages[nr_pages - 1]),
2368 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2369 wdata->pid = wdata->cfile->pid;
2371 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2375 if (wdata->cfile->invalidHandle)
2378 rc = wdata->server->ops->async_writev(wdata,
2379 cifs_writedata_release);
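/*
 * Worked example of the size math in wdata_send_pages() (illustrative,
 * assuming 4 KiB pages): three pages starting at file offset 0 with
 * i_size = 9000 give tailsz = min(9000 - 8192, 4096) = 808 and
 * bytes = 2 * 4096 + 808 = 9000, so nothing past EOF is sent.
 */
static inline size_t __maybe_unused
demo_wdata_bytes(unsigned int nr_pages, size_t tailsz)
{
	return (size_t)(nr_pages - 1) * PAGE_SIZE + tailsz;
}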
2384 static int cifs_writepages(struct address_space *mapping,
2385 struct writeback_control *wbc)
2387 struct inode *inode = mapping->host;
2388 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2389 struct TCP_Server_Info *server;
2390 bool done = false, scanned = false, range_whole = false;
2392 struct cifs_writedata *wdata;
2393 struct cifsFileInfo *cfile = NULL;
2399 * If wsize is smaller than the page cache size, default to writing
2400 * one page at a time via cifs_writepage
2402 if (cifs_sb->ctx->wsize < PAGE_SIZE)
2403 return generic_writepages(mapping, wbc);
2406 if (wbc->range_cyclic) {
2407 index = mapping->writeback_index; /* Start from prev offset */
2410 index = wbc->range_start >> PAGE_SHIFT;
2411 end = wbc->range_end >> PAGE_SHIFT;
2412 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2416 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2419 while (!done && index <= end) {
2420 unsigned int i, nr_pages, found_pages, wsize;
2421 pgoff_t next = 0, tofind, saved_index = index;
2422 struct cifs_credits credits_on_stack;
2423 struct cifs_credits *credits = &credits_on_stack;
2424 int get_file_rc = 0;
2427 cifsFileInfo_put(cfile);
2429 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2431 /* in case of an error store it to return later */
2435 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2442 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2444 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2449 add_credits_and_wake_if(server, credits, 0);
2453 if (found_pages == 0) {
2454 kref_put(&wdata->refcount, cifs_writedata_release);
2455 add_credits_and_wake_if(server, credits, 0);
2459 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2460 end, &index, &next, &done);
2462 /* nothing to write? */
2463 if (nr_pages == 0) {
2464 kref_put(&wdata->refcount, cifs_writedata_release);
2465 add_credits_and_wake_if(server, credits, 0);
2469 wdata->credits = credits_on_stack;
2470 wdata->cfile = cfile;
2471 wdata->server = server;
2474 if (!wdata->cfile) {
2475 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2477 if (is_retryable_error(get_file_rc))
2482 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2484 for (i = 0; i < nr_pages; ++i)
2485 unlock_page(wdata->pages[i]);
2487 /* send failure -- clean up the mess */
2489 add_credits_and_wake_if(server, &wdata->credits, 0);
2490 for (i = 0; i < nr_pages; ++i) {
2491 if (is_retryable_error(rc))
2492 redirty_page_for_writepage(wbc,
2495 SetPageError(wdata->pages[i]);
2496 end_page_writeback(wdata->pages[i]);
2497 put_page(wdata->pages[i]);
2499 if (!is_retryable_error(rc))
2500 mapping_set_error(mapping, rc);
2502 kref_put(&wdata->refcount, cifs_writedata_release);
2504 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2505 index = saved_index;
2509 /* Return immediately if we received a signal during writing */
2510 if (is_interrupt_error(rc)) {
2515 if (rc != 0 && saved_rc == 0)
2518 wbc->nr_to_write -= nr_pages;
2519 if (wbc->nr_to_write <= 0)
2525 if (!scanned && !done) {
2527 * We hit the last page and there is more work to be done: wrap
2528 * back to the start of the file
2538 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2539 mapping->writeback_index = index;
2542 cifsFileInfo_put(cfile);
2544 /* Indication to update ctime and mtime as close is deferred */
2545 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2550 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2556 /* BB add check for wbc flags */
2558 if (!PageUptodate(page))
2559 cifs_dbg(FYI, "ppw - page not up to date\n");
2562 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2564 * A writepage() implementation always needs to do either this,
2565 * or re-dirty the page with "redirty_page_for_writepage()" in
2566 * the case of a failure.
2568 * Just unlocking the page would leave the radix tree tag-bits
2569 * out of sync with the actual state of the page.
2571 set_page_writeback(page);
2573 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2574 if (is_retryable_error(rc)) {
2575 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2577 redirty_page_for_writepage(wbc, page);
2578 } else if (rc != 0) {
2580 mapping_set_error(page->mapping, rc);
2582 SetPageUptodate(page);
2584 end_page_writeback(page);
2590 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2592 int rc = cifs_writepage_locked(page, wbc);
2597 static int cifs_write_end(struct file *file, struct address_space *mapping,
2598 loff_t pos, unsigned len, unsigned copied,
2599 struct page *page, void *fsdata)
2602 struct inode *inode = mapping->host;
2603 struct cifsFileInfo *cfile = file->private_data;
2604 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2607 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2610 pid = current->tgid;
2612 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2615 if (PageChecked(page)) {
2617 SetPageUptodate(page);
2618 ClearPageChecked(page);
2619 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2620 SetPageUptodate(page);
2622 if (!PageUptodate(page)) {
2624 unsigned offset = pos & (PAGE_SIZE - 1);
2628 /* this is probably better than directly calling
2629 cifs_partialpagewrite() since here the file handle is
2630 already known, which we might as well leverage */
2631 /* BB check if anything else missing out of ppw
2632 such as updating last write time */
2633 page_data = kmap(page);
2634 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2635 /* if (rc < 0) should we set writebehind rc? */
2642 set_page_dirty(page);
2646 spin_lock(&inode->i_lock);
2647 if (pos > inode->i_size) {
2648 i_size_write(inode, pos);
2649 inode->i_blocks = (512 - 1 + pos) >> 9;
2651 spin_unlock(&inode->i_lock);
2656 /* Indication to update ctime and mtime as close is deferred */
2657 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2662 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2667 struct cifs_tcon *tcon;
2668 struct TCP_Server_Info *server;
2669 struct cifsFileInfo *smbfile = file->private_data;
2670 struct inode *inode = file_inode(file);
2671 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2673 rc = file_write_and_wait_range(file, start, end);
2675 trace_cifs_fsync_err(inode->i_ino, rc);
2681 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2684 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2685 rc = cifs_zap_mapping(inode);
2687 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2688 rc = 0; /* don't care about it in fsync */
2692 tcon = tlink_tcon(smbfile->tlink);
2693 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2694 server = tcon->ses->server;
2695 if (server->ops->flush == NULL) {
2697 goto strict_fsync_exit;
2700 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2701 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2703 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2704 cifsFileInfo_put(smbfile);
2706 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2708 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2716 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2720 struct cifs_tcon *tcon;
2721 struct TCP_Server_Info *server;
2722 struct cifsFileInfo *smbfile = file->private_data;
2723 struct inode *inode = file_inode(file);
2724 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2726 rc = file_write_and_wait_range(file, start, end);
2728 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2734 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2737 tcon = tlink_tcon(smbfile->tlink);
2738 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2739 server = tcon->ses->server;
2740 if (server->ops->flush == NULL) {
2745 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2746 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2748 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2749 cifsFileInfo_put(smbfile);
2751 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2753 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2762 * As file closes, flush all cached write data for this inode checking
2763 * for write behind errors.
2765 int cifs_flush(struct file *file, fl_owner_t id)
2767 struct inode *inode = file_inode(file);
2770 if (file->f_mode & FMODE_WRITE)
2771 rc = filemap_write_and_wait(inode->i_mapping);
2773 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2775 trace_cifs_flush_err(inode->i_ino, rc);
2780 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2785 for (i = 0; i < num_pages; i++) {
2786 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2789 * save number of pages we have already allocated and
2790 * return with ENOMEM error
2799 for (i = 0; i < num_pages; i++)
2806 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2811 clen = min_t(const size_t, len, wsize);
2812 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
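/*
 * Worked example (assuming 4 KiB pages): get_numpages(65536, 10000, &l)
 * clamps the chunk to min(10000, 65536) = 10000 bytes and returns
 * DIV_ROUND_UP(10000, 4096) = 3 pages. A throwaway self-check:
 */
static inline size_t __maybe_unused demo_get_numpages(void)
{
	size_t cur_len;

	return get_numpages(65536, 10000, &cur_len); /* 3; cur_len == 10000 */
}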
2821 cifs_uncached_writedata_release(struct kref *refcount)
2824 struct cifs_writedata *wdata = container_of(refcount,
2825 struct cifs_writedata, refcount);
2827 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2828 for (i = 0; i < wdata->nr_pages; i++)
2829 put_page(wdata->pages[i]);
2830 cifs_writedata_release(refcount);
2833 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2836 cifs_uncached_writev_complete(struct work_struct *work)
2838 struct cifs_writedata *wdata = container_of(work,
2839 struct cifs_writedata, work);
2840 struct inode *inode = d_inode(wdata->cfile->dentry);
2841 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2843 spin_lock(&inode->i_lock);
2844 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2845 if (cifsi->server_eof > inode->i_size)
2846 i_size_write(inode, cifsi->server_eof);
2847 spin_unlock(&inode->i_lock);
2849 complete(&wdata->done);
2850 collect_uncached_write_data(wdata->ctx);
2851 /* the below call can possibly free the last ref to aio ctx */
2852 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2856 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2857 size_t *len, unsigned long *num_pages)
2859 size_t save_len, copied, bytes, cur_len = *len;
2860 unsigned long i, nr_pages = *num_pages;
2863 for (i = 0; i < nr_pages; i++) {
2864 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2865 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2868 * If we didn't copy as much as we expected, then that
2869 * may mean we trod into an unmapped area. Stop copying
2870 * at that point. On the next pass through the big
2871 * loop, we'll likely end up getting a zero-length
2872 * write and bailing out of it.
2877 cur_len = save_len - cur_len;
2881 * If we have no data to send, then that probably means that
2882 * the copy above failed altogether. That's most likely because
2883 * the address in the iovec was bogus. Return -EFAULT and let
2884 * the caller free anything we allocated and bail out.
2890 * i + 1 now represents the number of pages we actually used in
2891 * the copy phase above.
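/*
 * Worked example for the copy loop above (4 KiB pages): cur_len = 10000
 * spread over three pages copies 4096 + 4096 + 1808 bytes; the helper
 * below is an illustrative restatement of the tail computation the
 * caller performs afterwards.
 */
static inline size_t __maybe_unused
demo_iovec_tail(size_t cur_len, unsigned long nr_pages)
{
	return cur_len - (nr_pages - 1) * PAGE_SIZE; /* 10000, 3 -> 1808 */
}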
2898 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2899 struct cifs_aio_ctx *ctx)
2902 struct cifs_credits credits;
2904 struct TCP_Server_Info *server = wdata->server;
2907 if (wdata->cfile->invalidHandle) {
2908 rc = cifs_reopen_file(wdata->cfile, false);
2917 * Wait for credits to resend this wdata.
2918 * Note: we are attempting to resend the whole wdata not in
2922 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2927 if (wsize < wdata->bytes) {
2928 add_credits_and_wake_if(server, &credits, 0);
2931 } while (wsize < wdata->bytes);
2932 wdata->credits = credits;
2934 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2937 if (wdata->cfile->invalidHandle)
2940 #ifdef CONFIG_CIFS_SMB_DIRECT
2942 wdata->mr->need_invalidate = true;
2943 smbd_deregister_mr(wdata->mr);
2947 rc = server->ops->async_writev(wdata,
2948 cifs_uncached_writedata_release);
2952 /* If the write was successfully sent, we are done */
2954 list_add_tail(&wdata->list, wdata_list);
2958 /* Roll back credits and retry if needed */
2959 add_credits_and_wake_if(server, &wdata->credits, 0);
2960 } while (rc == -EAGAIN);
2963 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2968 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2969 struct cifsFileInfo *open_file,
2970 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2971 struct cifs_aio_ctx *ctx)
2975 unsigned long nr_pages, num_pages, i;
2976 struct cifs_writedata *wdata;
2977 struct iov_iter saved_from = *from;
2978 loff_t saved_offset = offset;
2980 struct TCP_Server_Info *server;
2981 struct page **pagevec;
2985 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2986 pid = open_file->pid;
2988 pid = current->tgid;
2990 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2995 struct cifs_credits credits_on_stack;
2996 struct cifs_credits *credits = &credits_on_stack;
2998 if (open_file->invalidHandle) {
2999 rc = cifs_reopen_file(open_file, false);
3006 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3011 cur_len = min_t(const size_t, len, wsize);
3013 if (ctx->direct_io) {
3016 result = iov_iter_get_pages_alloc(
3017 from, &pagevec, cur_len, &start);
3020 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3021 result, iov_iter_type(from),
3022 from->iov_offset, from->count);
3026 add_credits_and_wake_if(server, credits, 0);
3029 cur_len = (size_t)result;
3030 iov_iter_advance(from, cur_len);
3033 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3035 wdata = cifs_writedata_direct_alloc(pagevec,
3036 cifs_uncached_writev_complete);
3039 add_credits_and_wake_if(server, credits, 0);
3044 wdata->page_offset = start;
3047 cur_len - (PAGE_SIZE - start) -
3048 (nr_pages - 2) * PAGE_SIZE :
3051 nr_pages = get_numpages(wsize, len, &cur_len);
3052 wdata = cifs_writedata_alloc(nr_pages,
3053 cifs_uncached_writev_complete);
3056 add_credits_and_wake_if(server, credits, 0);
3060 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3062 kvfree(wdata->pages);
3064 add_credits_and_wake_if(server, credits, 0);
3068 num_pages = nr_pages;
3069 rc = wdata_fill_from_iovec(
3070 wdata, from, &cur_len, &num_pages);
3072 for (i = 0; i < nr_pages; i++)
3073 put_page(wdata->pages[i]);
3074 kvfree(wdata->pages);
3076 add_credits_and_wake_if(server, credits, 0);
3081 * Bring nr_pages down to the number of pages we
3082 * actually used, and free any pages that we didn't use.
3084 for ( ; nr_pages > num_pages; nr_pages--)
3085 put_page(wdata->pages[nr_pages - 1]);
3087 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3090 wdata->sync_mode = WB_SYNC_ALL;
3091 wdata->nr_pages = nr_pages;
3092 wdata->offset = (__u64)offset;
3093 wdata->cfile = cifsFileInfo_get(open_file);
3094 wdata->server = server;
3096 wdata->bytes = cur_len;
3097 wdata->pagesz = PAGE_SIZE;
3098 wdata->credits = credits_on_stack;
3100 kref_get(&ctx->refcount);
3102 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3105 if (wdata->cfile->invalidHandle)
3108 rc = server->ops->async_writev(wdata,
3109 cifs_uncached_writedata_release);
3113 add_credits_and_wake_if(server, &wdata->credits, 0);
3114 kref_put(&wdata->refcount,
3115 cifs_uncached_writedata_release);
3116 if (rc == -EAGAIN) {
3118 iov_iter_advance(from, offset - saved_offset);
3124 list_add_tail(&wdata->list, wdata_list);
3133 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3135 struct cifs_writedata *wdata, *tmp;
3136 struct cifs_tcon *tcon;
3137 struct cifs_sb_info *cifs_sb;
3138 struct dentry *dentry = ctx->cfile->dentry;
3141 tcon = tlink_tcon(ctx->cfile->tlink);
3142 cifs_sb = CIFS_SB(dentry->d_sb);
3144 mutex_lock(&ctx->aio_mutex);
3146 if (list_empty(&ctx->list)) {
3147 mutex_unlock(&ctx->aio_mutex);
3153 * Wait for and collect replies for any successful sends in order of
3154 * increasing offset. Once an error is hit, then return without waiting
3155 * for any more replies.
3158 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3160 if (!try_wait_for_completion(&wdata->done)) {
3161 mutex_unlock(&ctx->aio_mutex);
3168 ctx->total_len += wdata->bytes;
3170 /* resend call if it's a retryable error */
3171 if (rc == -EAGAIN) {
3172 struct list_head tmp_list;
3173 struct iov_iter tmp_from = ctx->iter;
3175 INIT_LIST_HEAD(&tmp_list);
3176 list_del_init(&wdata->list);
3179 rc = cifs_resend_wdata(
3180 wdata, &tmp_list, ctx);
3182 iov_iter_advance(&tmp_from,
3183 wdata->offset - ctx->pos);
3185 rc = cifs_write_from_iter(wdata->offset,
3186 wdata->bytes, &tmp_from,
3187 ctx->cfile, cifs_sb, &tmp_list,
3190 kref_put(&wdata->refcount,
3191 cifs_uncached_writedata_release);
3194 list_splice(&tmp_list, &ctx->list);
3198 list_del_init(&wdata->list);
3199 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3202 cifs_stats_bytes_written(tcon, ctx->total_len);
3203 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3205 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3207 mutex_unlock(&ctx->aio_mutex);
3209 if (ctx->iocb && ctx->iocb->ki_complete)
3210 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3212 complete(&ctx->done);
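/*
 * The result convention implemented above, restated as a sketch: ctx->rc
 * carries the byte count when every send succeeded and the first error
 * otherwise, and is what ki_complete() reports to async callers.
 */
static inline ssize_t __maybe_unused
demo_ctx_result(int rc, size_t total_len)
{
	return rc == 0 ? (ssize_t)total_len : rc;
}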
3215 static ssize_t __cifs_writev(
3216 struct kiocb *iocb, struct iov_iter *from, bool direct)
3218 struct file *file = iocb->ki_filp;
3219 ssize_t total_written = 0;
3220 struct cifsFileInfo *cfile;
3221 struct cifs_tcon *tcon;
3222 struct cifs_sb_info *cifs_sb;
3223 struct cifs_aio_ctx *ctx;
3224 struct iov_iter saved_from = *from;
3225 size_t len = iov_iter_count(from);
3229 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC.
3230 * In this case, fall back to the non-direct write function.
3231 * This could be improved by getting pages directly in ITER_KVEC.
3233 if (direct && iov_iter_is_kvec(from)) {
3234 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3238 rc = generic_write_checks(iocb, from);
3242 cifs_sb = CIFS_FILE_SB(file);
3243 cfile = file->private_data;
3244 tcon = tlink_tcon(cfile->tlink);
3246 if (!tcon->ses->server->ops->async_writev)
3249 ctx = cifs_aio_ctx_alloc();
3253 ctx->cfile = cifsFileInfo_get(cfile);
3255 if (!is_sync_kiocb(iocb))
3258 ctx->pos = iocb->ki_pos;
3261 ctx->direct_io = true;
3265 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3267 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3272 /* grab a lock here because the response handlers can access ctx */
3273 mutex_lock(&ctx->aio_mutex);
3275 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3276 cfile, cifs_sb, &ctx->list, ctx);
3279 * If at least one write was successfully sent, then discard any rc
3280 * value from the later writes. If the other write succeeds, then
3281 * we'll end up returning whatever was written. If it fails, then
3282 * we'll get a new rc value from that.
3284 if (!list_empty(&ctx->list))
3287 mutex_unlock(&ctx->aio_mutex);
3290 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3294 if (!is_sync_kiocb(iocb)) {
3295 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3296 return -EIOCBQUEUED;
3299 rc = wait_for_completion_killable(&ctx->done);
3301 mutex_lock(&ctx->aio_mutex);
3302 ctx->rc = rc = -EINTR;
3303 total_written = ctx->total_len;
3304 mutex_unlock(&ctx->aio_mutex);
3307 total_written = ctx->total_len;
3310 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3312 if (unlikely(!total_written))
3315 iocb->ki_pos += total_written;
3316 return total_written;
3319 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3321 return __cifs_writev(iocb, from, true);
3324 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3326 return __cifs_writev(iocb, from, false);
3330 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3332 struct file *file = iocb->ki_filp;
3333 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3334 struct inode *inode = file->f_mapping->host;
3335 struct cifsInodeInfo *cinode = CIFS_I(inode);
3336 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3341 * We need to hold the sem to be sure nobody modifies lock list
3342 * with a brlock that prevents writing.
3344 down_read(&cinode->lock_sem);
3346 rc = generic_write_checks(iocb, from);
3350 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3351 server->vals->exclusive_lock_type, 0,
3352 NULL, CIFS_WRITE_OP))
3353 rc = __generic_file_write_iter(iocb, from);
3357 up_read(&cinode->lock_sem);
3358 inode_unlock(inode);
3361 rc = generic_write_sync(iocb, rc);
3366 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3368 struct inode *inode = file_inode(iocb->ki_filp);
3369 struct cifsInodeInfo *cinode = CIFS_I(inode);
3370 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3371 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3372 iocb->ki_filp->private_data;
3373 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3376 written = cifs_get_writer(cinode);
3380 if (CIFS_CACHE_WRITE(cinode)) {
3381 if (cap_unix(tcon->ses) &&
3382 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3383 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3384 written = generic_file_write_iter(iocb, from);
3387 written = cifs_writev(iocb, from);
3391 * For non-oplocked files in strict cache mode we need to write the data
3392 * to the server exactly from pos to pos+len-1 rather than flush all
3393 * affected pages because that may cause an error with mandatory locks on
3394 * these pages but not on the region from pos to pos+len-1.
3396 written = cifs_user_writev(iocb, from);
3397 if (CIFS_CACHE_READ(cinode)) {
3399 * We have read level caching and we have just sent a write
3400 * request to the server thus making data in the cache stale.
3401 * Zap the cache and set oplock/lease level to NONE to avoid
3402 * reading stale data from the cache. All subsequent read
3403 * operations will read new data from the server.
3405 cifs_zap_mapping(inode);
3406 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3411 cifs_put_writer(cinode);
3415 static struct cifs_readdata *
3416 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3418 struct cifs_readdata *rdata;
3420 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3421 if (rdata != NULL) {
3422 rdata->pages = pages;
3423 kref_init(&rdata->refcount);
3424 INIT_LIST_HEAD(&rdata->list);
3425 init_completion(&rdata->done);
3426 INIT_WORK(&rdata->work, complete);
3432 static struct cifs_readdata *
3433 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3435 struct page **pages =
3436 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3437 struct cifs_readdata *ret = NULL;
3440 ret = cifs_readdata_direct_alloc(pages, complete);
3449 cifs_readdata_release(struct kref *refcount)
3451 struct cifs_readdata *rdata = container_of(refcount,
3452 struct cifs_readdata, refcount);
3453 #ifdef CONFIG_CIFS_SMB_DIRECT
3455 smbd_deregister_mr(rdata->mr);
3460 cifsFileInfo_put(rdata->cfile);
3462 kvfree(rdata->pages);
3467 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3473 for (i = 0; i < nr_pages; i++) {
3474 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3479 rdata->pages[i] = page;
3483 unsigned int nr_page_failed = i;
3485 for (i = 0; i < nr_page_failed; i++) {
3486 put_page(rdata->pages[i]);
3487 rdata->pages[i] = NULL;
3494 cifs_uncached_readdata_release(struct kref *refcount)
3496 struct cifs_readdata *rdata = container_of(refcount,
3497 struct cifs_readdata, refcount);
3500 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3501 for (i = 0; i < rdata->nr_pages; i++) {
3502 put_page(rdata->pages[i]);
3504 cifs_readdata_release(refcount);
3508 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3509 * @rdata: the readdata response with list of pages holding data
3510 * @iter: destination for our data
3512 * This function copies data from a list of pages in a readdata response into
3513 * an array of iovecs. It will first calculate where the data should go
3514 * based on the info in the readdata and then copy the data into that spot.
3517 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3519 size_t remaining = rdata->got_bytes;
3522 for (i = 0; i < rdata->nr_pages; i++) {
3523 struct page *page = rdata->pages[i];
3524 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3527 if (unlikely(iov_iter_is_pipe(iter))) {
3528 void *addr = kmap_atomic(page);
3530 written = copy_to_iter(addr, copy, iter);
3531 kunmap_atomic(addr);
3533 written = copy_page_to_iter(page, 0, copy, iter);
3534 remaining -= written;
3535 if (written < copy && iov_iter_count(iter) > 0)
3538 return remaining ? -EFAULT : 0;
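/*
 * Worked example for cifs_readdata_to_iov() (4 KiB pages): with
 * got_bytes = 6000, the loop copies min(6000, 4096) = 4096 from page 0
 * and min(1904, 4096) = 1904 from page 1; a non-zero remainder after the
 * loop means the destination iovec faulted, hence -EFAULT.
 */
static inline size_t __maybe_unused demo_page_copy_len(size_t remaining)
{
	return min_t(size_t, remaining, PAGE_SIZE);
}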
3541 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3544 cifs_uncached_readv_complete(struct work_struct *work)
3546 struct cifs_readdata *rdata = container_of(work,
3547 struct cifs_readdata, work);
3549 complete(&rdata->done);
3550 collect_uncached_read_data(rdata->ctx);
3551 /* the below call can possibly free the last ref to aio ctx */
3552 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3556 uncached_fill_pages(struct TCP_Server_Info *server,
3557 struct cifs_readdata *rdata, struct iov_iter *iter,
3562 unsigned int nr_pages = rdata->nr_pages;
3563 unsigned int page_offset = rdata->page_offset;
3565 rdata->got_bytes = 0;
3566 rdata->tailsz = PAGE_SIZE;
3567 for (i = 0; i < nr_pages; i++) {
3568 struct page *page = rdata->pages[i];
3570 unsigned int segment_size = rdata->pagesz;
3573 segment_size -= page_offset;
3579 /* no need to hold page hostage */
3580 rdata->pages[i] = NULL;
3587 if (len >= segment_size)
3588 /* enough data to fill the page */
3591 rdata->tailsz = len;
3595 result = copy_page_from_iter(
3596 page, page_offset, n, iter);
3597 #ifdef CONFIG_CIFS_SMB_DIRECT
3602 result = cifs_read_page_from_socket(
3603 server, page, page_offset, n);
3607 rdata->got_bytes += result;
3610 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3611 rdata->got_bytes : result;
3615 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3616 struct cifs_readdata *rdata, unsigned int len)
3618 return uncached_fill_pages(server, rdata, NULL, len);
3622 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3623 struct cifs_readdata *rdata,
3624 struct iov_iter *iter)
3626 return uncached_fill_pages(server, rdata, iter, iter->count);
3629 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3630 struct list_head *rdata_list,
3631 struct cifs_aio_ctx *ctx)
3634 struct cifs_credits credits;
3636 struct TCP_Server_Info *server;
3638 /* XXX: should we pick a new channel here? */
3639 server = rdata->server;
3642 if (rdata->cfile->invalidHandle) {
3643 rc = cifs_reopen_file(rdata->cfile, true);
3651 * Wait for credits to resend this rdata.
3652 * Note: we are attempting to resend the whole rdata not in
3656 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3662 if (rsize < rdata->bytes) {
3663 add_credits_and_wake_if(server, &credits, 0);
3666 } while (rsize < rdata->bytes);
3667 rdata->credits = credits;
3669 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3671 if (rdata->cfile->invalidHandle)
3674 #ifdef CONFIG_CIFS_SMB_DIRECT
3676 rdata->mr->need_invalidate = true;
3677 smbd_deregister_mr(rdata->mr);
3681 rc = server->ops->async_readv(rdata);
3685 /* If the read was successfully sent, we are done */
3687 /* Add to aio pending list */
3688 list_add_tail(&rdata->list, rdata_list);
3692 /* Roll back credits and retry if needed */
3693 add_credits_and_wake_if(server, &rdata->credits, 0);
3694 } while (rc == -EAGAIN);
3697 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3702 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3703 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3704 struct cifs_aio_ctx *ctx)
3706 struct cifs_readdata *rdata;
3707 unsigned int npages, rsize;
3708 struct cifs_credits credits_on_stack;
3709 struct cifs_credits *credits = &credits_on_stack;
3713 struct TCP_Server_Info *server;
3714 struct page **pagevec;
3716 struct iov_iter direct_iov = ctx->iter;
3718 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3720 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3721 pid = open_file->pid;
3723 pid = current->tgid;
3726 iov_iter_advance(&direct_iov, offset - ctx->pos);
3729 if (open_file->invalidHandle) {
3730 rc = cifs_reopen_file(open_file, true);
3737 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3742 cur_len = min_t(const size_t, len, rsize);
3744 if (ctx->direct_io) {
3747 result = iov_iter_get_pages_alloc(
3748 &direct_iov, &pagevec,
3752 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3753 result, iov_iter_type(&direct_iov),
3754 direct_iov.iov_offset,
3759 add_credits_and_wake_if(server, credits, 0);
3762 cur_len = (size_t)result;
3763 iov_iter_advance(&direct_iov, cur_len);
3765 rdata = cifs_readdata_direct_alloc(
3766 pagevec, cifs_uncached_readv_complete);
3768 add_credits_and_wake_if(server, credits, 0);
3773 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3774 rdata->page_offset = start;
3775 rdata->tailsz = npages > 1 ?
3776 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3781 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3782 /* allocate a readdata struct */
3783 rdata = cifs_readdata_alloc(npages,
3784 cifs_uncached_readv_complete);
3786 add_credits_and_wake_if(server, credits, 0);
3791 rc = cifs_read_allocate_pages(rdata, npages);
3793 kvfree(rdata->pages);
3795 add_credits_and_wake_if(server, credits, 0);
3799 rdata->tailsz = PAGE_SIZE;
3802 rdata->server = server;
3803 rdata->cfile = cifsFileInfo_get(open_file);
3804 rdata->nr_pages = npages;
3805 rdata->offset = offset;
3806 rdata->bytes = cur_len;
3808 rdata->pagesz = PAGE_SIZE;
3809 rdata->read_into_pages = cifs_uncached_read_into_pages;
3810 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3811 rdata->credits = credits_on_stack;
3813 kref_get(&ctx->refcount);
3815 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3818 if (rdata->cfile->invalidHandle)
3821 rc = server->ops->async_readv(rdata);
3825 add_credits_and_wake_if(server, &rdata->credits, 0);
3826 kref_put(&rdata->refcount,
3827 cifs_uncached_readdata_release);
3828 if (rc == -EAGAIN) {
3829 iov_iter_revert(&direct_iov, cur_len);
3835 list_add_tail(&rdata->list, rdata_list);
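/*
 * Worked example of the direct-I/O page geometry above (illustrative,
 * 4 KiB pages): cur_len = 10000 starting at in-page offset start = 512
 * spans npages = (10000 + 512 + 4095) / 4096 = 3 pages, holding
 * 3584 + 4096 + 2320 bytes respectively.
 */
static inline size_t __maybe_unused
demo_direct_tailsz(size_t cur_len, size_t start, unsigned int npages)
{
	return npages > 1 ?
		cur_len - (PAGE_SIZE - start) - (npages - 2) * PAGE_SIZE :
		cur_len; /* e.g. 10000, 512, 3 -> 2320 */
}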
3844 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3846 struct cifs_readdata *rdata, *tmp;
3847 struct iov_iter *to = &ctx->iter;
3848 struct cifs_sb_info *cifs_sb;
3851 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3853 mutex_lock(&ctx->aio_mutex);
3855 if (list_empty(&ctx->list)) {
3856 mutex_unlock(&ctx->aio_mutex);
3861 /* the loop below should proceed in the order of increasing offsets */
3863 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3865 if (!try_wait_for_completion(&rdata->done)) {
3866 mutex_unlock(&ctx->aio_mutex);
3870 if (rdata->result == -EAGAIN) {
3871 /* resend call if it's a retryable error */
3872 struct list_head tmp_list;
3873 unsigned int got_bytes = rdata->got_bytes;
3875 list_del_init(&rdata->list);
3876 INIT_LIST_HEAD(&tmp_list);
3879 * Got a part of data and then reconnect has
3880 * happened -- fill the buffer and continue
3883 if (got_bytes && got_bytes < rdata->bytes) {
3885 if (!ctx->direct_io)
3886 rc = cifs_readdata_to_iov(rdata, to);
3888 kref_put(&rdata->refcount,
3889 cifs_uncached_readdata_release);
3894 if (ctx->direct_io) {
3896 * Re-use rdata as this is a
3899 rc = cifs_resend_rdata(
3903 rc = cifs_send_async_read(
3904 rdata->offset + got_bytes,
3905 rdata->bytes - got_bytes,
3906 rdata->cfile, cifs_sb,
3909 kref_put(&rdata->refcount,
3910 cifs_uncached_readdata_release);
3913 list_splice(&tmp_list, &ctx->list);
3916 } else if (rdata->result)
3918 else if (!ctx->direct_io)
3919 rc = cifs_readdata_to_iov(rdata, to);
3921 /* if there was a short read -- discard anything left */
3922 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3925 ctx->total_len += rdata->got_bytes;
3927 list_del_init(&rdata->list);
3928 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3931 if (!ctx->direct_io)
3932 ctx->total_len = ctx->len - iov_iter_count(to);
3934 /* mask nodata case */
3938 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3940 mutex_unlock(&ctx->aio_mutex);
3942 if (ctx->iocb && ctx->iocb->ki_complete)
3943 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3945 complete(&ctx->done);
3948 static ssize_t __cifs_readv(
3949 struct kiocb *iocb, struct iov_iter *to, bool direct)
3952 struct file *file = iocb->ki_filp;
3953 struct cifs_sb_info *cifs_sb;
3954 struct cifsFileInfo *cfile;
3955 struct cifs_tcon *tcon;
3956 ssize_t rc, total_read = 0;
3957 loff_t offset = iocb->ki_pos;
3958 struct cifs_aio_ctx *ctx;
3961 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3962 * so fall back to the data-copy read path.
3963 * This could be improved by getting pages directly in ITER_KVEC.
3965 if (direct && iov_iter_is_kvec(to)) {
3966 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3970 len = iov_iter_count(to);
3974 cifs_sb = CIFS_FILE_SB(file);
3975 cfile = file->private_data;
3976 tcon = tlink_tcon(cfile->tlink);
3978 if (!tcon->ses->server->ops->async_readv)
3981 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3982 cifs_dbg(FYI, "attempting read on write only file instance\n");
3984 ctx = cifs_aio_ctx_alloc();
3988 ctx->cfile = cifsFileInfo_get(cfile);
3990 if (!is_sync_kiocb(iocb))
3993 if (iter_is_iovec(to))
3994 ctx->should_dirty = true;
3998 ctx->direct_io = true;
4002 rc = setup_aio_ctx_iter(ctx, to, READ);
4004 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4010 /* grab a lock here because the read response handlers can access ctx */
4011 mutex_lock(&ctx->aio_mutex);
4013 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4015 /* if at least one read request was sent successfully, then reset rc */
4016 if (!list_empty(&ctx->list))
4019 mutex_unlock(&ctx->aio_mutex);
4022 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4026 if (!is_sync_kiocb(iocb)) {
4027 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4028 return -EIOCBQUEUED;
4031 rc = wait_for_completion_killable(&ctx->done);
4033 mutex_lock(&ctx->aio_mutex);
4034 ctx->rc = rc = -EINTR;
4035 total_read = ctx->total_len;
4036 mutex_unlock(&ctx->aio_mutex);
4039 total_read = ctx->total_len;
4042 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4045 iocb->ki_pos += total_read;
4051 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4053 return __cifs_readv(iocb, to, true);
4056 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4058 return __cifs_readv(iocb, to, false);
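/*
 * Sketch (an assumption for illustration; the real tables live elsewhere
 * in the module): the read/write entry points above are what a
 * file_operations instance plugs into .read_iter/.write_iter, e.g. the
 * direct pair for forcedirectio ("cache=none") mounts.
 */
static const struct file_operations demo_direct_fops __maybe_unused = {
	.read_iter = cifs_direct_readv,
	.write_iter = cifs_direct_writev,
};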
4062 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4064 struct inode *inode = file_inode(iocb->ki_filp);
4065 struct cifsInodeInfo *cinode = CIFS_I(inode);
4066 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4067 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4068 iocb->ki_filp->private_data;
4069 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4073 * In strict cache mode we need to read from the server all the time
4074 * if we don't have a level II oplock because the server can delay mtime
4075 * change - so we can't make a decision about invalidating the inode.
4076 * And we can also fail with page reading if there are mandatory locks
4077 * on pages affected by this read but not on the region from pos to
4080 if (!CIFS_CACHE_READ(cinode))
4081 return cifs_user_readv(iocb, to);
4083 if (cap_unix(tcon->ses) &&
4084 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4085 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4086 return generic_file_read_iter(iocb, to);
4089 * We need to hold the sem to be sure nobody modifies lock list
4090 * with a brlock that prevents reading.
4092 down_read(&cinode->lock_sem);
4093 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4094 tcon->ses->server->vals->shared_lock_type,
4095 0, NULL, CIFS_READ_OP))
4096 rc = generic_file_read_iter(iocb, to);
4097 up_read(&cinode->lock_sem);
4102 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4105 unsigned int bytes_read = 0;
4106 unsigned int total_read;
4107 unsigned int current_read_size;
4109 struct cifs_sb_info *cifs_sb;
4110 struct cifs_tcon *tcon;
4111 struct TCP_Server_Info *server;
4114 struct cifsFileInfo *open_file;
4115 struct cifs_io_parms io_parms = {0};
4116 int buf_type = CIFS_NO_BUFFER;
4120 cifs_sb = CIFS_FILE_SB(file);
4122 /* FIXME: set up handlers for larger reads and/or convert to async */
4123 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4125 if (file->private_data == NULL) {
4130 open_file = file->private_data;
4131 tcon = tlink_tcon(open_file->tlink);
4132 server = cifs_pick_channel(tcon->ses);
4134 if (!server->ops->sync_read) {
4139 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4140 pid = open_file->pid;
4142 pid = current->tgid;
4144 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4145 cifs_dbg(FYI, "attempting read on write only file instance\n");
4147 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4148 total_read += bytes_read, cur_offset += bytes_read) {
4150 current_read_size = min_t(uint, read_size - total_read,
4153 * For Windows ME and 9x we do not want to request more
4154 * than it negotiated since it will refuse the read
4157 if (!(tcon->ses->capabilities &
4158 tcon->ses->server->vals->cap_large_files)) {
4159 current_read_size = min_t(uint,
4160 current_read_size, CIFSMaxBufSize);
4162 if (open_file->invalidHandle) {
4163 rc = cifs_reopen_file(open_file, true);
4168 io_parms.tcon = tcon;
4169 io_parms.offset = *offset;
4170 io_parms.length = current_read_size;
4171 io_parms.server = server;
4172 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4173 &bytes_read, &cur_offset,
4175 } while (rc == -EAGAIN);
4177 if (rc || (bytes_read == 0)) {
4185 cifs_stats_bytes_read(tcon, total_read);
4186 *offset += bytes_read;
4194 * If the page is mmap'ed into a process' page tables, then we need to make
4195 * sure that it doesn't change while being written back.
4198 cifs_page_mkwrite(struct vm_fault *vmf)
4200 struct page *page = vmf->page;
4201 struct file *file = vmf->vma->vm_file;
4202 struct inode *inode = file_inode(file);
4204 cifs_fscache_wait_on_page_write(inode, page);
4207 return VM_FAULT_LOCKED;
4210 static const struct vm_operations_struct cifs_file_vm_ops = {
4211 .fault = filemap_fault,
4212 .map_pages = filemap_map_pages,
4213 .page_mkwrite = cifs_page_mkwrite,
4216 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4219 struct inode *inode = file_inode(file);
4223 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4224 rc = cifs_zap_mapping(inode);
4226 rc = generic_file_mmap(file, vma);
4228 vma->vm_ops = &cifs_file_vm_ops;
4234 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4240 rc = cifs_revalidate_file(file);
4242 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4245 rc = generic_file_mmap(file, vma);
4247 vma->vm_ops = &cifs_file_vm_ops;
4254 cifs_readv_complete(struct work_struct *work)
4256 unsigned int i, got_bytes;
4257 struct cifs_readdata *rdata = container_of(work,
4258 struct cifs_readdata, work);
4260 got_bytes = rdata->got_bytes;
4261 for (i = 0; i < rdata->nr_pages; i++) {
4262 struct page *page = rdata->pages[i];
4264 lru_cache_add(page);
4266 if (rdata->result == 0 ||
4267 (rdata->result == -EAGAIN && got_bytes)) {
4268 flush_dcache_page(page);
4269 SetPageUptodate(page);
4275 if (rdata->result == 0 ||
4276 (rdata->result == -EAGAIN && got_bytes))
4277 cifs_readpage_to_fscache(rdata->mapping->host, page);
4279 cifs_fscache_uncache_page(rdata->mapping->host, page);
4281 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4284 rdata->pages[i] = NULL;
4286 kref_put(&rdata->refcount, cifs_readdata_release);
4290 readpages_fill_pages(struct TCP_Server_Info *server,
4291 struct cifs_readdata *rdata, struct iov_iter *iter,
4298 unsigned int nr_pages = rdata->nr_pages;
4299 unsigned int page_offset = rdata->page_offset;
4301 /* determine the eof that the server (probably) has */
4302 eof = CIFS_I(rdata->mapping->host)->server_eof;
4303 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4304 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4306 rdata->got_bytes = 0;
4307 rdata->tailsz = PAGE_SIZE;
4308 for (i = 0; i < nr_pages; i++) {
4309 struct page *page = rdata->pages[i];
4310 unsigned int to_read = rdata->pagesz;
4314 to_read -= page_offset;
4320 if (len >= to_read) {
4322 } else if (len > 0) {
4323 /* enough for partial page, fill and zero the rest */
4324 zero_user(page, len + page_offset, to_read - len);
4325 n = rdata->tailsz = len;
4327 } else if (page->index > eof_index) {
4329 * The VFS will not try to do readahead past the
4330 * i_size, but it's possible that we have outstanding
4331 * writes with gaps in the middle and the i_size hasn't
4332 * caught up yet. Populate those with zeroed out pages
4333 * to prevent the VFS from repeatedly attempting to
4334 * fill them until the writes are flushed.
4336 zero_user(page, 0, PAGE_SIZE);
4337 lru_cache_add(page);
4338 flush_dcache_page(page);
4339 SetPageUptodate(page);
4342 rdata->pages[i] = NULL;
4346 /* no need to hold page hostage */
4347 lru_cache_add(page);
4350 rdata->pages[i] = NULL;
4356 result = copy_page_from_iter(
4357 page, page_offset, n, iter);
4358 #ifdef CONFIG_CIFS_SMB_DIRECT
4363 result = cifs_read_page_from_socket(
4364 server, page, page_offset, n);
4368 rdata->got_bytes += result;
4371 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4372 rdata->got_bytes : result;
4376 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4377 struct cifs_readdata *rdata, unsigned int len)
4379 return readpages_fill_pages(server, rdata, NULL, len);
4383 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4384 struct cifs_readdata *rdata,
4385 struct iov_iter *iter)
4387 return readpages_fill_pages(server, rdata, iter, iter->count);
4391 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4392 unsigned int rsize, struct list_head *tmplist,
4393 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4395 struct page *page, *tpage;
4396 unsigned int expected_index;
4398 gfp_t gfp = readahead_gfp_mask(mapping);
4400 INIT_LIST_HEAD(tmplist);
4402 page = lru_to_page(page_list);
4405 * Lock the page and put it in the cache. Since no one else
4406 * should have access to this page, we're safe to simply set
4407 * PG_locked without checking it first.
4409 __SetPageLocked(page);
4410 rc = add_to_page_cache_locked(page, mapping,
4413 /* give up if we can't stick it in the cache */
4415 __ClearPageLocked(page);
4419 /* move first page to the tmplist */
4420 *offset = (loff_t)page->index << PAGE_SHIFT;
4423 list_move_tail(&page->lru, tmplist);
4425 /* now try and add more pages onto the request */
4426 expected_index = page->index + 1;
4427 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4428 /* discontinuity ? */
4429 if (page->index != expected_index)
4432 /* would this page push the read over the rsize? */
4433 if (*bytes + PAGE_SIZE > rsize)
4436 __SetPageLocked(page);
4437 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4439 __ClearPageLocked(page);
4442 list_move_tail(&page->lru, tmplist);
4443 (*bytes) += PAGE_SIZE;
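/*
 * Worked example of the batching rules above (4 KiB pages,
 * rsize = 16384): contiguous indexes 3,4,5,6,7 yield a first batch of
 * four pages (16384 bytes, the rsize cap), after which index 7 starts a
 * new request; an index gap would likewise end the batch early.
 */
static inline unsigned int __maybe_unused
demo_max_batch_pages(unsigned int rsize)
{
	return rsize / PAGE_SIZE; /* 16384 / 4096 -> 4 pages per batch */
}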
4450 static int cifs_readpages(struct file *file, struct address_space *mapping,
4451 struct list_head *page_list, unsigned num_pages)
4455 struct list_head tmplist;
4456 struct cifsFileInfo *open_file = file->private_data;
4457 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4458 struct TCP_Server_Info *server;
4464 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4465 * immediately if the cookie is negative
4467 * After this point, every page in the list might have PG_fscache set,
4468 * so we will need to clean that up off of every page we don't use.
4470 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4477 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4478 pid = open_file->pid;
4480 pid = current->tgid;
4483 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4485 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4486 __func__, file, mapping, num_pages);
4489 * Start with the page at end of list and move it to private
4490 * list. Do the same with any following pages until we hit
4491 * the rsize limit, hit an index discontinuity, or run out of
4492 * pages. Issue the async read and then start the loop again
4493 * until the list is empty.
4495 * Note that list order is important. The page_list is in
4496 * the order of declining indexes. When we put the pages in
4497 * the rdata->pages, then we want them in increasing order.
4499 while (!list_empty(page_list) && !err) {
4500 unsigned int i, nr_pages, bytes, rsize;
4502 struct page *page, *tpage;
4503 struct cifs_readdata *rdata;
4504 struct cifs_credits credits_on_stack;
4505 struct cifs_credits *credits = &credits_on_stack;
4507 if (open_file->invalidHandle) {
4508 rc = cifs_reopen_file(open_file, true);
4515 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4521 * Give up immediately if rsize is too small to read an entire
4522 * page. The VFS will fall back to readpage. We should never
4523 * reach this point however since we set ra_pages to 0 when the
4524 * rsize is smaller than a cache page.
4526 if (unlikely(rsize < PAGE_SIZE)) {
4527 add_credits_and_wake_if(server, credits, 0);
4533 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4534 &nr_pages, &offset, &bytes);
4536 add_credits_and_wake_if(server, credits, 0);
4540 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4542 /* best to give up if we're out of mem */
4543 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4544 list_del(&page->lru);
4545 lru_cache_add(page);
4550 add_credits_and_wake_if(server, credits, 0);
4554 rdata->cfile = cifsFileInfo_get(open_file);
4555 rdata->server = server;
4556 rdata->mapping = mapping;
4557 rdata->offset = offset;
4558 rdata->bytes = bytes;
4560 rdata->pagesz = PAGE_SIZE;
4561 rdata->tailsz = PAGE_SIZE;
4562 rdata->read_into_pages = cifs_readpages_read_into_pages;
4563 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4564 rdata->credits = credits_on_stack;
4566 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4567 list_del(&page->lru);
4568 rdata->pages[rdata->nr_pages++] = page;
4571 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4574 if (rdata->cfile->invalidHandle)
4577 rc = server->ops->async_readv(rdata);
4581 add_credits_and_wake_if(server, &rdata->credits, 0);
4582 for (i = 0; i < rdata->nr_pages; i++) {
4583 page = rdata->pages[i];
4584 lru_cache_add(page);
4588 /* Fall back to readpage in error/reconnect cases */
4589 kref_put(&rdata->refcount, cifs_readdata_release);
4593 kref_put(&rdata->refcount, cifs_readdata_release);
4596 /* Any pages that have been shown to fscache but didn't get added to
4597 * the pagecache must be uncached before they get returned to the
4600 cifs_fscache_readpages_cancel(mapping->host, page_list);
4606 * cifs_readpage_worker must be called with the page pinned
4608 static int cifs_readpage_worker(struct file *file, struct page *page,
4614 /* Is the page cached? */
4615 rc = cifs_readpage_from_fscache(file_inode(file), page);
4619 read_data = kmap(page);
4620 /* for reads over a certain size could initiate async read ahead */
4622 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4627 cifs_dbg(FYI, "Bytes read %d\n", rc);
4629 /* we do not want atime to be less than mtime, it broke some apps */
4630 file_inode(file)->i_atime = current_time(file_inode(file));
4631 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4632 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4634 file_inode(file)->i_atime = current_time(file_inode(file));
4637 memset(read_data + rc, 0, PAGE_SIZE - rc);
4639 flush_dcache_page(page);
4640 SetPageUptodate(page);
4642 /* send this page to the cache */
4643 cifs_readpage_to_fscache(file_inode(file), page);
4655 static int cifs_readpage(struct file *file, struct page *page)
4657 loff_t offset = page_file_offset(page);
4663 if (file->private_data == NULL) {
4669 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4670 page, (int)offset, (int)offset);
4672 rc = cifs_readpage_worker(file, page, &offset);
4678 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4680 struct cifsFileInfo *open_file;
4682 spin_lock(&cifs_inode->open_file_lock);
4683 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4684 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4685 spin_unlock(&cifs_inode->open_file_lock);
4689 spin_unlock(&cifs_inode->open_file_lock);
4693 /* We do not want to update the file size from the server for inodes
4694 open for write, to avoid races with writepage extending the file.
4695 In the future we could consider allowing refreshing the inode
4696 only on increases in the file size, but this is tricky to do
4697 without racing with writebehind page caching in the current
4698 Linux kernel design */
4699 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4704 if (is_inode_writable(cifsInode)) {
4705 /* This inode is open for write at least once */
4706 struct cifs_sb_info *cifs_sb;
4708 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4709 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4710 /* since no page cache to corrupt on directio
4711 we can change size safely */
4715 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
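/*
 * Illustrative caller sketch (hypothetical helper, not in the original):
 * attribute revalidation would consult the check above before adopting a
 * server-reported end of file. Locking of i_size is elided for brevity.
 */
static void __maybe_unused
demo_maybe_adopt_eof(struct cifsInodeInfo *cifsInode, __u64 eof)
{
	if (is_size_safe_to_change(cifsInode, eof))
		i_size_write(&cifsInode->vfs_inode, eof);
}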
4723 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4724 loff_t pos, unsigned len, unsigned flags,
4725 struct page **pagep, void **fsdata)
4728 pgoff_t index = pos >> PAGE_SHIFT;
4729 loff_t offset = pos & (PAGE_SIZE - 1);
4730 loff_t page_start = pos & PAGE_MASK;
4735 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4738 page = grab_cache_page_write_begin(mapping, index, flags);
4744 if (PageUptodate(page))
4748 * If we write a full page it will be up to date, no need to read from
4749 * the server. If the write is short, we'll end up doing a sync write
4752 if (len == PAGE_SIZE)
4756 * optimize away the read when we have an oplock, and we're not
4757 * expecting to use any of the data we'd be reading in. That
4758 * is, when the page lies beyond the EOF, or straddles the EOF
4759 * and the write will cover all of the existing data.
4761 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4762 i_size = i_size_read(mapping->host);
4763 if (page_start >= i_size ||
4764 (offset == 0 && (pos + len) >= i_size)) {
4765 zero_user_segments(page, 0, offset,
4769 * PageChecked means that the parts of the page
4770 * to which we're not writing are considered up
4771 * to date. Once the data is copied to the
4772 * page, it can be set uptodate.
4774 SetPageChecked(page);
4779 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4781 * might as well read a page, it is fast enough. If we get
4782 * an error, we don't need to return it. cifs_write_end will
4783 * do a sync write instead since PG_uptodate isn't set.
4785 cifs_readpage_worker(file, page, &page_start);
4790 /* we could try using another file handle if there is one -
4791 but how would we lock it to prevent close of that handle
4792 racing with this read? In any case
4793 this will be written out by write_end so is fine */
4800 static int cifs_release_page(struct page *page, gfp_t gfp)
4802 if (PagePrivate(page))
4805 return cifs_fscache_release_page(page, gfp);
4808 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4809 unsigned int length)
4811 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4813 if (offset == 0 && length == PAGE_SIZE)
4814 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4817 static int cifs_launder_page(struct page *page)
4820 loff_t range_start = page_offset(page);
4821 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4822 struct writeback_control wbc = {
4823 .sync_mode = WB_SYNC_ALL,
4825 .range_start = range_start,
4826 .range_end = range_end,
4829 cifs_dbg(FYI, "Launder page: %p\n", page);
4831 if (clear_page_dirty_for_io(page))
4832 rc = cifs_writepage_locked(page, &wbc);
4834 cifs_fscache_invalidate_page(page, page->mapping->host);
4838 void cifs_oplock_break(struct work_struct *work)
4840 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4842 struct inode *inode = d_inode(cfile->dentry);
4843 struct cifsInodeInfo *cinode = CIFS_I(inode);
4844 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4845 struct TCP_Server_Info *server = tcon->ses->server;
4847 bool purge_cache = false;
4848 bool is_deferred = false;
4849 struct cifs_deferred_close *dclose;
4851 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4852 TASK_UNINTERRUPTIBLE);
4854 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4855 cfile->oplock_epoch, &purge_cache);
4857 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4858 cifs_has_mand_locks(cinode)) {
4859 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4864 if (inode && S_ISREG(inode->i_mode)) {
4865 if (CIFS_CACHE_READ(cinode))
4866 break_lease(inode, O_RDONLY);
4868 break_lease(inode, O_WRONLY);
4869 rc = filemap_fdatawrite(inode->i_mapping);
4870 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4871 rc = filemap_fdatawait(inode->i_mapping);
4872 mapping_set_error(inode->i_mapping, rc);
4873 cifs_zap_mapping(inode);
4875 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4876 if (CIFS_CACHE_WRITE(cinode))
4877 goto oplock_break_ack;
4880 rc = cifs_push_locks(cfile);
4882 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4886 * When an oplock break is received and there are no active
4887 * file handles, only cached ones, schedule the deferred close immediately
4888 * so that a new open will not use the cached handle.
4890 spin_lock(&CIFS_I(inode)->deferred_lock);
4891 is_deferred = cifs_is_deferred_close(cfile, &dclose);
4892 spin_unlock(&CIFS_I(inode)->deferred_lock);
4894 cfile->deferred_close_scheduled &&
4895 delayed_work_pending(&cfile->deferred)) {
4896 if (cancel_delayed_work(&cfile->deferred)) {
4897 _cifsFileInfo_put(cfile, false, false);
4898 goto oplock_break_done;
4902 * Releasing a stale oplock after a recent reconnect of the SMB session,
4903 * using a now incorrect file handle, is not a data integrity issue; but
4904 * do not bother sending an oplock release if the session to the server
4905 * is still disconnected, since the server has already released the oplock
4907 if (!cfile->oplock_break_cancelled) {
4908 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4910 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4913 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4914 cifs_done_oplock_break(cinode);
4918 * The presence of cifs_direct_io() in the address space ops vector
4919 * allows open() with the O_DIRECT flag, which would have failed otherwise.
4921 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4922 * so this method should never be called.
4924 * Direct IO is not yet supported in the cached mode.
4927 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4931 * Eventually we need to support direct IO for non-forcedirectio mounts
4936 static int cifs_swap_activate(struct swap_info_struct *sis,
4937 struct file *swap_file, sector_t *span)
4939 struct cifsFileInfo *cfile = swap_file->private_data;
4940 struct inode *inode = swap_file->f_mapping->host;
4941 unsigned long blocks;
4944 cifs_dbg(FYI, "swap activate\n");
4946 spin_lock(&inode->i_lock);
4947 blocks = inode->i_blocks;
4948 isize = inode->i_size;
4949 spin_unlock(&inode->i_lock);
4950 if (blocks*512 < isize) {
4951 pr_warn("swap activate: swapfile has holes\n");
4956 pr_warn_once("Swap support over SMB3 is experimental\n");
4959 * TODO: consider adding ACL (or documenting how) to prevent other
4960 * users (on this or other systems) from reading it
4964 /* TODO: add sk_set_memalloc(inet) or similar */
4967 cfile->swapfile = true;
4969 * TODO: Since file already open, we can't open with DENY_ALL here
4970 * but we could add call to grab a byte range lock to prevent others
4971 * from reading or writing the file
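/*
 * Worked example of the sparseness check above: i_blocks counts 512-byte
 * units, so a fully allocated 1 MiB file has blocks >= 2048;
 * blocks * 512 < i_size implies unallocated holes, which swap cannot
 * tolerate.
 */
static inline bool __maybe_unused
demo_swapfile_has_holes(unsigned long blocks, loff_t isize)
{
	return (loff_t)blocks * 512 < isize; /* 2000 * 512 < 1048576 -> true */
}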
4977 static void cifs_swap_deactivate(struct file *file)
4979 struct cifsFileInfo *cfile = file->private_data;
4981 cifs_dbg(FYI, "swap deactivate\n");
4983 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4986 cfile->swapfile = false;
4988 /* do we need to unpin (or unlock) the file */
4991 const struct address_space_operations cifs_addr_ops = {
4992 .readpage = cifs_readpage,
4993 .readpages = cifs_readpages,
4994 .writepage = cifs_writepage,
4995 .writepages = cifs_writepages,
4996 .write_begin = cifs_write_begin,
4997 .write_end = cifs_write_end,
4998 .set_page_dirty = __set_page_dirty_nobuffers,
4999 .releasepage = cifs_release_page,
5000 .direct_IO = cifs_direct_io,
5001 .invalidatepage = cifs_invalidate_page,
5002 .launder_page = cifs_launder_page,
5004 * TODO: investigate and, if useful, add a cifs_migratePage
5005 * helper (under CONFIG_MIGRATION) in the future, and also
5006 * investigate and add an is_dirty_writeback helper if needed
5008 .swap_activate = cifs_swap_activate,
5009 .swap_deactivate = cifs_swap_deactivate,
5013 * cifs_readpages requires the server to support a buffer large enough to
5014 * contain the header plus one complete page of data. Otherwise, we need
5015 * to leave cifs_readpages out of the address space operations.
5017 const struct address_space_operations cifs_addr_ops_smallbuf = {
5018 .readpage = cifs_readpage,
5019 .writepage = cifs_writepage,
5020 .writepages = cifs_writepages,
5021 .write_begin = cifs_write_begin,
5022 .write_end = cifs_write_end,
5023 .set_page_dirty = __set_page_dirty_nobuffers,
5024 .releasepage = cifs_release_page,
5025 .invalidatepage = cifs_invalidate_page,
5026 .launder_page = cifs_launder_page,