// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/*
		 * GENERIC_ALL is too much permission to request; it can
		 * cause an unnecessary access-denied error on create.
		 */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}
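/*
 * Example (illustrative sketch only, nothing in this file calls it): how the
 * two helpers above combine for a typical open(2) request. Marked
 * __maybe_unused since it exists purely as documentation.
 */
static void __maybe_unused cifs_open_flags_example(void)
{
	unsigned int flags = O_RDWR | O_CREAT | O_EXCL;

	/* O_RDWR maps to GENERIC_READ | GENERIC_WRITE */
	int desired_access = cifs_convert_flags(flags);
	/* O_CREAT | O_EXCL maps to FILE_CREATE: fail if the file exists */
	int disposition = cifs_get_disposition(flags);

	(void)desired_access;
	(void)disposition;
}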
int cifs_posix_open(const char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that no POSIX flag combination maps directly to the
 *	FILE_SUPERSEDE disposition (ie create whether or not the file
 *	exists). O_CREAT | O_TRUNC is similar, but it truncates the
 *	existing file rather than creating a new one the way
 *	FILE_SUPERSEDE does (which uses the attributes / metadata
 *	passed in on the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag,
 *	and the read/write flags match reasonably. O_LARGEFILE is
 *	irrelevant because largefile support is always used by this
 *	client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *	O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}
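/*
 * Note (descriptive, added for clarity): cifs_down_write() polls with
 * msleep(10) instead of sleeping inside down_write() itself, so a writer
 * that loses the race simply retries until the semaphore becomes free.
 */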
static void cifsFileInfo_put_work(struct work_struct *work);
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->deferred_close_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	cifs_fscache_release_inode_cookie(inode);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}
/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}
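/*
 * Typical usage of the get/put pair above (illustrative sketch only):
 *
 *	struct cifsFileInfo *cfile = cifsFileInfo_get(open_file);
 *	... issue requests against cfile->fid ...
 *	cifsFileInfo_put(cfile);  // last ref may close the server handle
 */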
/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:	not offloaded on close and oplock breaks
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close because it may cause an error when we open this file
		 * again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			file->private_data = cfile;
			spin_lock(&CIFS_I(inode)->deferred_lock);
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto out;
		} else {
			_cifsFileInfo_put(cfile, true, false);
		}
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->ctx->file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	free_dentry_path(page);
	free_xid(xid);
	return rc;
}
void smb2_deferred_work_close(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work,
			struct cifsFileInfo, deferred.work);

	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	cifs_del_deferred_close(cfile);
	cfile->deferred_close_scheduled = false;
	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	_cifsFileInfo_put(cfile, true, false);
}
int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
		    cinode->lease_granted &&
		    dclose) {
			if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
				inode->i_ctime = inode->i_mtime = current_time(inode);
				cifs_fscache_update_inode_cookie(inode);
			}
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_close_scheduled &&
			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there is no pending work, mod_delayed_work queues new work.
				 * So, Increase the ref count to avoid use-after-free.
				 */
				if (!mod_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->acregmax))
					cifsFileInfo_get(cfile);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->acregmax);
				cfile->deferred_close_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}
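/*
 * Note on the deferred close above (summary added for clarity): when the
 * inode holds a read/handle/write (RHW) lease, the final put of the handle
 * is postponed for the acregmax interval via deferredclose_wq, so a quick
 * re-open of the same file can reuse the cached handle (see the
 * cifs_get_readable_path() fast path in cifs_open() above).
 */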
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
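/*
 * Example caller pattern for cifs_lock_add_if() (see cifs_setlk() below):
 *
 *	rc = cifs_lock_add_if(cfile, lock, wait_flag);
 *	if (rc < 0)	// conflicting lock and wait == false
 *		return rc;
 *	if (!rc)	// lock cached locally, nothing to send
 *		return 0;
 *	// rc == 1: no local conflict, still must ask the server
 */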
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}
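/*
 * Example caller pattern for cifs_posix_lock_set() (used by cifs_setlk()
 * below): anything <= FILE_LOCK_DEFERRED was fully handled locally, while
 * FILE_LOCK_DEFERRED + 1 means the lock must also be sent to the server.
 *
 *	rc = cifs_posix_lock_set(file, flock);
 *	if (rc <= FILE_LOCK_DEFERRED)
 *		return rc;
 */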
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
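/*
 * Note (added for clarity): cifs_push_locks() is the point where locally
 * cached byte-range locks are flushed to the server, typically once the
 * oplock/lease that allowed caching them is being broken; afterwards
 * can_cache_brlcks stays false so new locks go straight to the server.
 */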
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	if (!(fl->fl_flags & FL_FLOCK)) {
		free_xid(xid);
		return -ENOLCK;
	}

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
static void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}
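/*
 * Example caller (see cifs_write() below): the i_lock requirement above
 * means updates are wrapped as
 *
 *	spin_lock(&d_inode(dentry)->i_lock);
 *	cifs_update_eof(cifsi, *offset, bytes_written);
 *	spin_unlock(&d_inode(dentry)->i_lock);
 */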
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written,
						     iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size) {
			i_size_write(d_inode(dentry), *offset);
			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
		}
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}
/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;

	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of an oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}
struct cifsFileInfo *
find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
{
	struct cifsFileInfo *cfile;
	int rc;

	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
	if (rc)
		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);

	return cfile;
}
int
cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
		       int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *cfile;
	void *page = alloc_dentry_path();

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
		struct cifsInodeInfo *cinode;
		const char *full_path = build_path_from_dentry(cfile->dentry, page);
		if (IS_ERR(full_path)) {
			spin_unlock(&tcon->open_file_lock);
			free_dentry_path(page);
			return PTR_ERR(full_path);
		}
		if (strcmp(full_path, name))
			continue;

		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		free_dentry_path(page);
		return cifs_get_writable_file(cinode, flags, ret_file);
	}

	spin_unlock(&tcon->open_file_lock);
	free_dentry_path(page);
	return -ENOENT;
}
int
cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *cfile;
	void *page = alloc_dentry_path();

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
		struct cifsInodeInfo *cinode;
		const char *full_path = build_path_from_dentry(cfile->dentry, page);
		if (IS_ERR(full_path)) {
			spin_unlock(&tcon->open_file_lock);
			free_dentry_path(page);
			return PTR_ERR(full_path);
		}
		if (strcmp(full_path, name))
			continue;

		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		free_dentry_path(page);
		*ret_file = find_readable_file(cinode, 0);
		return *ret_file ? 0 : -ENOENT;
	}

	spin_unlock(&tcon->open_file_lock);
	free_dentry_path(page);
	return -ENOENT;
}
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
				    &open_file);
	if (!rc) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
		else
			rc = -EFAULT;
	} else {
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		if (!is_retryable_error(rc))
			rc = -EIO;
	}

	kunmap(page);
	return rc;
}
static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
			  pgoff_t end, pgoff_t *index,
			  unsigned int *found_pages)
{
	struct cifs_writedata *wdata;

	wdata = cifs_writedata_alloc((unsigned int)tofind,
				     cifs_writev_complete);
	if (!wdata)
		return NULL;

	*found_pages = find_get_pages_range_tag(mapping, index, end,
				PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
	return wdata;
}
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2355 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2356 struct address_space *mapping, struct writeback_control *wbc)
2360 wdata->sync_mode = wbc->sync_mode;
2361 wdata->nr_pages = nr_pages;
2362 wdata->offset = page_offset(wdata->pages[0]);
2363 wdata->pagesz = PAGE_SIZE;
2364 wdata->tailsz = min(i_size_read(mapping->host) -
2365 page_offset(wdata->pages[nr_pages - 1]),
2366 (loff_t)PAGE_SIZE);
2367 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2368 wdata->pid = wdata->cfile->pid;
2370 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2374 if (wdata->cfile->invalidHandle)
2377 rc = wdata->server->ops->async_writev(wdata,
2378 cifs_writedata_release);
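/*
 * Worked example (illustrative, not from the original source): with
 * PAGE_SIZE = 4096, three pages starting at file offset 0 and
 * i_size = 10000, the last page starts at offset 8192, so
 * tailsz = min(10000 - 8192, 4096) = 1808 and
 * bytes = 2 * 4096 + 1808 = 10000 -- the request stops exactly at EOF,
 * never past it.
 */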
2383 static int cifs_writepages(struct address_space *mapping,
2384 struct writeback_control *wbc)
2386 struct inode *inode = mapping->host;
2387 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2388 struct TCP_Server_Info *server;
2389 bool done = false, scanned = false, range_whole = false;
2391 struct cifs_writedata *wdata;
2392 struct cifsFileInfo *cfile = NULL;
2398 * If wsize is smaller than the page cache size, default to writing
2399 * one page at a time via cifs_writepage
2401 if (cifs_sb->ctx->wsize < PAGE_SIZE)
2402 return generic_writepages(mapping, wbc);
2405 if (wbc->range_cyclic) {
2406 index = mapping->writeback_index; /* Start from prev offset */
2409 index = wbc->range_start >> PAGE_SHIFT;
2410 end = wbc->range_end >> PAGE_SHIFT;
2411 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2415 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2418 while (!done && index <= end) {
2419 unsigned int i, nr_pages, found_pages, wsize;
2420 pgoff_t next = 0, tofind, saved_index = index;
2421 struct cifs_credits credits_on_stack;
2422 struct cifs_credits *credits = &credits_on_stack;
2423 int get_file_rc = 0;
2426 cifsFileInfo_put(cfile);
2428 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2430 /* in case of an error store it to return later */
2434 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2441 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2443 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2448 add_credits_and_wake_if(server, credits, 0);
2452 if (found_pages == 0) {
2453 kref_put(&wdata->refcount, cifs_writedata_release);
2454 add_credits_and_wake_if(server, credits, 0);
2455 break;
2458 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2459 end, &index, &next, &done);
2461 /* nothing to write? */
2462 if (nr_pages == 0) {
2463 kref_put(&wdata->refcount, cifs_writedata_release);
2464 add_credits_and_wake_if(server, credits, 0);
2465 continue;
2468 wdata->credits = credits_on_stack;
2469 wdata->cfile = cfile;
2470 wdata->server = server;
2473 if (!wdata->cfile) {
2474 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2476 if (is_retryable_error(get_file_rc))
2481 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2483 for (i = 0; i < nr_pages; ++i)
2484 unlock_page(wdata->pages[i]);
2486 /* send failure -- clean up the mess */
2488 add_credits_and_wake_if(server, &wdata->credits, 0);
2489 for (i = 0; i < nr_pages; ++i) {
2490 if (is_retryable_error(rc))
2491 redirty_page_for_writepage(wbc,
2494 SetPageError(wdata->pages[i]);
2495 end_page_writeback(wdata->pages[i]);
2496 put_page(wdata->pages[i]);
2498 if (!is_retryable_error(rc))
2499 mapping_set_error(mapping, rc);
2501 kref_put(&wdata->refcount, cifs_writedata_release);
2503 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2504 index = saved_index;
2508 /* Return immediately if we received a signal during writing */
2509 if (is_interrupt_error(rc)) {
2514 if (rc != 0 && saved_rc == 0)
2517 wbc->nr_to_write -= nr_pages;
2518 if (wbc->nr_to_write <= 0)
2524 if (!scanned && !done) {
2526 * We hit the last page and there is more work to be done: wrap
2527 * back to the start of the file
2537 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2538 mapping->writeback_index = index;
2541 cifsFileInfo_put(cfile);
2543 /* Indication to update ctime and mtime as close is deferred */
2544 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
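/*
 * Illustrative sketch (guarded out, not part of this file): the credit
 * discipline the writepages loop above follows. Credits reserved with
 * ->wait_mtu_credits() must either travel with the request or be handed
 * back via add_credits_and_wake_if() on every failure path, otherwise the
 * server's credit window leaks. example_reserve_and_send() is hypothetical.
 */
#if 0
static int example_reserve_and_send(struct TCP_Server_Info *server,
				    struct cifs_writedata *wdata)
{
	struct cifs_credits credits = { .value = 0, .instance = 0 };
	unsigned int wsize;
	int rc;

	/* block until the server window allows a request of this size */
	rc = server->ops->wait_mtu_credits(server, 65536, &wsize, &credits);
	if (rc)
		return rc;

	wdata->credits = credits;
	rc = server->ops->async_writev(wdata, cifs_writedata_release);
	if (rc)	/* send failed: hand the credits back and wake any waiters */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	return rc;
}
#endif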
2549 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2555 /* BB add check for wbc flags */
2557 if (!PageUptodate(page))
2558 cifs_dbg(FYI, "ppw - page not up to date\n");
2561 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2563 * A writepage() implementation always needs to do either this,
2564 * or re-dirty the page with "redirty_page_for_writepage()" in
2565 * the case of a failure.
2567 * Just unlocking the page would leave the radix tree tag-bits
2568 * out of sync with the true state of the page.
2570 set_page_writeback(page);
2572 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2573 if (is_retryable_error(rc)) {
2574 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2576 redirty_page_for_writepage(wbc, page);
2577 } else if (rc != 0) {
2579 mapping_set_error(page->mapping, rc);
2581 SetPageUptodate(page);
2583 end_page_writeback(page);
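/*
 * Illustrative sketch (guarded out): the minimal contract described in the
 * comment above for any ->writepage implementation. example_send_page() is
 * a hypothetical transport call standing in for cifs_partialpagewrite().
 */
#if 0
static int example_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	set_page_writeback(page);	/* clears the dirty tag for this page */
	rc = example_send_page(page);	/* hypothetical transport call */
	if (rc == -EAGAIN)
		redirty_page_for_writepage(wbc, page);	/* retry later */
	else if (rc)
		mapping_set_error(page->mapping, rc);
	else
		SetPageUptodate(page);
	end_page_writeback(page);
	return rc;
}
#endif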
2589 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2591 int rc = cifs_writepage_locked(page, wbc);
2592 unlock_page(page);
2593 return rc;
2596 static int cifs_write_end(struct file *file, struct address_space *mapping,
2597 loff_t pos, unsigned len, unsigned copied,
2598 struct page *page, void *fsdata)
2601 struct inode *inode = mapping->host;
2602 struct cifsFileInfo *cfile = file->private_data;
2603 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2606 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2609 pid = current->tgid;
2611 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2614 if (PageChecked(page)) {
2616 SetPageUptodate(page);
2617 ClearPageChecked(page);
2618 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2619 SetPageUptodate(page);
2621 if (!PageUptodate(page)) {
2623 unsigned offset = pos & (PAGE_SIZE - 1);
2627 /* this is probably better than directly calling
2628 cifs_partialpagewrite since here the file handle is
2629 already known, which we might as well leverage */
2630 /* BB check if anything else missing out of ppw
2631 such as updating last write time */
2632 page_data = kmap(page);
2633 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2634 /* if (rc < 0) should we set writebehind rc? */
2641 set_page_dirty(page);
2645 spin_lock(&inode->i_lock);
2646 if (pos > inode->i_size) {
2647 i_size_write(inode, pos);
2648 inode->i_blocks = (512 - 1 + pos) >> 9;
2650 spin_unlock(&inode->i_lock);
2655 /* Indication to update ctime and mtime as close is deferred */
2656 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2661 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2666 struct cifs_tcon *tcon;
2667 struct TCP_Server_Info *server;
2668 struct cifsFileInfo *smbfile = file->private_data;
2669 struct inode *inode = file_inode(file);
2670 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2672 rc = file_write_and_wait_range(file, start, end);
2674 trace_cifs_fsync_err(inode->i_ino, rc);
2680 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2683 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2684 rc = cifs_zap_mapping(inode);
2686 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2687 rc = 0; /* don't care about it in fsync */
2691 tcon = tlink_tcon(smbfile->tlink);
2692 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2693 server = tcon->ses->server;
2694 if (server->ops->flush)
2695 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2704 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2708 struct cifs_tcon *tcon;
2709 struct TCP_Server_Info *server;
2710 struct cifsFileInfo *smbfile = file->private_data;
2711 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2713 rc = file_write_and_wait_range(file, start, end);
2715 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2721 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2724 tcon = tlink_tcon(smbfile->tlink);
2725 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2726 server = tcon->ses->server;
2727 if (server->ops->flush)
2728 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2738 * As the file closes, flush all cached write data for this inode,
2739 * checking for write-behind errors.
2741 int cifs_flush(struct file *file, fl_owner_t id)
2743 struct inode *inode = file_inode(file);
2746 if (file->f_mode & FMODE_WRITE)
2747 rc = filemap_write_and_wait(inode->i_mapping);
2749 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2751 trace_cifs_flush_err(inode->i_ino, rc);
2756 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2761 for (i = 0; i < num_pages; i++) {
2762 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2765 * save number of pages we have already allocated and
2766 * return with ENOMEM error
2775 for (i = 0; i < num_pages; i++)
2782 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2787 clen = min_t(const size_t, len, wsize);
2788 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2790 if (cur_len)
2791 *cur_len = clen;
2793 return num_pages;
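/*
 * Worked example (illustrative): with wsize = 65536, len = 102400 and
 * PAGE_SIZE = 4096, clen = min(102400, 65536) = 65536, so *cur_len becomes
 * 65536 and the helper returns DIV_ROUND_UP(65536, 4096) = 16 pages.
 */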
2797 cifs_uncached_writedata_release(struct kref *refcount)
2800 struct cifs_writedata *wdata = container_of(refcount,
2801 struct cifs_writedata, refcount);
2803 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2804 for (i = 0; i < wdata->nr_pages; i++)
2805 put_page(wdata->pages[i]);
2806 cifs_writedata_release(refcount);
2809 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2812 cifs_uncached_writev_complete(struct work_struct *work)
2814 struct cifs_writedata *wdata = container_of(work,
2815 struct cifs_writedata, work);
2816 struct inode *inode = d_inode(wdata->cfile->dentry);
2817 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2819 spin_lock(&inode->i_lock);
2820 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2821 if (cifsi->server_eof > inode->i_size)
2822 i_size_write(inode, cifsi->server_eof);
2823 spin_unlock(&inode->i_lock);
2825 complete(&wdata->done);
2826 collect_uncached_write_data(wdata->ctx);
2827 /* the below call can possibly free the last ref to aio ctx */
2828 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2832 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2833 size_t *len, unsigned long *num_pages)
2835 size_t save_len, copied, bytes, cur_len = *len;
2836 unsigned long i, nr_pages = *num_pages;
2838 save_len = cur_len;
2839 for (i = 0; i < nr_pages; i++) {
2840 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2841 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2842 cur_len -= copied;
2844 * If we didn't copy as much as we expected, then that
2845 * may mean we trod into an unmapped area. Stop copying
2846 * at that point. On the next pass through the big
2847 * loop, we'll likely end up getting a zero-length
2848 * write and bailing out of it.
2850 if (copied < bytes)
2851 break;
2853 cur_len = save_len - cur_len;
2854 *len = cur_len;
2857 * If we have no data to send, then that probably means that
2858 * the copy above failed altogether. That's most likely because
2859 * the address in the iovec was bogus. Return -EFAULT and let
2860 * the caller free anything we allocated and bail out.
2866 * i + 1 now represents the number of pages we actually used in
2867 * the copy phase above.
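/*
 * Worked example (illustrative): suppose cur_len = 10000 over three pages
 * and the iterator faults after 6000 bytes. copy_page_from_iter() comes up
 * short on page 1, the loop breaks with i = 1, so *num_pages becomes 2 and
 * *len is trimmed from 10000 to 6000; the caller frees the unused page.
 */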
2874 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2875 struct cifs_aio_ctx *ctx)
2878 struct cifs_credits credits;
2880 struct TCP_Server_Info *server = wdata->server;
2883 if (wdata->cfile->invalidHandle) {
2884 rc = cifs_reopen_file(wdata->cfile, false);
2893 * Wait for credits to resend this wdata.
2894 * Note: we are attempting to resend the whole wdata not in
2895 * smaller size.
2898 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2903 if (wsize < wdata->bytes) {
2904 add_credits_and_wake_if(server, &credits, 0);
2907 } while (wsize < wdata->bytes);
2908 wdata->credits = credits;
2910 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2913 if (wdata->cfile->invalidHandle)
2916 #ifdef CONFIG_CIFS_SMB_DIRECT
2918 wdata->mr->need_invalidate = true;
2919 smbd_deregister_mr(wdata->mr);
2923 rc = server->ops->async_writev(wdata,
2924 cifs_uncached_writedata_release);
2928 /* If the write was successfully sent, we are done */
2930 list_add_tail(&wdata->list, wdata_list);
2934 /* Roll back credits and retry if needed */
2935 add_credits_and_wake_if(server, &wdata->credits, 0);
2936 } while (rc == -EAGAIN);
2939 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2944 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2945 struct cifsFileInfo *open_file,
2946 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2947 struct cifs_aio_ctx *ctx)
2951 unsigned long nr_pages, num_pages, i;
2952 struct cifs_writedata *wdata;
2953 struct iov_iter saved_from = *from;
2954 loff_t saved_offset = offset;
2956 struct TCP_Server_Info *server;
2957 struct page **pagevec;
2961 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2962 pid = open_file->pid;
2964 pid = current->tgid;
2966 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2971 struct cifs_credits credits_on_stack;
2972 struct cifs_credits *credits = &credits_on_stack;
2974 if (open_file->invalidHandle) {
2975 rc = cifs_reopen_file(open_file, false);
2982 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2987 cur_len = min_t(const size_t, len, wsize);
2989 if (ctx->direct_io) {
2992 result = iov_iter_get_pages_alloc(
2993 from, &pagevec, cur_len, &start);
2996 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2997 result, iov_iter_type(from),
2998 from->iov_offset, from->count);
3002 add_credits_and_wake_if(server, credits, 0);
3005 cur_len = (size_t)result;
3006 iov_iter_advance(from, cur_len);
3008 nr_pages =
3009 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3011 wdata = cifs_writedata_direct_alloc(pagevec,
3012 cifs_uncached_writev_complete);
3015 add_credits_and_wake_if(server, credits, 0);
3020 wdata->page_offset = start;
3022 wdata->tailsz = nr_pages > 1 ?
3023 cur_len - (PAGE_SIZE - start) -
3024 (nr_pages - 2) * PAGE_SIZE :
3025 cur_len;
3027 nr_pages = get_numpages(wsize, len, &cur_len);
3028 wdata = cifs_writedata_alloc(nr_pages,
3029 cifs_uncached_writev_complete);
3032 add_credits_and_wake_if(server, credits, 0);
3036 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3038 kvfree(wdata->pages);
3040 add_credits_and_wake_if(server, credits, 0);
3044 num_pages = nr_pages;
3045 rc = wdata_fill_from_iovec(
3046 wdata, from, &cur_len, &num_pages);
3048 for (i = 0; i < nr_pages; i++)
3049 put_page(wdata->pages[i]);
3050 kvfree(wdata->pages);
3052 add_credits_and_wake_if(server, credits, 0);
3057 * Bring nr_pages down to the number of pages we
3058 * actually used, and free any pages that we didn't use.
3060 for ( ; nr_pages > num_pages; nr_pages--)
3061 put_page(wdata->pages[nr_pages - 1]);
3063 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3066 wdata->sync_mode = WB_SYNC_ALL;
3067 wdata->nr_pages = nr_pages;
3068 wdata->offset = (__u64)offset;
3069 wdata->cfile = cifsFileInfo_get(open_file);
3070 wdata->server = server;
3072 wdata->bytes = cur_len;
3073 wdata->pagesz = PAGE_SIZE;
3074 wdata->credits = credits_on_stack;
3076 kref_get(&ctx->refcount);
3078 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3081 if (wdata->cfile->invalidHandle)
3084 rc = server->ops->async_writev(wdata,
3085 cifs_uncached_writedata_release);
3089 add_credits_and_wake_if(server, &wdata->credits, 0);
3090 kref_put(&wdata->refcount,
3091 cifs_uncached_writedata_release);
3092 if (rc == -EAGAIN) {
3094 iov_iter_advance(from, offset - saved_offset);
3100 list_add_tail(&wdata->list, wdata_list);
3109 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3111 struct cifs_writedata *wdata, *tmp;
3112 struct cifs_tcon *tcon;
3113 struct cifs_sb_info *cifs_sb;
3114 struct dentry *dentry = ctx->cfile->dentry;
3117 tcon = tlink_tcon(ctx->cfile->tlink);
3118 cifs_sb = CIFS_SB(dentry->d_sb);
3120 mutex_lock(&ctx->aio_mutex);
3122 if (list_empty(&ctx->list)) {
3123 mutex_unlock(&ctx->aio_mutex);
3129 * Wait for and collect replies for any successful sends in order of
3130 * increasing offset. Once an error is hit, then return without waiting
3131 * for any more replies.
3134 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3136 if (!try_wait_for_completion(&wdata->done)) {
3137 mutex_unlock(&ctx->aio_mutex);
3144 ctx->total_len += wdata->bytes;
3146 /* resend call if it's a retryable error */
3147 if (rc == -EAGAIN) {
3148 struct list_head tmp_list;
3149 struct iov_iter tmp_from = ctx->iter;
3151 INIT_LIST_HEAD(&tmp_list);
3152 list_del_init(&wdata->list);
3155 rc = cifs_resend_wdata(
3156 wdata, &tmp_list, ctx);
3158 iov_iter_advance(&tmp_from,
3159 wdata->offset - ctx->pos);
3161 rc = cifs_write_from_iter(wdata->offset,
3162 wdata->bytes, &tmp_from,
3163 ctx->cfile, cifs_sb, &tmp_list,
3166 kref_put(&wdata->refcount,
3167 cifs_uncached_writedata_release);
3170 list_splice(&tmp_list, &ctx->list);
3174 list_del_init(&wdata->list);
3175 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3178 cifs_stats_bytes_written(tcon, ctx->total_len);
3179 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3181 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3183 mutex_unlock(&ctx->aio_mutex);
3185 if (ctx->iocb && ctx->iocb->ki_complete)
3186 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3188 complete(&ctx->done);
3191 static ssize_t __cifs_writev(
3192 struct kiocb *iocb, struct iov_iter *from, bool direct)
3194 struct file *file = iocb->ki_filp;
3195 ssize_t total_written = 0;
3196 struct cifsFileInfo *cfile;
3197 struct cifs_tcon *tcon;
3198 struct cifs_sb_info *cifs_sb;
3199 struct cifs_aio_ctx *ctx;
3200 struct iov_iter saved_from = *from;
3201 size_t len = iov_iter_count(from);
3205 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3206 * In this case, fall back to non-direct write function.
3207 * This could be improved by getting pages directly in ITER_KVEC.
3209 if (direct && iov_iter_is_kvec(from)) {
3210 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3214 rc = generic_write_checks(iocb, from);
3218 cifs_sb = CIFS_FILE_SB(file);
3219 cfile = file->private_data;
3220 tcon = tlink_tcon(cfile->tlink);
3222 if (!tcon->ses->server->ops->async_writev)
3225 ctx = cifs_aio_ctx_alloc();
3229 ctx->cfile = cifsFileInfo_get(cfile);
3231 if (!is_sync_kiocb(iocb))
3234 ctx->pos = iocb->ki_pos;
3237 ctx->direct_io = true;
3241 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3243 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3248 /* grab a lock here because response handlers can access ctx */
3249 mutex_lock(&ctx->aio_mutex);
3251 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3252 cfile, cifs_sb, &ctx->list, ctx);
3255 * If at least one write was successfully sent, then discard any rc
3256 * value from the later writes. If the other write succeeds, then
3257 * we'll end up returning whatever was written. If it fails, then
3258 * we'll get a new rc value from that.
3260 if (!list_empty(&ctx->list))
3263 mutex_unlock(&ctx->aio_mutex);
3266 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3270 if (!is_sync_kiocb(iocb)) {
3271 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3272 return -EIOCBQUEUED;
3275 rc = wait_for_completion_killable(&ctx->done);
3277 mutex_lock(&ctx->aio_mutex);
3278 ctx->rc = rc = -EINTR;
3279 total_written = ctx->total_len;
3280 mutex_unlock(&ctx->aio_mutex);
3283 total_written = ctx->total_len;
3286 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3288 if (unlikely(!total_written))
3291 iocb->ki_pos += total_written;
3292 return total_written;
3295 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3297 return __cifs_writev(iocb, from, true);
3300 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3302 return __cifs_writev(iocb, from, false);
3306 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3308 struct file *file = iocb->ki_filp;
3309 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3310 struct inode *inode = file->f_mapping->host;
3311 struct cifsInodeInfo *cinode = CIFS_I(inode);
3312 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3317 * We need to hold the sem to be sure nobody modifies lock list
3318 * with a brlock that prevents writing.
3320 down_read(&cinode->lock_sem);
3322 rc = generic_write_checks(iocb, from);
3326 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3327 server->vals->exclusive_lock_type, 0,
3328 NULL, CIFS_WRITE_OP))
3329 rc = __generic_file_write_iter(iocb, from);
3333 up_read(&cinode->lock_sem);
3334 inode_unlock(inode);
3337 rc = generic_write_sync(iocb, rc);
3342 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3344 struct inode *inode = file_inode(iocb->ki_filp);
3345 struct cifsInodeInfo *cinode = CIFS_I(inode);
3346 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3347 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3348 iocb->ki_filp->private_data;
3349 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3352 written = cifs_get_writer(cinode);
3356 if (CIFS_CACHE_WRITE(cinode)) {
3357 if (cap_unix(tcon->ses) &&
3358 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3359 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3360 written = generic_file_write_iter(iocb, from);
3363 written = cifs_writev(iocb, from);
3367 * For non-oplocked files in strict cache mode we need to write the data
3368 * to the server exactly from the pos to pos+len-1 rather than flush all
3369 * affected pages because it may cause an error with mandatory locks on
3370 * these pages but not on the region from pos to pos+len-1.
3372 written = cifs_user_writev(iocb, from);
3373 if (CIFS_CACHE_READ(cinode)) {
3375 * We have read level caching and we have just sent a write
3376 * request to the server thus making data in the cache stale.
3377 * Zap the cache and set oplock/lease level to NONE to avoid
3378 * reading stale data from the cache. All subsequent read
3379 * operations will read new data from the server.
3381 cifs_zap_mapping(inode);
3382 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3387 cifs_put_writer(cinode);
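/*
 * Summary of the write-path choice above (illustrative restatement):
 *   CACHE_WRITE + POSIX byte-range locks -> generic_file_write_iter()
 *   CACHE_WRITE + CIFS mandatory locks   -> cifs_writev() (conflict check)
 *   no CACHE_WRITE                       -> cifs_user_writev() on the wire,
 *                                           then zap the cache if CACHE_READ
 */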
3391 static struct cifs_readdata *
3392 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3394 struct cifs_readdata *rdata;
3396 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3397 if (rdata != NULL) {
3398 rdata->pages = pages;
3399 kref_init(&rdata->refcount);
3400 INIT_LIST_HEAD(&rdata->list);
3401 init_completion(&rdata->done);
3402 INIT_WORK(&rdata->work, complete);
3408 static struct cifs_readdata *
3409 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3411 struct page **pages =
3412 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3413 struct cifs_readdata *ret = NULL;
3416 ret = cifs_readdata_direct_alloc(pages, complete);
3425 cifs_readdata_release(struct kref *refcount)
3427 struct cifs_readdata *rdata = container_of(refcount,
3428 struct cifs_readdata, refcount);
3429 #ifdef CONFIG_CIFS_SMB_DIRECT
3431 smbd_deregister_mr(rdata->mr);
3436 cifsFileInfo_put(rdata->cfile);
3438 kvfree(rdata->pages);
3443 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3449 for (i = 0; i < nr_pages; i++) {
3450 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3455 rdata->pages[i] = page;
3459 unsigned int nr_page_failed = i;
3461 for (i = 0; i < nr_page_failed; i++) {
3462 put_page(rdata->pages[i]);
3463 rdata->pages[i] = NULL;
3470 cifs_uncached_readdata_release(struct kref *refcount)
3472 struct cifs_readdata *rdata = container_of(refcount,
3473 struct cifs_readdata, refcount);
3476 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3477 for (i = 0; i < rdata->nr_pages; i++) {
3478 put_page(rdata->pages[i]);
3480 cifs_readdata_release(refcount);
3484 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3485 * @rdata: the readdata response with list of pages holding data
3486 * @iter: destination for our data
3488 * This function copies data from a list of pages in a readdata response into
3489 * an array of iovecs. It will first calculate where the data should go
3490 * based on the info in the readdata and then copy the data into that spot.
3493 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3495 size_t remaining = rdata->got_bytes;
3498 for (i = 0; i < rdata->nr_pages; i++) {
3499 struct page *page = rdata->pages[i];
3500 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3503 if (unlikely(iov_iter_is_pipe(iter))) {
3504 void *addr = kmap_atomic(page);
3506 written = copy_to_iter(addr, copy, iter);
3507 kunmap_atomic(addr);
3509 written = copy_page_to_iter(page, 0, copy, iter);
3510 remaining -= written;
3511 if (written < copy && iov_iter_count(iter) > 0)
3514 return remaining ? -EFAULT : 0;
3517 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3520 cifs_uncached_readv_complete(struct work_struct *work)
3522 struct cifs_readdata *rdata = container_of(work,
3523 struct cifs_readdata, work);
3525 complete(&rdata->done);
3526 collect_uncached_read_data(rdata->ctx);
3527 /* the below call can possibly free the last ref to aio ctx */
3528 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3532 uncached_fill_pages(struct TCP_Server_Info *server,
3533 struct cifs_readdata *rdata, struct iov_iter *iter,
3538 unsigned int nr_pages = rdata->nr_pages;
3539 unsigned int page_offset = rdata->page_offset;
3541 rdata->got_bytes = 0;
3542 rdata->tailsz = PAGE_SIZE;
3543 for (i = 0; i < nr_pages; i++) {
3544 struct page *page = rdata->pages[i];
3546 unsigned int segment_size = rdata->pagesz;
3549 segment_size -= page_offset;
3555 /* no need to hold page hostage */
3556 rdata->pages[i] = NULL;
3563 if (len >= segment_size)
3564 /* enough data to fill the page */
3567 rdata->tailsz = len;
3571 result = copy_page_from_iter(
3572 page, page_offset, n, iter);
3573 #ifdef CONFIG_CIFS_SMB_DIRECT
3578 result = cifs_read_page_from_socket(
3579 server, page, page_offset, n);
3583 rdata->got_bytes += result;
3586 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3587 rdata->got_bytes : result;
3591 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3592 struct cifs_readdata *rdata, unsigned int len)
3594 return uncached_fill_pages(server, rdata, NULL, len);
3598 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3599 struct cifs_readdata *rdata,
3600 struct iov_iter *iter)
3602 return uncached_fill_pages(server, rdata, iter, iter->count);
3605 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3606 struct list_head *rdata_list,
3607 struct cifs_aio_ctx *ctx)
3610 struct cifs_credits credits;
3612 struct TCP_Server_Info *server;
3614 /* XXX: should we pick a new channel here? */
3615 server = rdata->server;
3618 if (rdata->cfile->invalidHandle) {
3619 rc = cifs_reopen_file(rdata->cfile, true);
3627 * Wait for credits to resend this rdata.
3628 * Note: we are attempting to resend the whole rdata not in
3629 * smaller size.
3632 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3638 if (rsize < rdata->bytes) {
3639 add_credits_and_wake_if(server, &credits, 0);
3642 } while (rsize < rdata->bytes);
3643 rdata->credits = credits;
3645 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3647 if (rdata->cfile->invalidHandle)
3650 #ifdef CONFIG_CIFS_SMB_DIRECT
3652 rdata->mr->need_invalidate = true;
3653 smbd_deregister_mr(rdata->mr);
3657 rc = server->ops->async_readv(rdata);
3661 /* If the read was successfully sent, we are done */
3663 /* Add to aio pending list */
3664 list_add_tail(&rdata->list, rdata_list);
3668 /* Roll back credits and retry if needed */
3669 add_credits_and_wake_if(server, &rdata->credits, 0);
3670 } while (rc == -EAGAIN);
3673 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3678 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3679 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3680 struct cifs_aio_ctx *ctx)
3682 struct cifs_readdata *rdata;
3683 unsigned int npages, rsize;
3684 struct cifs_credits credits_on_stack;
3685 struct cifs_credits *credits = &credits_on_stack;
3689 struct TCP_Server_Info *server;
3690 struct page **pagevec;
3692 struct iov_iter direct_iov = ctx->iter;
3694 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3696 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3697 pid = open_file->pid;
3699 pid = current->tgid;
3702 iov_iter_advance(&direct_iov, offset - ctx->pos);
3705 if (open_file->invalidHandle) {
3706 rc = cifs_reopen_file(open_file, true);
3713 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3718 cur_len = min_t(const size_t, len, rsize);
3720 if (ctx->direct_io) {
3723 result = iov_iter_get_pages_alloc(
3724 &direct_iov, &pagevec,
3728 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3729 result, iov_iter_type(&direct_iov),
3730 direct_iov.iov_offset,
3735 add_credits_and_wake_if(server, credits, 0);
3738 cur_len = (size_t)result;
3739 iov_iter_advance(&direct_iov, cur_len);
3741 rdata = cifs_readdata_direct_alloc(
3742 pagevec, cifs_uncached_readv_complete);
3744 add_credits_and_wake_if(server, credits, 0);
3749 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3750 rdata->page_offset = start;
3751 rdata->tailsz = npages > 1 ?
3752 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3753 cur_len;
3757 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3758 /* allocate a readdata struct */
3759 rdata = cifs_readdata_alloc(npages,
3760 cifs_uncached_readv_complete);
3762 add_credits_and_wake_if(server, credits, 0);
3767 rc = cifs_read_allocate_pages(rdata, npages);
3769 kvfree(rdata->pages);
3771 add_credits_and_wake_if(server, credits, 0);
3775 rdata->tailsz = PAGE_SIZE;
3778 rdata->server = server;
3779 rdata->cfile = cifsFileInfo_get(open_file);
3780 rdata->nr_pages = npages;
3781 rdata->offset = offset;
3782 rdata->bytes = cur_len;
3784 rdata->pagesz = PAGE_SIZE;
3785 rdata->read_into_pages = cifs_uncached_read_into_pages;
3786 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3787 rdata->credits = credits_on_stack;
3789 kref_get(&ctx->refcount);
3791 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3794 if (rdata->cfile->invalidHandle)
3797 rc = server->ops->async_readv(rdata);
3801 add_credits_and_wake_if(server, &rdata->credits, 0);
3802 kref_put(&rdata->refcount,
3803 cifs_uncached_readdata_release);
3804 if (rc == -EAGAIN) {
3805 iov_iter_revert(&direct_iov, cur_len);
3811 list_add_tail(&rdata->list, rdata_list);
3820 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3822 struct cifs_readdata *rdata, *tmp;
3823 struct iov_iter *to = &ctx->iter;
3824 struct cifs_sb_info *cifs_sb;
3827 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3829 mutex_lock(&ctx->aio_mutex);
3831 if (list_empty(&ctx->list)) {
3832 mutex_unlock(&ctx->aio_mutex);
3837 /* the loop below should proceed in the order of increasing offsets */
3839 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3841 if (!try_wait_for_completion(&rdata->done)) {
3842 mutex_unlock(&ctx->aio_mutex);
3846 if (rdata->result == -EAGAIN) {
3847 /* resend call if it's a retryable error */
3848 struct list_head tmp_list;
3849 unsigned int got_bytes = rdata->got_bytes;
3851 list_del_init(&rdata->list);
3852 INIT_LIST_HEAD(&tmp_list);
3855 * Got a part of data and then reconnect has
3856 * happened -- fill the buffer and continue
3857 * reading.
3859 if (got_bytes && got_bytes < rdata->bytes) {
3861 if (!ctx->direct_io)
3862 rc = cifs_readdata_to_iov(rdata, to);
3864 kref_put(&rdata->refcount,
3865 cifs_uncached_readdata_release);
3870 if (ctx->direct_io) {
3872 * Re-use rdata as this is a
3873 * direct I/O read.
3875 rc = cifs_resend_rdata(
3879 rc = cifs_send_async_read(
3880 rdata->offset + got_bytes,
3881 rdata->bytes - got_bytes,
3882 rdata->cfile, cifs_sb,
3885 kref_put(&rdata->refcount,
3886 cifs_uncached_readdata_release);
3889 list_splice(&tmp_list, &ctx->list);
3892 } else if (rdata->result)
3894 else if (!ctx->direct_io)
3895 rc = cifs_readdata_to_iov(rdata, to);
3897 /* if there was a short read -- discard anything left */
3898 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3901 ctx->total_len += rdata->got_bytes;
3903 list_del_init(&rdata->list);
3904 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3907 if (!ctx->direct_io)
3908 ctx->total_len = ctx->len - iov_iter_count(to);
3910 /* mask nodata case */
3911 if (rc == -ENODATA)
3912 rc = 0;
3914 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3916 mutex_unlock(&ctx->aio_mutex);
3918 if (ctx->iocb && ctx->iocb->ki_complete)
3919 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3921 complete(&ctx->done);
3924 static ssize_t __cifs_readv(
3925 struct kiocb *iocb, struct iov_iter *to, bool direct)
3928 struct file *file = iocb->ki_filp;
3929 struct cifs_sb_info *cifs_sb;
3930 struct cifsFileInfo *cfile;
3931 struct cifs_tcon *tcon;
3932 ssize_t rc, total_read = 0;
3933 loff_t offset = iocb->ki_pos;
3934 struct cifs_aio_ctx *ctx;
3937 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3938 * fall back to data copy read path
3939 * this could be improved by getting pages directly in ITER_KVEC
3941 if (direct && iov_iter_is_kvec(to)) {
3942 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3946 len = iov_iter_count(to);
3950 cifs_sb = CIFS_FILE_SB(file);
3951 cfile = file->private_data;
3952 tcon = tlink_tcon(cfile->tlink);
3954 if (!tcon->ses->server->ops->async_readv)
3957 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3958 cifs_dbg(FYI, "attempting read on write only file instance\n");
3960 ctx = cifs_aio_ctx_alloc();
3964 ctx->cfile = cifsFileInfo_get(cfile);
3966 if (!is_sync_kiocb(iocb))
3969 if (iter_is_iovec(to))
3970 ctx->should_dirty = true;
3974 ctx->direct_io = true;
3978 rc = setup_aio_ctx_iter(ctx, to, READ);
3980 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3986 /* grab a lock here because read response handlers can access ctx */
3987 mutex_lock(&ctx->aio_mutex);
3989 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3991 /* if at least one read request send succeeded, then reset rc */
3992 if (!list_empty(&ctx->list))
3995 mutex_unlock(&ctx->aio_mutex);
3998 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4002 if (!is_sync_kiocb(iocb)) {
4003 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4004 return -EIOCBQUEUED;
4007 rc = wait_for_completion_killable(&ctx->done);
4009 mutex_lock(&ctx->aio_mutex);
4010 ctx->rc = rc = -EINTR;
4011 total_read = ctx->total_len;
4012 mutex_unlock(&ctx->aio_mutex);
4015 total_read = ctx->total_len;
4018 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4021 iocb->ki_pos += total_read;
4027 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4029 return __cifs_readv(iocb, to, true);
4032 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4034 return __cifs_readv(iocb, to, false);
4038 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4040 struct inode *inode = file_inode(iocb->ki_filp);
4041 struct cifsInodeInfo *cinode = CIFS_I(inode);
4042 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4043 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4044 iocb->ki_filp->private_data;
4045 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4049 * In strict cache mode we need to read from the server all the time
4050 * if we don't have level II oplock because the server can delay mtime
4051 * change - so we can't make a decision about inode invalidating.
4052 * And we can also fail with page reading if there are mandatory locks
4053 * on pages affected by this read but not on the region from pos to
4054 * pos+len-1.
4056 if (!CIFS_CACHE_READ(cinode))
4057 return cifs_user_readv(iocb, to);
4059 if (cap_unix(tcon->ses) &&
4060 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4061 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4062 return generic_file_read_iter(iocb, to);
4065 * We need to hold the sem to be sure nobody modifies lock list
4066 * with a brlock that prevents reading.
4068 down_read(&cinode->lock_sem);
4069 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4070 tcon->ses->server->vals->shared_lock_type,
4071 0, NULL, CIFS_READ_OP))
4072 rc = generic_file_read_iter(iocb, to);
4073 up_read(&cinode->lock_sem);
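/*
 * Summary of the read-path choice above (illustrative restatement):
 *   no CACHE_READ                        -> cifs_user_readv() from the server
 *   CACHE_READ + POSIX byte-range locks  -> generic_file_read_iter()
 *   CACHE_READ + CIFS mandatory locks    -> page cache read only when no
 *                                           brlock conflicts with the range
 */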
4078 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4081 unsigned int bytes_read = 0;
4082 unsigned int total_read;
4083 unsigned int current_read_size;
4085 struct cifs_sb_info *cifs_sb;
4086 struct cifs_tcon *tcon;
4087 struct TCP_Server_Info *server;
4090 struct cifsFileInfo *open_file;
4091 struct cifs_io_parms io_parms = {0};
4092 int buf_type = CIFS_NO_BUFFER;
4096 cifs_sb = CIFS_FILE_SB(file);
4098 /* FIXME: set up handlers for larger reads and/or convert to async */
4099 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4101 if (file->private_data == NULL) {
4106 open_file = file->private_data;
4107 tcon = tlink_tcon(open_file->tlink);
4108 server = cifs_pick_channel(tcon->ses);
4110 if (!server->ops->sync_read) {
4115 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4116 pid = open_file->pid;
4118 pid = current->tgid;
4120 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4121 cifs_dbg(FYI, "attempting read on write only file instance\n");
4123 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4124 total_read += bytes_read, cur_offset += bytes_read) {
4126 current_read_size = min_t(uint, read_size - total_read,
4129 * For Windows ME and 9x we do not want to request more
4130 * than it negotiated since it will refuse the read
4131 * then.
4133 if (!(tcon->ses->capabilities &
4134 tcon->ses->server->vals->cap_large_files)) {
4135 current_read_size = min_t(uint,
4136 current_read_size, CIFSMaxBufSize);
4138 if (open_file->invalidHandle) {
4139 rc = cifs_reopen_file(open_file, true);
4144 io_parms.tcon = tcon;
4145 io_parms.offset = *offset;
4146 io_parms.length = current_read_size;
4147 io_parms.server = server;
4148 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4149 &bytes_read, &cur_offset,
4151 } while (rc == -EAGAIN);
4153 if (rc || (bytes_read == 0)) {
4161 cifs_stats_bytes_read(tcon, total_read);
4162 *offset += bytes_read;
4170 * If the page is mmap'ed into a process' page tables, then we need to make
4171 * sure that it doesn't change while being written back.
4174 cifs_page_mkwrite(struct vm_fault *vmf)
4176 struct page *page = vmf->page;
4177 struct file *file = vmf->vma->vm_file;
4178 struct inode *inode = file_inode(file);
4180 cifs_fscache_wait_on_page_write(inode, page);
4183 return VM_FAULT_LOCKED;
4186 static const struct vm_operations_struct cifs_file_vm_ops = {
4187 .fault = filemap_fault,
4188 .map_pages = filemap_map_pages,
4189 .page_mkwrite = cifs_page_mkwrite,
4192 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4195 struct inode *inode = file_inode(file);
4199 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4200 rc = cifs_zap_mapping(inode);
4202 rc = generic_file_mmap(file, vma);
4204 vma->vm_ops = &cifs_file_vm_ops;
4210 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4216 rc = cifs_revalidate_file(file);
4218 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4221 rc = generic_file_mmap(file, vma);
4223 vma->vm_ops = &cifs_file_vm_ops;
4230 cifs_readv_complete(struct work_struct *work)
4232 unsigned int i, got_bytes;
4233 struct cifs_readdata *rdata = container_of(work,
4234 struct cifs_readdata, work);
4236 got_bytes = rdata->got_bytes;
4237 for (i = 0; i < rdata->nr_pages; i++) {
4238 struct page *page = rdata->pages[i];
4240 lru_cache_add(page);
4242 if (rdata->result == 0 ||
4243 (rdata->result == -EAGAIN && got_bytes)) {
4244 flush_dcache_page(page);
4245 SetPageUptodate(page);
4251 if (rdata->result == 0 ||
4252 (rdata->result == -EAGAIN && got_bytes))
4253 cifs_readpage_to_fscache(rdata->mapping->host, page);
4255 cifs_fscache_uncache_page(rdata->mapping->host, page);
4257 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4260 rdata->pages[i] = NULL;
4262 kref_put(&rdata->refcount, cifs_readdata_release);
4266 readpages_fill_pages(struct TCP_Server_Info *server,
4267 struct cifs_readdata *rdata, struct iov_iter *iter,
4274 unsigned int nr_pages = rdata->nr_pages;
4275 unsigned int page_offset = rdata->page_offset;
4277 /* determine the eof that the server (probably) has */
4278 eof = CIFS_I(rdata->mapping->host)->server_eof;
4279 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4280 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4282 rdata->got_bytes = 0;
4283 rdata->tailsz = PAGE_SIZE;
4284 for (i = 0; i < nr_pages; i++) {
4285 struct page *page = rdata->pages[i];
4286 unsigned int to_read = rdata->pagesz;
4290 to_read -= page_offset;
4296 if (len >= to_read) {
4298 } else if (len > 0) {
4299 /* enough for partial page, fill and zero the rest */
4300 zero_user(page, len + page_offset, to_read - len);
4301 n = rdata->tailsz = len;
4303 } else if (page->index > eof_index) {
4305 * The VFS will not try to do readahead past the
4306 * i_size, but it's possible that we have outstanding
4307 * writes with gaps in the middle and the i_size hasn't
4308 * caught up yet. Populate those with zeroed out pages
4309 * to prevent the VFS from repeatedly attempting to
4310 * fill them until the writes are flushed.
4312 zero_user(page, 0, PAGE_SIZE);
4313 lru_cache_add(page);
4314 flush_dcache_page(page);
4315 SetPageUptodate(page);
4318 rdata->pages[i] = NULL;
4322 /* no need to hold page hostage */
4323 lru_cache_add(page);
4326 rdata->pages[i] = NULL;
4332 result = copy_page_from_iter(
4333 page, page_offset, n, iter);
4334 #ifdef CONFIG_CIFS_SMB_DIRECT
4339 result = cifs_read_page_from_socket(
4340 server, page, page_offset, n);
4344 rdata->got_bytes += result;
4347 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4348 rdata->got_bytes : result;
4352 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4353 struct cifs_readdata *rdata, unsigned int len)
4355 return readpages_fill_pages(server, rdata, NULL, len);
4359 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4360 struct cifs_readdata *rdata,
4361 struct iov_iter *iter)
4363 return readpages_fill_pages(server, rdata, iter, iter->count);
4367 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4368 unsigned int rsize, struct list_head *tmplist,
4369 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4371 struct page *page, *tpage;
4372 unsigned int expected_index;
4374 gfp_t gfp = readahead_gfp_mask(mapping);
4376 INIT_LIST_HEAD(tmplist);
4378 page = lru_to_page(page_list);
4381 * Lock the page and put it in the cache. Since no one else
4382 * should have access to this page, we're safe to simply set
4383 * PG_locked without checking it first.
4385 __SetPageLocked(page);
4386 rc = add_to_page_cache_locked(page, mapping,
4387 page->index, gfp);
4389 /* give up if we can't stick it in the cache */
4390 if (rc) {
4391 __ClearPageLocked(page);
4392 return rc;
4395 /* move first page to the tmplist */
4396 *offset = (loff_t)page->index << PAGE_SHIFT;
4397 *bytes = PAGE_SIZE;
4398 *nr_pages = 1;
4399 list_move_tail(&page->lru, tmplist);
4401 /* now try and add more pages onto the request */
4402 expected_index = page->index + 1;
4403 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4404 /* discontinuity ? */
4405 if (page->index != expected_index)
4408 /* would this page push the read over the rsize? */
4409 if (*bytes + PAGE_SIZE > rsize)
4412 __SetPageLocked(page);
4413 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4415 __ClearPageLocked(page);
4418 list_move_tail(&page->lru, tmplist);
4419 (*bytes) += PAGE_SIZE;
4420 expected_index++;
4421 (*nr_pages)++;
4423 return rc;
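/*
 * Worked example (illustrative): with rsize = 16384 and contiguous pages at
 * indexes 7..12 on the list, the helper takes indexes 7, 8, 9 and 10
 * (4 pages = 16384 bytes) and stops: adding index 11 would push *bytes past
 * rsize, so it is left for the next pass of the caller's loop.
 */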
4426 static int cifs_readpages(struct file *file, struct address_space *mapping,
4427 struct list_head *page_list, unsigned num_pages)
4431 struct list_head tmplist;
4432 struct cifsFileInfo *open_file = file->private_data;
4433 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4434 struct TCP_Server_Info *server;
4440 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4441 * immediately if the cookie is negative
4443 * After this point, every page in the list might have PG_fscache set,
4444 * so we will need to clean that up off of every page we don't use.
4446 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4453 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4454 pid = open_file->pid;
4456 pid = current->tgid;
4459 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4461 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4462 __func__, file, mapping, num_pages);
4465 * Start with the page at end of list and move it to private
4466 * list. Do the same with any following pages until we hit
4467 * the rsize limit, hit an index discontinuity, or run out of
4468 * pages. Issue the async read and then start the loop again
4469 * until the list is empty.
4471 * Note that list order is important. The page_list is in
4472 * the order of declining indexes. When we put the pages in
4473 * the rdata->pages, then we want them in increasing order.
4475 while (!list_empty(page_list) && !err) {
4476 unsigned int i, nr_pages, bytes, rsize;
4478 struct page *page, *tpage;
4479 struct cifs_readdata *rdata;
4480 struct cifs_credits credits_on_stack;
4481 struct cifs_credits *credits = &credits_on_stack;
4483 if (open_file->invalidHandle) {
4484 rc = cifs_reopen_file(open_file, true);
4491 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4497 * Give up immediately if rsize is too small to read an entire
4498 * page. The VFS will fall back to readpage. We should never
4499 * reach this point however since we set ra_pages to 0 when the
4500 * rsize is smaller than a cache page.
4502 if (unlikely(rsize < PAGE_SIZE)) {
4503 add_credits_and_wake_if(server, credits, 0);
4509 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4510 &nr_pages, &offset, &bytes);
4512 add_credits_and_wake_if(server, credits, 0);
4516 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4518 /* best to give up if we're out of mem */
4519 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4520 list_del(&page->lru);
4521 lru_cache_add(page);
4526 add_credits_and_wake_if(server, credits, 0);
4530 rdata->cfile = cifsFileInfo_get(open_file);
4531 rdata->server = server;
4532 rdata->mapping = mapping;
4533 rdata->offset = offset;
4534 rdata->bytes = bytes;
4536 rdata->pagesz = PAGE_SIZE;
4537 rdata->tailsz = PAGE_SIZE;
4538 rdata->read_into_pages = cifs_readpages_read_into_pages;
4539 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4540 rdata->credits = credits_on_stack;
4542 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4543 list_del(&page->lru);
4544 rdata->pages[rdata->nr_pages++] = page;
4547 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4550 if (rdata->cfile->invalidHandle)
4553 rc = server->ops->async_readv(rdata);
4557 add_credits_and_wake_if(server, &rdata->credits, 0);
4558 for (i = 0; i < rdata->nr_pages; i++) {
4559 page = rdata->pages[i];
4560 lru_cache_add(page);
4564 /* Fallback to the readpage in error/reconnect cases */
4565 kref_put(&rdata->refcount, cifs_readdata_release);
4569 kref_put(&rdata->refcount, cifs_readdata_release);
4572 /* Any pages that have been shown to fscache but didn't get added to
4573 * the pagecache must be uncached before they get returned to the
4576 cifs_fscache_readpages_cancel(mapping->host, page_list);
4582 * cifs_readpage_worker must be called with the page pinned
4584 static int cifs_readpage_worker(struct file *file, struct page *page,
4590 /* Is the page cached? */
4591 rc = cifs_readpage_from_fscache(file_inode(file), page);
4595 read_data = kmap(page);
4596 /* for reads over a certain size could initiate async read ahead */
4598 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4603 cifs_dbg(FYI, "Bytes read %d\n", rc);
4605 /* we do not want atime to be less than mtime, it broke some apps */
4606 file_inode(file)->i_atime = current_time(file_inode(file));
4607 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4608 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4610 file_inode(file)->i_atime = current_time(file_inode(file));
4613 memset(read_data + rc, 0, PAGE_SIZE - rc);
4615 flush_dcache_page(page);
4616 SetPageUptodate(page);
4618 /* send this page to the cache */
4619 cifs_readpage_to_fscache(file_inode(file), page);
4631 static int cifs_readpage(struct file *file, struct page *page)
4633 loff_t offset = page_file_offset(page);
4639 if (file->private_data == NULL) {
4645 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4646 page, (int)offset, (int)offset);
4648 rc = cifs_readpage_worker(file, page, &offset);
4654 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4656 struct cifsFileInfo *open_file;
4658 spin_lock(&cifs_inode->open_file_lock);
4659 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4660 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4661 spin_unlock(&cifs_inode->open_file_lock);
4665 spin_unlock(&cifs_inode->open_file_lock);
4669 /* We do not want to update the file size from the server for inodes
4670 open for write - to avoid races with writepage extending
4671 the file. In the future we could consider allowing
4672 refreshing the inode only on increases in the file size,
4673 but this is tricky to do without racing with writebehind
4674 page caching in the current Linux kernel design. */
4675 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4680 if (is_inode_writable(cifsInode)) {
4681 /* This inode is open for write at least once */
4682 struct cifs_sb_info *cifs_sb;
4684 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4685 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4686 /* since no page cache to corrupt on directio
4687 we can change size safely */
4691 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4692 return true;
4694 return false;
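/*
 * Illustrative sketch (guarded out): how an attribute-refresh path might
 * consult is_size_safe_to_change() before trusting a server-reported EOF.
 * example_update_size() is hypothetical, not a helper from this file.
 */
#if 0
static void example_update_size(struct cifsInodeInfo *cifs_i, __u64 server_eof)
{
	struct inode *inode = &cifs_i->vfs_inode;

	if (is_size_safe_to_change(cifs_i, server_eof)) {
		spin_lock(&inode->i_lock);
		i_size_write(inode, server_eof);
		spin_unlock(&inode->i_lock);
	}
}
#endif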
4699 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4700 loff_t pos, unsigned len, unsigned flags,
4701 struct page **pagep, void **fsdata)
4704 pgoff_t index = pos >> PAGE_SHIFT;
4705 loff_t offset = pos & (PAGE_SIZE - 1);
4706 loff_t page_start = pos & PAGE_MASK;
4711 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4714 page = grab_cache_page_write_begin(mapping, index, flags);
4720 if (PageUptodate(page))
4721 goto out;
4724 * If we write a full page it will be up to date, no need to read from
4725 * the server. If the write is short, we'll end up doing a sync write
4728 if (len == PAGE_SIZE)
4729 goto out;
4732 * optimize away the read when we have an oplock, and we're not
4733 * expecting to use any of the data we'd be reading in. That
4734 * is, when the page lies beyond the EOF, or straddles the EOF
4735 * and the write will cover all of the existing data.
4737 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4738 i_size = i_size_read(mapping->host);
4739 if (page_start >= i_size ||
4740 (offset == 0 && (pos + len) >= i_size)) {
4741 zero_user_segments(page, 0, offset,
4742 offset + len,
4743 PAGE_SIZE);
4745 * PageChecked means that the parts of the page
4746 * to which we're not writing are considered up
4747 * to date. Once the data is copied to the
4748 * page, it can be set uptodate.
4750 SetPageChecked(page);
4755 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4757 * might as well read a page, it is fast enough. If we get
4758 * an error, we don't need to return it. cifs_write_end will
4759 * do a sync write instead since PG_uptodate isn't set.
4761 cifs_readpage_worker(file, page, &page_start);
4762 put_page(page);
4763 oncethru = 1;
4764 goto start;
4766 /* we could try using another file handle if there is one -
4767 but how would we lock it to prevent close of that handle
4768 racing with this read? In any case
4769 this will be written out by write_end so is fine */
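/*
 * Illustrative sketch (guarded out): the "skip the read" conditions used by
 * cifs_write_begin() above, restated as a predicate. Purely an example.
 */
#if 0
static bool example_can_skip_read(loff_t pos, unsigned int len, loff_t i_size)
{
	loff_t page_start = pos & PAGE_MASK;
	unsigned int offset = pos & (PAGE_SIZE - 1);

	/* page lies beyond EOF, or the write covers all existing data */
	return page_start >= i_size ||
	       (offset == 0 && (loff_t)(pos + len) >= i_size);
}
#endif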
4776 static int cifs_release_page(struct page *page, gfp_t gfp)
4778 if (PagePrivate(page))
4781 return cifs_fscache_release_page(page, gfp);
4784 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4785 unsigned int length)
4787 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4789 if (offset == 0 && length == PAGE_SIZE)
4790 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4793 static int cifs_launder_page(struct page *page)
4796 loff_t range_start = page_offset(page);
4797 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4798 struct writeback_control wbc = {
4799 .sync_mode = WB_SYNC_ALL,
4801 .range_start = range_start,
4802 .range_end = range_end,
4805 cifs_dbg(FYI, "Launder page: %p\n", page);
4807 if (clear_page_dirty_for_io(page))
4808 rc = cifs_writepage_locked(page, &wbc);
4810 cifs_fscache_invalidate_page(page, page->mapping->host);
4814 void cifs_oplock_break(struct work_struct *work)
4816 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4818 struct inode *inode = d_inode(cfile->dentry);
4819 struct cifsInodeInfo *cinode = CIFS_I(inode);
4820 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4821 struct TCP_Server_Info *server = tcon->ses->server;
4823 bool purge_cache = false;
4824 bool is_deferred = false;
4825 struct cifs_deferred_close *dclose;
4827 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4828 TASK_UNINTERRUPTIBLE);
4830 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4831 cfile->oplock_epoch, &purge_cache);
4833 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4834 cifs_has_mand_locks(cinode)) {
4835 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4840 if (inode && S_ISREG(inode->i_mode)) {
4841 if (CIFS_CACHE_READ(cinode))
4842 break_lease(inode, O_RDONLY);
4844 break_lease(inode, O_WRONLY);
4845 rc = filemap_fdatawrite(inode->i_mapping);
4846 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4847 rc = filemap_fdatawait(inode->i_mapping);
4848 mapping_set_error(inode->i_mapping, rc);
4849 cifs_zap_mapping(inode);
4851 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4852 if (CIFS_CACHE_WRITE(cinode))
4853 goto oplock_break_ack;
4856 rc = cifs_push_locks(cfile);
4858 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4862 * When an oplock break is received and there are no active
4863 * file handles, only cached ones, schedule the deferred close immediately
4864 * so that a new open will not use the cached handle.
4866 spin_lock(&CIFS_I(inode)->deferred_lock);
4867 is_deferred = cifs_is_deferred_close(cfile, &dclose);
4868 spin_unlock(&CIFS_I(inode)->deferred_lock);
4870 cfile->deferred_close_scheduled &&
4871 delayed_work_pending(&cfile->deferred)) {
4872 if (cancel_delayed_work(&cfile->deferred)) {
4873 _cifsFileInfo_put(cfile, false, false);
4874 goto oplock_break_done;
4878 * Releasing a stale oplock after a recent reconnect of the smb session
4879 * using a now incorrect file handle is not a data integrity issue, but do
4880 * not bother sending an oplock release if the session to the server is
4881 * still disconnected, since the oplock has already been released by the server
4883 if (!cfile->oplock_break_cancelled) {
4884 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4886 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4889 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4890 cifs_done_oplock_break(cinode);
4894 * The presence of cifs_direct_io() in the address space ops vector
4895 * allows open() with the O_DIRECT flag, which would have failed otherwise.
4897 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4898 * so this method should never be called.
4900 * Direct IO is not yet supported in the cached mode.
4903 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4907 * Eventually need to support direct IO for non forcedirectio mounts
4909 return -EINVAL;
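/*
 * Illustrative userspace sketch (guarded out): because cifs_direct_io is
 * present in the aops below, an application may open files on a cifs mount
 * with O_DIRECT; on forcedirectio (cache=none) mounts those requests are
 * shunted to the uncached read/write paths and never reach this stub.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>

int example_open_direct(const char *path)
{
	return open(path, O_RDWR | O_DIRECT);	/* permitted on cifs mounts */
}
#endif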
4912 static int cifs_swap_activate(struct swap_info_struct *sis,
4913 struct file *swap_file, sector_t *span)
4915 struct cifsFileInfo *cfile = swap_file->private_data;
4916 struct inode *inode = swap_file->f_mapping->host;
4917 unsigned long blocks;
4920 cifs_dbg(FYI, "swap activate\n");
4922 spin_lock(&inode->i_lock);
4923 blocks = inode->i_blocks;
4924 isize = inode->i_size;
4925 spin_unlock(&inode->i_lock);
4926 if (blocks*512 < isize) {
4927 pr_warn("swap activate: swapfile has holes\n");
4932 pr_warn_once("Swap support over SMB3 is experimental\n");
4935 * TODO: consider adding ACL (or documenting how) to prevent other
4936 * users (on this or other systems) from reading it
4940 /* TODO: add sk_set_memalloc(inet) or similar */
4943 cfile->swapfile = true;
4945 * TODO: Since file already open, we can't open with DENY_ALL here
4946 * but we could add call to grab a byte range lock to prevent others
4947 * from reading or writing the file
4953 static void cifs_swap_deactivate(struct file *file)
4955 struct cifsFileInfo *cfile = file->private_data;
4957 cifs_dbg(FYI, "swap deactivate\n");
4959 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4962 cfile->swapfile = false;
4964 /* do we need to unpin (or unlock) the file */
4967 const struct address_space_operations cifs_addr_ops = {
4968 .readpage = cifs_readpage,
4969 .readpages = cifs_readpages,
4970 .writepage = cifs_writepage,
4971 .writepages = cifs_writepages,
4972 .write_begin = cifs_write_begin,
4973 .write_end = cifs_write_end,
4974 .set_page_dirty = __set_page_dirty_nobuffers,
4975 .releasepage = cifs_release_page,
4976 .direct_IO = cifs_direct_io,
4977 .invalidatepage = cifs_invalidate_page,
4978 .launder_page = cifs_launder_page,
4980 * TODO: investigate and if useful we could add an cifs_migratePage
4981 * helper (under an CONFIG_MIGRATION) in the future, and also
4982 * investigate and add an is_dirty_writeback helper if needed
4984 .swap_activate = cifs_swap_activate,
4985 .swap_deactivate = cifs_swap_deactivate,
4989 * cifs_readpages requires the server to support a buffer large enough to
4990 * contain the header plus one complete page of data. Otherwise, we need
4991 * to leave cifs_readpages out of the address space operations.
4993 const struct address_space_operations cifs_addr_ops_smallbuf = {
4994 .readpage = cifs_readpage,
4995 .writepage = cifs_writepage,
4996 .writepages = cifs_writepages,
4997 .write_begin = cifs_write_begin,
4998 .write_end = cifs_write_end,
4999 .set_page_dirty = __set_page_dirty_nobuffers,
5000 .releasepage = cifs_release_page,
5001 .invalidatepage = cifs_invalidate_page,
5002 .launder_page = cifs_launder_page,