// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"

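/* Map the VFS open flags (O_ACCMODE bits) onto the SMB desired access bits */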
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

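/* Map the VFS open flags onto the SMB_O_* flags used by posix open (unix extensions) */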
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

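/* Map VFS O_CREAT/O_EXCL/O_TRUNC combinations onto an SMB create disposition (see table in cifs_nt_open) */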
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

int cifs_posix_open(const char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}

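/*
 * Return true if any open instance of this inode holds byte-range locks,
 * in which case a read oplock/lease cannot be cached safely.
 */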
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

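/*
 * Write variant used for cinode->lock_sem throughout this file: poll with
 * down_write_trylock() and short sleeps instead of blocking in down_write().
 */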
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

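/*
 * Allocate the per-open cifsFileInfo, take references on the dentry and
 * tlink, link the open into the inode and tcon open-file lists, and hand
 * the fid/oplock to the server's set_fid op. Returns NULL on allocation
 * failure.
 */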
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->deferred_close_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

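/*
 * Final release of a cifsFileInfo: free any remaining byte-range lock
 * records and drop the tlink, dentry and superblock references.
 */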
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_for_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:	not offloaded on close and oplock breaks
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close because it may cause an error when we open this file
		 * again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

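/*
 * ->open() for regular files: reuse a cached handle whose close was
 * deferred when possible, try posix open if the unix extensions allow it,
 * otherwise fall back to a regular NT-style create/open.
 */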
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			file->private_data = cfile;
			spin_lock(&CIFS_I(inode)->deferred_lock);
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto use_cache;
		} else {
			_cifsFileInfo_put(cfile, true, false);
		}
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->ctx->file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

use_cache:
	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
			   file->f_mode & FMODE_WRITE);
	if (file->f_flags & O_DIRECT &&
	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
	     file->f_flags & O_APPEND))
		cifs_invalidate_cache(file_inode(file),
				      FSCACHE_INVAL_DIO_WRITE);

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	free_dentry_path(page);
	free_xid(xid);
	return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work,
			struct cifsFileInfo, deferred.work);

	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	cifs_del_deferred_close(cfile);
	cfile->deferred_close_scheduled = false;
	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	_cifsFileInfo_put(cfile, true, false);
}

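/*
 * ->release() for regular files. When a strong (RHW) lease is held, the
 * server handle is kept open for up to acregmax and closed later from the
 * deferred-close work; otherwise the handle reference is dropped right away.
 */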
int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
		    cinode->lease_granted &&
		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
		    dclose) {
			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
				inode->i_ctime = inode->i_mtime = current_time(inode);
			}
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_close_scheduled &&
			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there is no pending work, mod_delayed_work queues new work.
				 * So, increase the ref count to avoid use-after-free.
				 */
				if (!mod_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->acregmax))
					cifsFileInfo_get(cfile);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->acregmax);
				cfile->deferred_close_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 * or leave it the same if we can't. Returns 0 if we don't need to request to
 * the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

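/*
 * Mix the file_lock owner pointer with the random cifs_lock_secret to build
 * the lock "pid" sent over the wire, so lock owners are not guessable.
 */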
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		el = el->next;
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start,
					    length, type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}

int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	if (!(fl->fl_flags & FL_FLOCK))
		return -ENOLCK;

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);
	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag,
			posix_lck, lock, unlock, xid);
	free_xid(xid);
	return rc;
}

/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
static void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

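/*
 * Synchronously write data to the server through an open handle, retrying
 * on -EAGAIN and reopening an invalidated handle, then update the cached
 * inode size/eof. Returns the number of bytes written or an error.
 */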
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written,
						     iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size) {
			i_size_write(d_inode(dentry), *offset);
			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
		}
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}

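/*
 * Find an open readable handle for this inode, filtering by the current
 * fsuid on multiuser mounts; takes a reference on the returned cifsFileInfo.
 */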
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}

/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;

	*ret_file = NULL;
	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of an oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */
	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}

struct cifsFileInfo *
find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
{
	struct cifsFileInfo *cfile;
	int rc;

	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
	if (rc)
		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);

	return cfile;
}

int
cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
		       int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *cfile;
	void *page = alloc_dentry_path();

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
		struct cifsInodeInfo *cinode;
		const char *full_path = build_path_from_dentry(cfile->dentry, page);
		if (IS_ERR(full_path)) {
			spin_unlock(&tcon->open_file_lock);
			free_dentry_path(page);
			return PTR_ERR(full_path);
		}
		if (strcmp(full_path, name))
			continue;

		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		free_dentry_path(page);
		return cifs_get_writable_file(cinode, flags, ret_file);
	}

	spin_unlock(&tcon->open_file_lock);
	free_dentry_path(page);
	return -ENOENT;
}

int
cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *cfile;
	void *page = alloc_dentry_path();

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
		struct cifsInodeInfo *cinode;
		const char *full_path = build_path_from_dentry(cfile->dentry, page);
		if (IS_ERR(full_path)) {
			spin_unlock(&tcon->open_file_lock);
			free_dentry_path(page);
			return PTR_ERR(full_path);
		}
		if (strcmp(full_path, name))
			continue;

		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		free_dentry_path(page);
		*ret_file = find_readable_file(cinode, 0);
		return *ret_file ? 0 : -ENOENT;
	}

	spin_unlock(&tcon->open_file_lock);
	free_dentry_path(page);
	return -ENOENT;
}

static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
				    &open_file);
	if (!rc) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
		else
			rc = -EFAULT;
	} else {
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		if (!is_retryable_error(rc))
			rc = -EIO;
	}

	kunmap(page);
	return rc;
}

static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
			  pgoff_t end, pgoff_t *index,
			  unsigned int *found_pages)
{
	struct cifs_writedata *wdata;

	wdata = cifs_writedata_alloc((unsigned int)tofind,
				     cifs_writev_complete);
	if (!wdata)
		return NULL;

	*found_pages = find_get_pages_range_tag(mapping, index, end,
				PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
	return wdata;
}

static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}

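/*
 * Finish filling in @wdata (offset, tail size, byte count, pid) and hand
 * it to the transport. The last page may be partial, so wdata->bytes is
 * (nr_pages - 1) full pages plus tailsz.
 */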
static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
		 struct address_space *mapping, struct writeback_control *wbc)
{
	int rc;

	wdata->sync_mode = wbc->sync_mode;
	wdata->nr_pages = nr_pages;
	wdata->offset = page_offset(wdata->pages[0]);
	wdata->pagesz = PAGE_SIZE;
	wdata->tailsz = min(i_size_read(mapping->host) -
			page_offset(wdata->pages[nr_pages - 1]),
			(loff_t)PAGE_SIZE);
	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
	wdata->pid = wdata->cfile->pid;

	rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
	if (rc)
		return rc;

	if (wdata->cfile->invalidHandle)
		rc = -EAGAIN;
	else
		rc = wdata->server->ops->async_writev(wdata,
						      cifs_writedata_release);

	return rc;
}

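/*
 * Writeback proceeds one credit reservation at a time: reserve up to
 * wsize bytes of credits, gather that many dirty pages, and issue a
 * single async write. The early-exit paths below hand the unused
 * reservation back via add_credits_and_wake_if() so a failed pass does
 * not leak credits.
 */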
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	struct cifsFileInfo *cfile = NULL;
	int rc = 0;
	int saved_rc = 0;
	unsigned int xid;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->ctx->wsize < PAGE_SIZE)
		return generic_writepages(mapping, wbc);

	xid = get_xid();
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);

retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize;
		pgoff_t next = 0, tofind, saved_index = index;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
		int get_file_rc = 0;

		if (cfile)
			cifsFileInfo_put(cfile);

		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);

		/* in case of an error store it to return later */
		if (rc)
			get_file_rc = rc;

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc != 0) {
			done = true;
			break;
		}

		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			done = true;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		wdata->credits = credits_on_stack;
		wdata->cfile = cfile;
		wdata->server = server;
		cfile = NULL;

		if (!wdata->cfile) {
			cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
				 get_file_rc);
			if (is_retryable_error(get_file_rc))
				rc = get_file_rc;
			else
				rc = -EBADF;
		} else
			rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				if (is_retryable_error(rc))
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				put_page(wdata->pages[i]);
			}
			if (!is_retryable_error(rc))
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

		/* Return immediately if we received a signal during writing */
		if (is_interrupt_error(rc)) {
			done = true;
			break;
		}

		if (rc != 0 && saved_rc == 0)
			saved_rc = rc;

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		end = -1;
		goto retry;
	}

	if (saved_rc != 0)
		rc = saved_rc;

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	if (cfile)
		cifsFileInfo_put(cfile);
	free_xid(xid);
	/* Indication to update ctime and mtime as close is deferred */
	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
	return rc;
}

static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
	/* BB add check for wbc flags */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (is_retryable_error(rc)) {
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
			goto retry_write;
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		SetPageUptodate(page);
	}
	end_page_writeback(page);
	put_page(page);
	free_xid(xid);
	return rc;
}

static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc = cifs_writepage_locked(page, wbc);

	unlock_page(page);
	return rc;
}

static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size) {
			i_size_write(inode, pos);
			inode->i_blocks = (512 - 1 + pos) >> 9;
		}
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);
	/* Indication to update ctime and mtime as close is deferred */
	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);

	return rc;
}

int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(inode->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush == NULL) {
			rc = -ENOSYS;
			goto strict_fsync_exit;
		}

		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
			if (smbfile) {
				rc = server->ops->flush(xid, tcon, &smbfile->fid);
				cifsFileInfo_put(smbfile);
			} else
				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
		} else
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
	}

strict_fsync_exit:
	free_xid(xid);
	return rc;
}

int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush == NULL) {
			rc = -ENOSYS;
			goto fsync_exit;
		}

		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
			if (smbfile) {
				rc = server->ops->flush(xid, tcon, &smbfile->fid);
				cifsFileInfo_put(smbfile);
			} else
				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
		} else
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
	}

fsync_exit:
	free_xid(xid);
	return rc;
}

/*
 * As file closes, flush all cached write data for this inode checking
 * for write behind errors.
 */
int cifs_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	int rc = 0;

	if (file->f_mode & FMODE_WRITE)
		rc = filemap_write_and_wait(inode->i_mapping);

	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
	if (rc)
		trace_cifs_flush_err(inode->i_ino, rc);
	return rc;
}

static int
cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
{
	int rc = 0;
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
		if (!pages[i]) {
			/*
			 * save number of pages we have already allocated and
			 * return with ENOMEM error
			 */
			num_pages = i;
			rc = -ENOMEM;
			break;
		}
	}

	if (rc) {
		for (i = 0; i < num_pages; i++)
			put_page(pages[i]);
	}
	return rc;
}

static size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
{
	size_t num_pages;
	size_t clen;

	clen = min_t(const size_t, len, wsize);
	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);

	if (cur_len)
		*cur_len = clen;

	return num_pages;
}

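/*
 * Example for get_numpages() above: with a wsize of 64KiB and len of
 * 200000 bytes, cur_len is clamped to 65536 and, assuming a 4KiB
 * PAGE_SIZE, DIV_ROUND_UP(65536, 4096) yields 16 pages for this request;
 * the remaining bytes are covered by later requests.
 */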
static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	int i;
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	cifs_writedata_release(refcount);
}

static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);

static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}

static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
}

static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server = wdata->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			if (wsize < wdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				if (wdata->mr) {
					wdata->mr->need_invalidate = true;
					smbd_deregister_mr(wdata->mr);
					wdata->mr = NULL;
				}
#endif
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
			}
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}

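/*
 * Split the source iterator into wsize-bounded chunks and issue one async
 * write per chunk. For direct I/O the user pages are pinned in place;
 * otherwise the data is first copied into freshly allocated pages. Each
 * wdata that is successfully sent is queued on @wdata_list for the
 * collection routine below.
 */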
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	struct iov_iter saved_from = *from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	unsigned int xid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
	xid = get_xid();

	do {
		unsigned int wsize;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, wsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
				from, &pagevec, cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(from),
					 from->iov_offset, from->count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(from, cur_len);

			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			wdata->page_offset = start;
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->server = server;
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->credits = credits_on_stack;
		wdata->ctx = ctx;
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}

static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	ssize_t rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
				}

				list_splice(&tmp_list, &ctx->list);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	cifs_stats_bytes_written(tcon, ctx->total_len);
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
	else
		complete(&ctx->done);
}

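/*
 * Common back end for cifs_user_writev() and cifs_direct_writev(): set up
 * an aio context, fan the iterator out into async writes, then either
 * return -EIOCBQUEUED for an async iocb or wait for the collection work
 * above to complete and return the total written.
 */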
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
	size_t len = iov_iter_count(from);
	int rc;

	/*
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(from)) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	if (direct) {
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}

ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, true);
}

ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, false);
}

static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}

ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause an error with mandatory locks on
	 * these pages but not on the region from pos to pos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}

static struct cifs_readdata *
cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
{
	struct cifs_readdata *rdata;

	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
	if (rdata != NULL) {
		rdata->pages = pages;
		kref_init(&rdata->refcount);
		INIT_LIST_HEAD(&rdata->list);
		init_completion(&rdata->done);
		INIT_WORK(&rdata->work, complete);
	}

	return rdata;
}

static struct cifs_readdata *
cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
{
	struct page **pages =
		kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
	struct cifs_readdata *ret = NULL;

	if (pages) {
		ret = cifs_readdata_direct_alloc(pages, complete);
		if (!ret)
			kfree(pages);
	}

	return ret;
}

void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	kvfree(rdata->pages);
	kfree(rdata);
}

static int
cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
{
	int rc = 0;
	struct page *page;
	unsigned int i;

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
		if (!page) {
			rc = -ENOMEM;
			break;
		}
		rdata->pages[i] = page;
	}

	if (rc) {
		unsigned int nr_page_failed = i;

		for (i = 0; i < nr_page_failed; i++) {
			put_page(rdata->pages[i]);
			rdata->pages[i] = NULL;
		}
	}
	return rc;
}

static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < rdata->nr_pages; i++)
		put_page(rdata->pages[i]);
	cifs_readdata_release(refcount);
}

/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:	the readdata response with list of pages holding data
 * @iter:	destination for our data
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 */
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
	size_t remaining = rdata->got_bytes;
	unsigned int i;

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
		size_t written;

		if (unlikely(iov_iter_is_pipe(iter))) {
			void *addr = kmap_atomic(page);

			written = copy_to_iter(addr, copy, iter);
			kunmap_atomic(addr);
		} else
			written = copy_page_to_iter(page, 0, copy, iter);
		remaining -= written;
		if (written < copy && iov_iter_count(iter) > 0)
			break;
	}
	return remaining ? -EFAULT : 0;
}

static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);

static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
					struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}

static int
uncached_fill_pages(struct TCP_Server_Info *server,
		    struct cifs_readdata *rdata, struct iov_iter *iter,
		    unsigned int len)
{
	int result = 0;
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t n;
		unsigned int segment_size = rdata->pagesz;

		if (i == 0)
			segment_size -= page_offset;
		else
			page_offset = 0;

		if (len <= 0) {
			/* no need to hold page hostage */
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			put_page(page);
			continue;
		}

		n = len;
		if (len >= segment_size)
			/* enough data to fill the page */
			n = segment_size;
		else
			rdata->tailsz = len;
		len -= n;

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}

static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata, unsigned int len)
{
	return uncached_fill_pages(server, rdata, NULL, len);
}

static int
cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata,
			      struct iov_iter *iter)
{
	return uncached_fill_pages(server, rdata, iter, iter->count);
}

static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server;

	/* XXX: should we pick a new channel here? */
	server = rdata->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				if (rdata->mr) {
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	return rc;
}

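/*
 * Issue async reads covering [offset, offset + len), at most rsize bytes
 * per request, queueing each rdata on @rdata_list. This mirrors the
 * uncached write path: credits are reserved before each request and
 * rolled back if the send fails.
 */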
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(&direct_iov),
					 direct_iov.iov_offset,
					 direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;
		} else {
			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->server = server;
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}

static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
	else
		complete(&ctx->done);
}

static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(to)) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}

ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}

ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}

ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}

static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms = {0};
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = cifs_pick_channel(tcon->ses);

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if (!(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			io_parms.server = server;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}

/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;

#ifdef CONFIG_CIFS_FSCACHE
	if (PageFsCache(page) &&
	    wait_on_page_fscache_killable(page) < 0)
		return VM_FAULT_RETRY;
#endif

	lock_page(page);
	return VM_FAULT_LOCKED;
}

static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};

int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int xid, rc = 0;
	struct inode *inode = file_inode(file);

	xid = get_xid();
	if (!CIFS_CACHE_READ(CIFS_I(inode)))
		rc = cifs_zap_mapping(inode);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;
	free_xid(xid);
	return rc;
}

int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

	xid = get_xid();
	rc = cifs_revalidate_file(file);
	if (rc)
		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
			 rc);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;
	free_xid(xid);
	return rc;
}

static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i, got_bytes;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	got_bytes = rdata->got_bytes;
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add(page);

		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes)) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		} else
			SetPageError(page);

		unlock_page(page);

		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes))
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);

		put_page(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}

static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}

static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}

static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}

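/*
 * Peel a run of index-consecutive pages off @page_list (bounded by @rsize
 * bytes), add them to the page cache, and collect them on @tmplist so
 * they can be filled by a single read request. The run's starting offset,
 * byte count, and page count are returned through the out parameters.
 */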
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}

static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					  &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->server = server;
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	free_xid(xid);
	return rc;
}

/*
 * cifs_readpage_worker must be called with the page pinned
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	/* we do not want atime to be less than mtime, it broke some apps */
	file_inode(file)->i_atime = current_time(file_inode(file));
	if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
		file_inode(file)->i_atime = file_inode(file)->i_mtime;
	else
		file_inode(file)->i_atime = current_time(file_inode(file));

	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}

static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = page_file_offset(page);
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	free_xid(xid);
	return rc;
}

static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_inode->open_file_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_inode->open_file_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return 0;
}

/* We do not want to update the file size from server for inodes
   open for write - to avoid races with writepage extending
   the file - in the future we could consider allowing
   refreshing the inode only on increases in the file size
   but this is tricky to do without racing with writebehind
   page caching in the current Linux kernel design */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since no page cache to corrupt on directio
			   we can change size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}

static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}

static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;
	if (PageFsCache(page)) {
		if (current_is_kswapd() || !(gfp & __GFP_FS))
			return false;
		wait_on_page_fscache(page);
	}
	fscache_note_page_release(cifs_inode_cookie(page->mapping->host));
	return true;
}

static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
{
	wait_on_page_fscache(page);
}

static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	wait_on_page_fscache(page);
	return rc;
}

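/*
 * Work handler run when the server breaks our oplock or lease: downgrade
 * the cached state, flush (and possibly zap) the page cache, push cached
 * byte-range locks back to the server, then acknowledge the break unless
 * it was cancelled by a reconnect.
 */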
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;
	bool is_deferred = false;
	struct cifs_deferred_close *dclose;

	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * When oplock break is received and there are no active
	 * file handles but cached, then schedule deferred close immediately.
	 * So, new open will not use cached handle.
	 */
	spin_lock(&CIFS_I(inode)->deferred_lock);
	is_deferred = cifs_is_deferred_close(cfile, &dclose);
	spin_unlock(&CIFS_I(inode)->deferred_lock);
	if (is_deferred &&
	    cfile->deferred_close_scheduled &&
	    delayed_work_pending(&cfile->deferred)) {
		if (cancel_delayed_work(&cfile->deferred)) {
			_cifsFileInfo_put(cfile, false, false);
			goto oplock_break_done;
		}
	}
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
oplock_break_done:
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
 * so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}

static int cifs_swap_activate(struct swap_info_struct *sis,
			      struct file *swap_file, sector_t *span)
{
	struct cifsFileInfo *cfile = swap_file->private_data;
	struct inode *inode = swap_file->f_mapping->host;
	unsigned long blocks;
	long long isize;

	cifs_dbg(FYI, "swap activate\n");

	spin_lock(&inode->i_lock);
	blocks = inode->i_blocks;
	isize = inode->i_size;
	spin_unlock(&inode->i_lock);
	if (blocks*512 < isize) {
		pr_warn("swap activate: swapfile has holes\n");
		return -EINVAL;
	}
	*span = sis->pages;

	pr_warn_once("Swap support over SMB3 is experimental\n");

	/*
	 * TODO: consider adding ACL (or documenting how) to prevent other
	 * users (on this or other systems) from reading it
	 */

	/* TODO: add sk_set_memalloc(inet) or similar */

	if (cfile)
		cfile->swapfile = true;
	/*
	 * TODO: Since file already open, we can't open with DENY_ALL here
	 * but we could add call to grab a byte range lock to prevent others
	 * from reading or writing the file
	 */

	return 0;
}

static void cifs_swap_deactivate(struct file *file)
{
	struct cifsFileInfo *cfile = file->private_data;

	cifs_dbg(FYI, "swap deactivate\n");

	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */

	if (cfile)
		cfile->swapfile = false;

	/* do we need to unpin (or unlock) the file */
}

/*
 * Mark a page as having been made dirty and thus needing writeback. We also
 * need to pin the cache object to write back to.
 */
#ifdef CONFIG_CIFS_FSCACHE
static int cifs_set_page_dirty(struct page *page)
{
	return fscache_set_page_dirty(page, cifs_inode_cookie(page->mapping->host));
}
#else
#define cifs_set_page_dirty __set_page_dirty_nobuffers
#endif

const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = cifs_set_page_dirty,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
	/*
	 * TODO: investigate and if useful we could add a cifs_migratePage
	 * helper (under a CONFIG_MIGRATION) in the future, and also
	 * investigate and add an is_dirty_writeback helper if needed
	 */
	.swap_activate = cifs_swap_activate,
	.swap_deactivate = cifs_swap_deactivate,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data. Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = cifs_set_page_dirty,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};