/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}
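
/*
 * Illustrative example (not part of the original source): with the two
 * helpers above, an open(2) call such as
 *
 *	open(path, O_RDWR | O_CREAT | O_TRUNC)
 *
 * is sent to the server with desired_access = GENERIC_READ | GENERIC_WRITE
 * (from cifs_convert_flags) and disposition = FILE_OVERWRITE_IF (from
 * cifs_get_disposition).
 */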
int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_sb->mnt_cifs_flags &
					CIFS_MOUNT_MAP_SPECIAL_CHR);
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

out:
	kfree(buf);
	return rc;
}
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&cifs_file_list_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cifs_file_list_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
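
/*
 * Illustrative example (not part of the original source): if the server
 * grants a read (level II) oplock on open while another fid on the same
 * inode already holds cached byte-range locks, cifs_new_fileinfo() above
 * downgrades the oplock to None before calling set_fid(), so cached reads
 * cannot bypass the mandatory byte-range locking semantics.
 */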
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file_list_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file_list_lock);
	return cifs_file;
}
/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = cifs_file->dentry->d_inode;
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifsLockInfo *li, *tmp;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	spin_lock(&cifs_file_list_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file_list_lock);
		return;
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 cifs_file->dentry->d_inode);
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close because it may cause an error when we open this file
		 * again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}
	spin_unlock(&cifs_file_list_lock);

	cancel_work_sync(&cifs_file->oplock_break);

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	cifs_del_pending_open(&open);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
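
/*
 * Illustrative usage sketch (not part of the original source): callers that
 * need to keep a cifsFileInfo alive across an operation bracket it with the
 * get/put pair, e.g.:
 *
 *	cfile = cifsFileInfo_get(cfile);
 *	... use cfile without holding cifs_file_list_lock ...
 *	cifsFileInfo_put(cfile);
 *
 * Only the final put (count reaching zero) closes the server handle, drops
 * the cached lock records and frees the structure.
 */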
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read(&cinode->lock_sem);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = cfile->dentry->d_inode;
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cifs_file_list_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cifs_file_list_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cifs_file_list_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, struct cifsLockInfo **conf_lock,
			int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 cfile, conf_lock, rw_check);
		if (rc)
			break;
	}

	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					&conf_lock, CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
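
/*
 * Illustrative example (not part of the original source): for an F_GETLK
 * request on a range already covered by a cached exclusive lock held via
 * another fid, cifs_lock_test() rewrites flock to describe the conflicting
 * lock (fl_type = F_WRLCK, fl_start/fl_end/fl_pid from the cached range)
 * and returns 0, so no round trip to the server is needed.
 */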
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, &conf_lock, CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
		if (!rc)
			goto try_again;
		posix_unblock_lock(flock);
	}
	return rc;
}
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf) {
		free_xid(xid);
		return -EINVAL;
	}

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
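
/*
 * Illustrative arithmetic (not part of the original source; the sizes below
 * are assumptions, not taken from the headers): if max_buf were 4356 bytes,
 * sizeof(struct smb_hdr) 36 and sizeof(LOCKING_ANDX_RANGE) 20, then
 * max_num = (4356 - 36) / 20 = 216, so up to 216 cached ranges of one type
 * are coalesced into a single cifs_lockv() call above instead of issuing
 * one request per lock.
 */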
/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
	for (lockp = &inode->i_flock; *lockp != NULL; \
	     lockp = &(*lockp)->fl_next)

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = cfile->dentry->d_inode;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock, **before;
	unsigned int count = 0, i = 0;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	spin_lock(&inode->i_lock);
	cifs_for_each_lock(inode, before) {
		if ((*before)->fl_flags & FL_POSIX)
			count++;
	}
	spin_unlock(&inode->i_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&inode->i_lock);
	cifs_for_each_lock(inode, before) {
		flock = *before;
		if ((flock->fl_flags & FL_POSIX) == 0)
			continue;
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = flock->fl_pid;
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&inode->i_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}
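
/*
 * Illustrative summary (not part of the original source) of the fl_type
 * mapping performed above:
 *
 *	F_WRLCK / F_EXLCK  -> exclusive_lock_type, *lock = 1
 *	F_RDLCK / F_SHLCK  -> shared_lock_type,    *lock = 1
 *	F_UNLCK            -> unlock_lock_type,    *unlock = 1
 *
 * Every variant is OR'ed into large_lock_type first, since this client
 * always uses large-file (64-bit) lock ranges.
 */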
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
static void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}
static void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf)
		return -EINVAL;

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = cfile->dentry->d_inode;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      current->tgid, flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX)
		posix_lock_file_wait(file, flock);
	return rc;
}
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);

	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
static void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}
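
/*
 * Illustrative example (not part of the original source): after writing 512
 * bytes at offset 4096, end_of_write is 4608; a cached server_eof of 4096 is
 * advanced to 4608, while a larger cached value (say 8192) is left untouched.
 */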
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
		 write_size, *offset, dentry->d_name.name);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(dentry->d_inode),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, open_file, &io_parms,
						     &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&dentry->d_inode->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&dentry->d_inode->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&dentry->d_inode->i_lock);
		if (*offset > dentry->d_inode->i_size)
			i_size_write(dentry->d_inode, *offset);
		spin_unlock(&dentry->d_inode->i_lock);
	}
	mark_inode_dirty_sync(dentry->d_inode);
	free_xid(xid);
	return total_written;
}
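
/*
 * Illustrative example (not part of the original source; the sizes are
 * assumed): with wp_retry_size() returning 65536, a 70000-byte cifs_write()
 * issues one 65536-byte sync_write followed by one 4464-byte sync_write,
 * advancing *offset and the cached server_eof after each successful chunk.
 */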
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_file_list_lock);
	return NULL;
}
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	   the VFS or MM) should not happen but we had reports of an oops (due
	   to it being zero) during stress testcases so we need to check for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_file_list_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get_locked(inv_file);
	}

	spin_unlock(&cifs_file_list_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			spin_lock(&cifs_file_list_lock);
			list_move_tail(&inv_file->flist,
					&cifs_inode->openFileList);
			spin_unlock(&cifs_file_list_lock);
			cifsFileInfo_put(inv_file);
			spin_lock(&cifs_file_list_lock);
			++refind;
			inv_file = NULL;
			goto refind_writable;
		}
	}

	return NULL;
}
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cifs_dbg(FYI, "No writeable filehandles for inode\n");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}
static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
			  pgoff_t end, pgoff_t *index,
			  unsigned int *found_pages)
{
	unsigned int nr_pages;
	struct page **pages;
	struct cifs_writedata *wdata;

	wdata = cifs_writedata_alloc((unsigned int)tofind,
				     cifs_writev_complete);
	if (!wdata)
		return NULL;

	/*
	 * find_get_pages_tag seems to return a max of 256 on each
	 * iteration, so we must call it several times in order to
	 * fill the array or the wsize is effectively limited to
	 * 256 * PAGE_CACHE_SIZE.
	 */
	*found_pages = 0;
	pages = wdata->pages;
	do {
		nr_pages = find_get_pages_tag(mapping, index,
					      PAGECACHE_TAG_DIRTY, tofind,
					      pages);
		*found_pages += nr_pages;
		tofind -= nr_pages;
		pages += nr_pages;
	} while (nr_pages && tofind && *index <= end);

	return wdata;
}
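
/*
 * Illustrative example (not part of the original source): if the wsize is
 * large enough to ask for tofind = 2048 dirty pages, the 256-page cap noted
 * above means the loop may need up to eight find_get_pages_tag() calls to
 * fill wdata->pages before the batch is handed to the writeback path.
 */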
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither mapping->tree_lock nor
		 * lock on the page itself: the page may be truncated or
		 * invalidated (changing page->mapping to NULL), or even
		 * swizzled back from swapper_space to tmpfs file
		 * mapping
		 */

		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		page_cache_release(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
		 struct address_space *mapping, struct writeback_control *wbc)
{
	int rc = 0;
	struct TCP_Server_Info *server;
	unsigned int i;

	wdata->sync_mode = wbc->sync_mode;
	wdata->nr_pages = nr_pages;
	wdata->offset = page_offset(wdata->pages[0]);
	wdata->pagesz = PAGE_CACHE_SIZE;
	wdata->tailsz = min(i_size_read(mapping->host) -
			page_offset(wdata->pages[nr_pages - 1]),
			(loff_t)PAGE_CACHE_SIZE);
	wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;

	if (wdata->cfile != NULL)
		cifsFileInfo_put(wdata->cfile);
	wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
	if (!wdata->cfile) {
		cifs_dbg(VFS, "No writable handles for inode\n");
		rc = -EBADF;
	} else {
		wdata->pid = wdata->cfile->pid;
		server = tlink_tcon(wdata->cfile->tlink)->ses->server;
		rc = server->ops->async_writev(wdata, cifs_writedata_release);
	}

	for (i = 0; i < nr_pages; ++i)
		unlock_page(wdata->pages[i]);

	return rc;
}
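
/*
 * Illustrative arithmetic (not part of the original source; assumes a 4 KiB
 * PAGE_CACHE_SIZE): for a 3-page batch where i_size ends 100 bytes into the
 * last page, tailsz = 100 and wdata->bytes = 2 * 4096 + 100 = 8292, so the
 * async write sends exactly up to EOF instead of a full final page.
 */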
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	int rc = 0;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
		return generic_writepages(mapping, wbc);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
	server = cifs_sb_master_tcon(cifs_sb)->ses->server;
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize, credits;
		pgoff_t next = 0, tofind, saved_index = index;

		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc)
			break;

		tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		wdata->credits = credits;

		rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				if (rc == -EAGAIN)
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				page_cache_release(wdata->pages[i]);
			}
			if (rc != -EAGAIN)
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	return rc;
}
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	int xid;

	xid = get_xid();
	/* BB add check for wbc flags */
	page_cache_get(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
		goto retry_write;
	else if (rc == -EAGAIN)
		redirty_page_for_writepage(wbc, page);
	else if (rc != 0)
		SetPageError(page);
	else
		SetPageUptodate(page);
	end_page_writeback(page);
	page_cache_release(page);
	free_xid(xid);
	return rc;
}
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return rc;
}
static int cifs_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	page_cache_release(page);

	return rc;
}
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
		 file->f_path.dentry->d_name.name, datasync);

	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	struct inode *inode = file->f_mapping->host;

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
		 file->f_path.dentry->d_name.name, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
/*
 * As file closes, flush all cached write data for this inode checking
 * for write behind errors.
 */
int cifs_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	int rc = 0;

	if (file->f_mode & FMODE_WRITE)
		rc = filemap_write_and_wait(inode->i_mapping);

	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
	return rc;
}
static int
cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
{
	int rc = 0;
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
		if (!pages[i]) {
			/*
			 * save number of pages we have already allocated and
			 * return with ENOMEM error
			 */
			num_pages = i;
			rc = -ENOMEM;
			break;
		}
	}

	if (rc) {
		for (i = 0; i < num_pages; i++)
			put_page(pages[i]);
	}
	return rc;
}
static inline
size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
{
	size_t num_pages;
	size_t clen;

	clen = min_t(const size_t, len, wsize);
	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);

	if (cur_len)
		*cur_len = clen;

	return num_pages;
}
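
/*
 * Illustrative arithmetic (not part of the original source; assumes a 4 KiB
 * PAGE_SIZE): for wsize = 65536 and len = 200000, clen = 65536 and
 * num_pages = DIV_ROUND_UP(65536, 4096) = 16, so each pass of the uncached
 * write path below fills at most 16 pages.
 */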
static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	int i;
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	cifs_writedata_release(refcount);
}
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = wdata->cfile->dentry->d_inode;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);

	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2425 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2426 size_t *len, unsigned long *num_pages)
2428 size_t save_len, copied, bytes, cur_len = *len;
2429 unsigned long i, nr_pages = *num_pages;
2432 for (i = 0; i < nr_pages; i++) {
2433 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2434 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2437 * If we didn't copy as much as we expected, then that
2438 * may mean we trod into an unmapped area. Stop copying
2439 * at that point. On the next pass through the big
2440 * loop, we'll likely end up getting a zero-length
2441 * write and bailing out of it.
2446 cur_len = save_len - cur_len;
2450 * If we have no data to send, then that probably means that
2451 * the copy above failed altogether. That's most likely because
2452 * the address in the iovec was bogus. Return -EFAULT and let
2453 * the caller free anything we allocated and bail out.
2459 * i + 1 now represents the number of pages we actually used in
2460 * the copy phase above.
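/*
 * Worked example (editorial annotation, not in the original source):
 * if save_len was 10000 and the copy hit an unmapped address after
 * 6000 bytes, the remaining cur_len is 4000, so the fix-up above
 * yields cur_len = 10000 - 4000 = 6000 bytes actually copied. Those
 * 6000 bytes span pages 0 and 1, so *num_pages becomes i + 1 = 2.
 */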
2467 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2468 struct cifsFileInfo *open_file,
2469 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2473 unsigned long nr_pages, num_pages, i;
2474 struct cifs_writedata *wdata;
2475 struct iov_iter saved_from;
2476 loff_t saved_offset = offset;
2478 struct TCP_Server_Info *server;
2480 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2481 pid = open_file->pid;
2483 pid = current->tgid;
2485 server = tlink_tcon(open_file->tlink)->ses->server;
2486 memcpy(&saved_from, from, sizeof(struct iov_iter));
2489 unsigned int wsize, credits;
2491 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2496 nr_pages = get_numpages(wsize, len, &cur_len);
2497 wdata = cifs_writedata_alloc(nr_pages,
2498 cifs_uncached_writev_complete);
2501 add_credits_and_wake_if(server, credits, 0);
2505 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2508 add_credits_and_wake_if(server, credits, 0);
2512 num_pages = nr_pages;
2513 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2515 for (i = 0; i < nr_pages; i++)
2516 put_page(wdata->pages[i]);
2518 add_credits_and_wake_if(server, credits, 0);
2523 * Bring nr_pages down to the number of pages we actually used,
2524 * and free any pages that we didn't use.
2526 for ( ; nr_pages > num_pages; nr_pages--)
2527 put_page(wdata->pages[nr_pages - 1]);
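/*
 * Worked example (editorial annotation, not in the original source):
 * if 4 pages were allocated but the copy only used 2, this loop frees
 * pages[3] and then pages[2], leaving nr_pages == num_pages == 2.
 */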
2529 wdata->sync_mode = WB_SYNC_ALL;
2530 wdata->nr_pages = nr_pages;
2531 wdata->offset = (__u64)offset;
2532 wdata->cfile = cifsFileInfo_get(open_file);
2534 wdata->bytes = cur_len;
2535 wdata->pagesz = PAGE_SIZE;
2536 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
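/*
 * Worked example (editorial annotation, not in the original source):
 * for cur_len = 9000 and nr_pages = 3 on 4KB pages,
 * tailsz = 9000 - 2 * 4096 = 808 bytes in the last page.
 */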
2537 wdata->credits = credits;
2539 if (!wdata->cfile->invalidHandle ||
2540 !cifs_reopen_file(wdata->cfile, false))
2541 rc = server->ops->async_writev(wdata,
2542 cifs_uncached_writedata_release);
2544 add_credits_and_wake_if(server, wdata->credits, 0);
2545 kref_put(&wdata->refcount,
2546 cifs_uncached_writedata_release);
2547 if (rc == -EAGAIN) {
2548 memcpy(from, &saved_from,
2549 sizeof(struct iov_iter));
2550 iov_iter_advance(from, offset - saved_offset);
2556 list_add_tail(&wdata->list, wdata_list);
2565 cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2568 ssize_t total_written = 0;
2569 struct cifsFileInfo *open_file;
2570 struct cifs_tcon *tcon;
2571 struct cifs_sb_info *cifs_sb;
2572 struct cifs_writedata *wdata, *tmp;
2573 struct list_head wdata_list;
2574 struct iov_iter saved_from;
2577 len = iov_iter_count(from);
2578 rc = generic_write_checks(file, poffset, &len, 0);
2585 iov_iter_truncate(from, len);
2587 INIT_LIST_HEAD(&wdata_list);
2588 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2589 open_file = file->private_data;
2590 tcon = tlink_tcon(open_file->tlink);
2592 if (!tcon->ses->server->ops->async_writev)
2595 memcpy(&saved_from, from, sizeof(struct iov_iter));
2597 rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
 * If at least one write was successfully sent, then discard any rc
 * value from the later writes. If the remaining writes succeed, we
 * will end up returning whatever was written. If they fail, we will
 * get a new rc value from the wait loop below.
2606 if (!list_empty(&wdata_list))
2610 * Wait for and collect replies for any successful sends in order of
2611 * increasing offset. Once an error is hit or we get a fatal signal
2612 * while waiting, then return without waiting for any more replies.
2615 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2617 /* FIXME: freezable too? */
2618 rc = wait_for_completion_killable(&wdata->done);
2621 else if (wdata->result)
2624 total_written += wdata->bytes;
2626 /* resend call if it's a retryable error */
2627 if (rc == -EAGAIN) {
2628 struct list_head tmp_list;
2629 struct iov_iter tmp_from;
2631 INIT_LIST_HEAD(&tmp_list);
2632 list_del_init(&wdata->list);
2634 memcpy(&tmp_from, &saved_from,
2635 sizeof(struct iov_iter));
2636 iov_iter_advance(&tmp_from,
2637 wdata->offset - *poffset);
2639 rc = cifs_write_from_iter(wdata->offset,
2640 wdata->bytes, &tmp_from,
2641 open_file, cifs_sb, &tmp_list);
2643 list_splice(&tmp_list, &wdata_list);
2645 kref_put(&wdata->refcount,
2646 cifs_uncached_writedata_release);
2650 list_del_init(&wdata->list);
2651 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2654 if (total_written > 0)
2655 *poffset += total_written;
2657 cifs_stats_bytes_written(tcon, total_written);
2658 return total_written ? total_written : (ssize_t)rc;
2661 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2664 struct inode *inode;
2665 loff_t pos = iocb->ki_pos;
2667 inode = file_inode(iocb->ki_filp);
 * BB - optimize the case when signing is disabled: we could drop this
 * extra memory-to-memory copying and use iovec buffers to construct
 * the write request.
2675 written = cifs_iovec_write(iocb->ki_filp, from, &pos);
2677 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
2685 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2687 struct file *file = iocb->ki_filp;
2688 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2689 struct inode *inode = file->f_mapping->host;
2690 struct cifsInodeInfo *cinode = CIFS_I(inode);
2691 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2692 ssize_t rc = -EACCES;
2693 loff_t lock_pos = iocb->ki_pos;
2696 * We need to hold the sem to be sure nobody modifies lock list
2697 * with a brlock that prevents writing.
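/*
 * Editorial note (not in the original source): a read lock on
 * lock_sem is sufficient here because we only scan the lock list for
 * conflicts; we merely need to keep writers from changing the list
 * underneath us.
 */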
2699 down_read(&cinode->lock_sem);
2700 mutex_lock(&inode->i_mutex);
2701 if (file->f_flags & O_APPEND)
2702 lock_pos = i_size_read(inode);
2703 if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
2704 server->vals->exclusive_lock_type, NULL,
2706 rc = __generic_file_write_iter(iocb, from);
2707 mutex_unlock(&inode->i_mutex);
2712 err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2717 mutex_unlock(&inode->i_mutex);
2719 up_read(&cinode->lock_sem);
2724 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2726 struct inode *inode = file_inode(iocb->ki_filp);
2727 struct cifsInodeInfo *cinode = CIFS_I(inode);
2728 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2729 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2730 iocb->ki_filp->private_data;
2731 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2734 written = cifs_get_writer(cinode);
2738 if (CIFS_CACHE_WRITE(cinode)) {
2739 if (cap_unix(tcon->ses) &&
2740 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2741 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2742 written = generic_file_write_iter(iocb, from);
2745 written = cifs_writev(iocb, from);
 * For non-oplocked files in strict cache mode we need to write the data
 * to the server exactly from pos to pos+len-1 rather than flush all
 * affected pages, because flushing may cause an error with mandatory
 * locks on these pages but not on the region from pos to pos+len-1.
2754 written = cifs_user_writev(iocb, from);
2755 if (written > 0 && CIFS_CACHE_READ(cinode)) {
 * A Windows 7 server can delay breaking a level2 oplock when a write
 * request comes in - break it on the client to prevent reading
 * stale data.
2761 cifs_zap_mapping(inode);
2762 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2767 cifs_put_writer(cinode);
2771 static struct cifs_readdata *
2772 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2774 struct cifs_readdata *rdata;
2776 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2778 if (rdata != NULL) {
2779 kref_init(&rdata->refcount);
2780 INIT_LIST_HEAD(&rdata->list);
2781 init_completion(&rdata->done);
2782 INIT_WORK(&rdata->work, complete);
2789 cifs_readdata_release(struct kref *refcount)
2791 struct cifs_readdata *rdata = container_of(refcount,
2792 struct cifs_readdata, refcount);
2795 cifsFileInfo_put(rdata->cfile);
2801 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2807 for (i = 0; i < nr_pages; i++) {
2808 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2813 rdata->pages[i] = page;
2817 for (i = 0; i < nr_pages; i++) {
2818 put_page(rdata->pages[i]);
2819 rdata->pages[i] = NULL;
2826 cifs_uncached_readdata_release(struct kref *refcount)
2828 struct cifs_readdata *rdata = container_of(refcount,
2829 struct cifs_readdata, refcount);
2832 for (i = 0; i < rdata->nr_pages; i++) {
2833 put_page(rdata->pages[i]);
2834 rdata->pages[i] = NULL;
2836 cifs_readdata_release(refcount);
2840 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2841 * @rdata: the readdata response with list of pages holding data
2842 * @iter: destination for our data
2844 * This function copies data from a list of pages in a readdata response into
2845 * an array of iovecs. It will first calculate where the data should go
2846 * based on the info in the readdata and then copy the data into that spot.
2849 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2851 size_t remaining = rdata->got_bytes;
2854 for (i = 0; i < rdata->nr_pages; i++) {
2855 struct page *page = rdata->pages[i];
2856 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2857 size_t written = copy_page_to_iter(page, 0, copy, iter);
2858 remaining -= written;
2859 if (written < copy && iov_iter_count(iter) > 0)
2862 return remaining ? -EFAULT : 0;
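/*
 * Editorial note (not in the original source): the early break above
 * fires when copy_page_to_iter() stops short even though the
 * destination iterator still has room, i.e. a faulting destination
 * address; any bytes left uncopied surface as -EFAULT.
 */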
2866 cifs_uncached_readv_complete(struct work_struct *work)
2868 struct cifs_readdata *rdata = container_of(work,
2869 struct cifs_readdata, work);
2871 complete(&rdata->done);
2872 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2876 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2877 struct cifs_readdata *rdata, unsigned int len)
2881 unsigned int nr_pages = rdata->nr_pages;
2884 rdata->got_bytes = 0;
2885 rdata->tailsz = PAGE_SIZE;
2886 for (i = 0; i < nr_pages; i++) {
2887 struct page *page = rdata->pages[i];
2889 if (len >= PAGE_SIZE) {
2890 /* enough data to fill the page */
2891 iov.iov_base = kmap(page);
2892 iov.iov_len = PAGE_SIZE;
2893 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2894 i, iov.iov_base, iov.iov_len);
2896 } else if (len > 0) {
2897 /* enough for partial page, fill and zero the rest */
2898 iov.iov_base = kmap(page);
2900 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2901 i, iov.iov_base, iov.iov_len);
2902 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2903 rdata->tailsz = len;
2906 /* no need to hold page hostage */
2907 rdata->pages[i] = NULL;
2913 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2918 rdata->got_bytes += result;
2921 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2922 rdata->got_bytes : result;
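/*
 * Illustrative sketch (editorial addition, not part of the original
 * file): the kmap()/kvec receive pattern used above, isolated into a
 * standalone helper, including the kunmap() that the trimmed excerpt
 * elides. The "example_" name is hypothetical.
 */
static int example_read_page_from_socket(struct TCP_Server_Info *server,
					 struct page *page, size_t len)
{
	struct kvec iov;
	int result;

	iov.iov_base = kmap(page);
	iov.iov_len = min_t(size_t, len, PAGE_SIZE);

	/* zero the tail of a partial page so no stale data leaks out */
	if (iov.iov_len < PAGE_SIZE)
		memset(iov.iov_base + iov.iov_len, 0,
		       PAGE_SIZE - iov.iov_len);

	result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
	kunmap(page);
	return result;
}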
2926 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2927 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2929 struct cifs_readdata *rdata;
2930 unsigned int npages, rsize, credits;
2934 struct TCP_Server_Info *server;
2936 server = tlink_tcon(open_file->tlink)->ses->server;
2938 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2939 pid = open_file->pid;
2941 pid = current->tgid;
2944 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2949 cur_len = min_t(const size_t, len, rsize);
2950 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2952 /* allocate a readdata struct */
2953 rdata = cifs_readdata_alloc(npages,
2954 cifs_uncached_readv_complete);
2956 add_credits_and_wake_if(server, credits, 0);
2961 rc = cifs_read_allocate_pages(rdata, npages);
2965 rdata->cfile = cifsFileInfo_get(open_file);
2966 rdata->nr_pages = npages;
2967 rdata->offset = offset;
2968 rdata->bytes = cur_len;
2970 rdata->pagesz = PAGE_SIZE;
2971 rdata->read_into_pages = cifs_uncached_read_into_pages;
2972 rdata->credits = credits;
2974 if (!rdata->cfile->invalidHandle ||
2975 !cifs_reopen_file(rdata->cfile, true))
2976 rc = server->ops->async_readv(rdata);
2979 add_credits_and_wake_if(server, rdata->credits, 0);
2980 kref_put(&rdata->refcount,
2981 cifs_uncached_readdata_release);
2987 list_add_tail(&rdata->list, rdata_list);
2995 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2997 struct file *file = iocb->ki_filp;
3000 ssize_t total_read = 0;
3001 loff_t offset = iocb->ki_pos;
3002 struct cifs_sb_info *cifs_sb;
3003 struct cifs_tcon *tcon;
3004 struct cifsFileInfo *open_file;
3005 struct cifs_readdata *rdata, *tmp;
3006 struct list_head rdata_list;
3008 len = iov_iter_count(to);
3012 INIT_LIST_HEAD(&rdata_list);
3013 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3014 open_file = file->private_data;
3015 tcon = tlink_tcon(open_file->tlink);
3017 if (!tcon->ses->server->ops->async_readv)
3020 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3021 cifs_dbg(FYI, "attempting read on write only file instance\n");
3023 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
/* if at least one read request was sent successfully, reset rc */
3026 if (!list_empty(&rdata_list))
3029 len = iov_iter_count(to);
3030 /* the loop below should proceed in the order of increasing offsets */
3032 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3034 /* FIXME: freezable sleep too? */
3035 rc = wait_for_completion_killable(&rdata->done);
3038 else if (rdata->result == -EAGAIN) {
3039 /* resend call if it's a retryable error */
3040 struct list_head tmp_list;
3041 unsigned int got_bytes = rdata->got_bytes;
3043 list_del_init(&rdata->list);
3044 INIT_LIST_HEAD(&tmp_list);
 * Got part of the data and then a reconnect
 * happened -- fill the buffer and continue
 * the send.
3051 if (got_bytes && got_bytes < rdata->bytes) {
3052 rc = cifs_readdata_to_iov(rdata, to);
3054 kref_put(&rdata->refcount,
3055 cifs_uncached_readdata_release);
3060 rc = cifs_send_async_read(
3061 rdata->offset + got_bytes,
3062 rdata->bytes - got_bytes,
3063 rdata->cfile, cifs_sb,
3066 list_splice(&tmp_list, &rdata_list);
3068 kref_put(&rdata->refcount,
3069 cifs_uncached_readdata_release);
3071 } else if (rdata->result)
3074 rc = cifs_readdata_to_iov(rdata, to);
3076 /* if there was a short read -- discard anything left */
3077 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3080 list_del_init(&rdata->list);
3081 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3084 total_read = len - iov_iter_count(to);
3086 cifs_stats_bytes_read(tcon, total_read);
3088 /* mask nodata case */
3093 iocb->ki_pos += total_read;
3100 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3102 struct inode *inode = file_inode(iocb->ki_filp);
3103 struct cifsInodeInfo *cinode = CIFS_I(inode);
3104 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3105 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3106 iocb->ki_filp->private_data;
3107 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 * In strict cache mode we need to read from the server all the time
 * if we don't have a level II oplock, because the server can delay
 * mtime changes - so we can't make a decision about invalidating the
 * inode. We can also fail reading pages if there are mandatory locks
 * on pages affected by this read but not on the region from pos to
 * pos+len-1.
3118 if (!CIFS_CACHE_READ(cinode))
3119 return cifs_user_readv(iocb, to);
3121 if (cap_unix(tcon->ses) &&
3122 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3123 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3124 return generic_file_read_iter(iocb, to);
3127 * We need to hold the sem to be sure nobody modifies lock list
3128 * with a brlock that prevents reading.
3130 down_read(&cinode->lock_sem);
3131 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3132 tcon->ses->server->vals->shared_lock_type,
3133 NULL, CIFS_READ_OP))
3134 rc = generic_file_read_iter(iocb, to);
3135 up_read(&cinode->lock_sem);
3140 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3143 unsigned int bytes_read = 0;
3144 unsigned int total_read;
3145 unsigned int current_read_size;
3147 struct cifs_sb_info *cifs_sb;
3148 struct cifs_tcon *tcon;
3149 struct TCP_Server_Info *server;
3152 struct cifsFileInfo *open_file;
3153 struct cifs_io_parms io_parms;
3154 int buf_type = CIFS_NO_BUFFER;
3158 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3160 /* FIXME: set up handlers for larger reads and/or convert to async */
3161 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3163 if (file->private_data == NULL) {
3168 open_file = file->private_data;
3169 tcon = tlink_tcon(open_file->tlink);
3170 server = tcon->ses->server;
3172 if (!server->ops->sync_read) {
3177 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3178 pid = open_file->pid;
3180 pid = current->tgid;
3182 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3183 cifs_dbg(FYI, "attempting read on write only file instance\n");
3185 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3186 total_read += bytes_read, cur_offset += bytes_read) {
3188 current_read_size = min_t(uint, read_size - total_read,
 * For Windows ME and 9x we do not want to request more
 * than was negotiated since the server will refuse the read
 * otherwise.
3195 if ((tcon->ses) && !(tcon->ses->capabilities &
3196 tcon->ses->server->vals->cap_large_files)) {
3197 current_read_size = min_t(uint,
3198 current_read_size, CIFSMaxBufSize);
3200 if (open_file->invalidHandle) {
3201 rc = cifs_reopen_file(open_file, true);
3206 io_parms.tcon = tcon;
3207 io_parms.offset = *offset;
3208 io_parms.length = current_read_size;
3209 rc = server->ops->sync_read(xid, open_file, &io_parms,
3210 &bytes_read, &cur_offset,
3212 } while (rc == -EAGAIN);
3214 if (rc || (bytes_read == 0)) {
3222 cifs_stats_bytes_read(tcon, total_read);
3223 *offset += bytes_read;
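/*
 * Illustrative sketch (editorial addition, not part of the original
 * file): the reopen-and-retry idiom used by the read loop above,
 * isolated for clarity. The "example_" name is hypothetical; the
 * sync_read call mirrors the one in cifs_read().
 */
static int example_sync_read_retry(const unsigned int xid,
				   struct cifsFileInfo *open_file,
				   struct cifs_io_parms *io_parms,
				   unsigned int *bytes_read,
				   char **cur_offset, int *buf_type)
{
	struct TCP_Server_Info *server =
		tlink_tcon(open_file->tlink)->ses->server;
	int rc;

	do {
		/* the handle may have gone stale across a reconnect */
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc)
				break;
		}
		rc = server->ops->sync_read(xid, open_file, io_parms,
					    bytes_read, cur_offset,
					    buf_type);
	} while (rc == -EAGAIN);

	return rc;
}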
3231 * If the page is mmap'ed into a process' page tables, then we need to make
3232 * sure that it doesn't change while being written back.
3235 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3237 struct page *page = vmf->page;
3240 return VM_FAULT_LOCKED;
3243 static struct vm_operations_struct cifs_file_vm_ops = {
3244 .fault = filemap_fault,
3245 .map_pages = filemap_map_pages,
3246 .page_mkwrite = cifs_page_mkwrite,
3247 .remap_pages = generic_file_remap_pages,
3250 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3253 struct inode *inode = file_inode(file);
3257 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3258 rc = cifs_zap_mapping(inode);
3263 rc = generic_file_mmap(file, vma);
3265 vma->vm_ops = &cifs_file_vm_ops;
3270 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3275 rc = cifs_revalidate_file(file);
3277 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3282 rc = generic_file_mmap(file, vma);
3284 vma->vm_ops = &cifs_file_vm_ops;
3290 cifs_readv_complete(struct work_struct *work)
3292 unsigned int i, got_bytes;
3293 struct cifs_readdata *rdata = container_of(work,
3294 struct cifs_readdata, work);
3296 got_bytes = rdata->got_bytes;
3297 for (i = 0; i < rdata->nr_pages; i++) {
3298 struct page *page = rdata->pages[i];
3300 lru_cache_add_file(page);
3302 if (rdata->result == 0 ||
3303 (rdata->result == -EAGAIN && got_bytes)) {
3304 flush_dcache_page(page);
3305 SetPageUptodate(page);
3310 if (rdata->result == 0 ||
3311 (rdata->result == -EAGAIN && got_bytes))
3312 cifs_readpage_to_fscache(rdata->mapping->host, page);
3314 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3316 page_cache_release(page);
3317 rdata->pages[i] = NULL;
3319 kref_put(&rdata->refcount, cifs_readdata_release);
3323 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3324 struct cifs_readdata *rdata, unsigned int len)
3330 unsigned int nr_pages = rdata->nr_pages;
3333 /* determine the eof that the server (probably) has */
3334 eof = CIFS_I(rdata->mapping->host)->server_eof;
3335 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
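/*
 * Worked example (editorial annotation, not in the original source):
 * with server_eof = 10000 and 4KB pages, eof_index = 9999 >> 12 = 2,
 * so any page with index > 2 is zero-filled below instead of being
 * read from the server.
 */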
3336 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3338 rdata->got_bytes = 0;
3339 rdata->tailsz = PAGE_CACHE_SIZE;
3340 for (i = 0; i < nr_pages; i++) {
3341 struct page *page = rdata->pages[i];
3343 if (len >= PAGE_CACHE_SIZE) {
3344 /* enough data to fill the page */
3345 iov.iov_base = kmap(page);
3346 iov.iov_len = PAGE_CACHE_SIZE;
3347 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3348 i, page->index, iov.iov_base, iov.iov_len);
3349 len -= PAGE_CACHE_SIZE;
3350 } else if (len > 0) {
3351 /* enough for partial page, fill and zero the rest */
3352 iov.iov_base = kmap(page);
3354 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3355 i, page->index, iov.iov_base, iov.iov_len);
3356 memset(iov.iov_base + len,
3357 '\0', PAGE_CACHE_SIZE - len);
3358 rdata->tailsz = len;
3360 } else if (page->index > eof_index) {
3362 * The VFS will not try to do readahead past the
3363 * i_size, but it's possible that we have outstanding
3364 * writes with gaps in the middle and the i_size hasn't
3365 * caught up yet. Populate those with zeroed out pages
3366 * to prevent the VFS from repeatedly attempting to
3367 * fill them until the writes are flushed.
3369 zero_user(page, 0, PAGE_CACHE_SIZE);
3370 lru_cache_add_file(page);
3371 flush_dcache_page(page);
3372 SetPageUptodate(page);
3374 page_cache_release(page);
3375 rdata->pages[i] = NULL;
3379 /* no need to hold page hostage */
3380 lru_cache_add_file(page);
3382 page_cache_release(page);
3383 rdata->pages[i] = NULL;
3388 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3393 rdata->got_bytes += result;
3396 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3397 rdata->got_bytes : result;
3401 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3402 unsigned int rsize, struct list_head *tmplist,
3403 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3405 struct page *page, *tpage;
3406 unsigned int expected_index;
3409 INIT_LIST_HEAD(tmplist);
3411 page = list_entry(page_list->prev, struct page, lru);
3414 * Lock the page and put it in the cache. Since no one else
3415 * should have access to this page, we're safe to simply set
3416 * PG_locked without checking it first.
3418 __set_page_locked(page);
3419 rc = add_to_page_cache_locked(page, mapping,
3420 page->index, GFP_KERNEL);
3422 /* give up if we can't stick it in the cache */
3424 __clear_page_locked(page);
3428 /* move first page to the tmplist */
3429 *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3430 *bytes = PAGE_CACHE_SIZE;
3432 list_move_tail(&page->lru, tmplist);
3434 /* now try and add more pages onto the request */
3435 expected_index = page->index + 1;
3436 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
/* discontinuity? */
3438 if (page->index != expected_index)
3441 /* would this page push the read over the rsize? */
3442 if (*bytes + PAGE_CACHE_SIZE > rsize)
3445 __set_page_locked(page);
3446 if (add_to_page_cache_locked(page, mapping, page->index,
3448 __clear_page_locked(page);
3451 list_move_tail(&page->lru, tmplist);
3452 (*bytes) += PAGE_CACHE_SIZE;
3459 static int cifs_readpages(struct file *file, struct address_space *mapping,
3460 struct list_head *page_list, unsigned num_pages)
3463 struct list_head tmplist;
3464 struct cifsFileInfo *open_file = file->private_data;
3465 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3466 struct TCP_Server_Info *server;
3470 * Reads as many pages as possible from fscache. Returns -ENOBUFS
 * immediately if the cookie is negative.
3473 * After this point, every page in the list might have PG_fscache set,
3474 * so we will need to clean that up off of every page we don't use.
3476 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3481 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3482 pid = open_file->pid;
3484 pid = current->tgid;
3487 server = tlink_tcon(open_file->tlink)->ses->server;
3489 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3490 __func__, file, mapping, num_pages);
3493 * Start with the page at end of list and move it to private
3494 * list. Do the same with any following pages until we hit
3495 * the rsize limit, hit an index discontinuity, or run out of
3496 * pages. Issue the async read and then start the loop again
3497 * until the list is empty.
3499 * Note that list order is important. The page_list is in
3500 * the order of declining indexes. When we put the pages in
3501 * the rdata->pages, then we want them in increasing order.
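/*
 * Editorial note (not in the original source): this is why
 * readpages_get_pages() takes pages from the tail of page_list
 * (list_entry(page_list->prev, ...)) - walking a declining-index list
 * backwards yields the increasing-index order that rdata->pages
 * requires.
 */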
3503 while (!list_empty(page_list)) {
3504 unsigned int i, nr_pages, bytes, rsize;
3506 struct page *page, *tpage;
3507 struct cifs_readdata *rdata;
3510 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3516 * Give up immediately if rsize is too small to read an entire
3517 * page. The VFS will fall back to readpage. We should never
3518 * reach this point however since we set ra_pages to 0 when the
3519 * rsize is smaller than a cache page.
3521 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3522 add_credits_and_wake_if(server, credits, 0);
3526 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3527 &nr_pages, &offset, &bytes);
3529 add_credits_and_wake_if(server, credits, 0);
3533 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3535 /* best to give up if we're out of mem */
3536 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3537 list_del(&page->lru);
3538 lru_cache_add_file(page);
3540 page_cache_release(page);
3543 add_credits_and_wake_if(server, credits, 0);
3547 rdata->cfile = cifsFileInfo_get(open_file);
3548 rdata->mapping = mapping;
3549 rdata->offset = offset;
3550 rdata->bytes = bytes;
3552 rdata->pagesz = PAGE_CACHE_SIZE;
3553 rdata->read_into_pages = cifs_readpages_read_into_pages;
3554 rdata->credits = credits;
3556 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3557 list_del(&page->lru);
3558 rdata->pages[rdata->nr_pages++] = page;
3561 if (!rdata->cfile->invalidHandle ||
3562 !cifs_reopen_file(rdata->cfile, true))
3563 rc = server->ops->async_readv(rdata);
3565 add_credits_and_wake_if(server, rdata->credits, 0);
3566 for (i = 0; i < rdata->nr_pages; i++) {
3567 page = rdata->pages[i];
3568 lru_cache_add_file(page);
3570 page_cache_release(page);
3572 list_add_tail(&page->lru, &tmplist);
3574 kref_put(&rdata->refcount, cifs_readdata_release);
3575 if (rc == -EAGAIN) {
3576 /* Re-add pages to the page_list and retry */
3577 list_splice(&tmplist, page_list);
3583 kref_put(&rdata->refcount, cifs_readdata_release);
/* Any pages that have been shown to fscache but didn't get added to
 * the pagecache must be uncached before they get returned to the
 * allocator.
3590 cifs_fscache_readpages_cancel(mapping->host, page_list);
3595 * cifs_readpage_worker must be called with the page pinned
3597 static int cifs_readpage_worker(struct file *file, struct page *page,
3603 /* Is the page cached? */
3604 rc = cifs_readpage_from_fscache(file_inode(file), page);
3608 read_data = kmap(page);
/* for reads over a certain size we could initiate async read-ahead */
3611 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3616 cifs_dbg(FYI, "Bytes read %d\n", rc);
3618 file_inode(file)->i_atime =
3619 current_fs_time(file_inode(file)->i_sb);
3621 if (PAGE_CACHE_SIZE > rc)
3622 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3624 flush_dcache_page(page);
3625 SetPageUptodate(page);
3627 /* send this page to the cache */
3628 cifs_readpage_to_fscache(file_inode(file), page);
3640 static int cifs_readpage(struct file *file, struct page *page)
3642 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3648 if (file->private_data == NULL) {
3654 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3655 page, (int)offset, (int)offset);
3657 rc = cifs_readpage_worker(file, page, &offset);
3663 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3665 struct cifsFileInfo *open_file;
3667 spin_lock(&cifs_file_list_lock);
3668 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3669 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3670 spin_unlock(&cifs_file_list_lock);
3674 spin_unlock(&cifs_file_list_lock);
/*
 * We do not want to update the file size from the server for inodes
 * open for write, to avoid races with writepage extending the file.
 * In the future we could consider allowing refreshing of the inode
 * only on increases in the file size, but this is tricky to do
 * without racing with write-behind page caching in the current Linux
 * kernel design.
 */
3684 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3689 if (is_inode_writable(cifsInode)) {
3690 /* This inode is open for write at least once */
3691 struct cifs_sb_info *cifs_sb;
3693 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3694 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
/*
 * Since there is no page cache to corrupt on direct I/O, we can
 * change the size safely.
 */
3700 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3708 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3709 loff_t pos, unsigned len, unsigned flags,
3710 struct page **pagep, void **fsdata)
3713 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3714 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3715 loff_t page_start = pos & PAGE_MASK;
3720 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3723 page = grab_cache_page_write_begin(mapping, index, flags);
3729 if (PageUptodate(page))
 * If we write a full page it will be up to date, no need to read from
 * the server. If the write is short, we'll end up doing a sync write
 * instead.
3737 if (len == PAGE_CACHE_SIZE)
3741 * optimize away the read when we have an oplock, and we're not
3742 * expecting to use any of the data we'd be reading in. That
3743 * is, when the page lies beyond the EOF, or straddles the EOF
3744 * and the write will cover all of the existing data.
3746 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3747 i_size = i_size_read(mapping->host);
3748 if (page_start >= i_size ||
3749 (offset == 0 && (pos + len) >= i_size)) {
3750 zero_user_segments(page, 0, offset,
3754 * PageChecked means that the parts of the page
3755 * to which we're not writing are considered up
3756 * to date. Once the data is copied to the
3757 * page, it can be set uptodate.
3759 SetPageChecked(page);
3764 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3766 * might as well read a page, it is fast enough. If we get
3767 * an error, we don't need to return it. cifs_write_end will
3768 * do a sync write instead since PG_uptodate isn't set.
3770 cifs_readpage_worker(file, page, &page_start);
3771 page_cache_release(page);
/*
 * We could try using another file handle if there is one - but how
 * would we lock it to prevent a close of that handle racing with this
 * read? In any case, this will be written out by write_end, so it is
 * fine.
 */
3785 static int cifs_release_page(struct page *page, gfp_t gfp)
3787 if (PagePrivate(page))
3790 return cifs_fscache_release_page(page, gfp);
3793 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3794 unsigned int length)
3796 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3798 if (offset == 0 && length == PAGE_CACHE_SIZE)
3799 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3802 static int cifs_launder_page(struct page *page)
3805 loff_t range_start = page_offset(page);
3806 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3807 struct writeback_control wbc = {
3808 .sync_mode = WB_SYNC_ALL,
3810 .range_start = range_start,
3811 .range_end = range_end,
3814 cifs_dbg(FYI, "Launder page: %p\n", page);
3816 if (clear_page_dirty_for_io(page))
3817 rc = cifs_writepage_locked(page, &wbc);
3819 cifs_fscache_invalidate_page(page, page->mapping->host);
3823 void cifs_oplock_break(struct work_struct *work)
3825 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3827 struct inode *inode = cfile->dentry->d_inode;
3828 struct cifsInodeInfo *cinode = CIFS_I(inode);
3829 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3830 struct TCP_Server_Info *server = tcon->ses->server;
3833 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3834 TASK_UNINTERRUPTIBLE);
3836 server->ops->downgrade_oplock(server, cinode,
3837 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3839 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3840 cifs_has_mand_locks(cinode)) {
3841 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3846 if (inode && S_ISREG(inode->i_mode)) {
3847 if (CIFS_CACHE_READ(cinode))
3848 break_lease(inode, O_RDONLY);
3850 break_lease(inode, O_WRONLY);
3851 rc = filemap_fdatawrite(inode->i_mapping);
3852 if (!CIFS_CACHE_READ(cinode)) {
3853 rc = filemap_fdatawait(inode->i_mapping);
3854 mapping_set_error(inode->i_mapping, rc);
3855 cifs_zap_mapping(inode);
3857 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3860 rc = cifs_push_locks(cfile);
3862 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
 * Releasing a stale oplock after a recent reconnect of the SMB session
 * using a now-incorrect file handle is not a data integrity issue, but
 * do not bother sending an oplock release if the session to the server
 * is still disconnected, since the oplock has already been released by
 * the server.
3870 if (!cfile->oplock_break_cancelled) {
3871 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3873 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3875 cifs_done_oplock_break(cinode);
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with the O_DIRECT flag, which would have failed
 * otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
3888 cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
 * Eventually we need to support direct IO for non-forcedirectio mounts.
3899 const struct address_space_operations cifs_addr_ops = {
3900 .readpage = cifs_readpage,
3901 .readpages = cifs_readpages,
3902 .writepage = cifs_writepage,
3903 .writepages = cifs_writepages,
3904 .write_begin = cifs_write_begin,
3905 .write_end = cifs_write_end,
3906 .set_page_dirty = __set_page_dirty_nobuffers,
3907 .releasepage = cifs_release_page,
3908 .direct_IO = cifs_direct_io,
3909 .invalidatepage = cifs_invalidate_page,
3910 .launder_page = cifs_launder_page,
3914 * cifs_readpages requires the server to support a buffer large enough to
3915 * contain the header plus one complete page of data. Otherwise, we need
3916 * to leave cifs_readpages out of the address space operations.
3918 const struct address_space_operations cifs_addr_ops_smallbuf = {
3919 .readpage = cifs_readpage,
3920 .writepage = cifs_writepage,
3921 .writepages = cifs_writepages,
3922 .write_begin = cifs_write_begin,
3923 .write_end = cifs_write_end,
3924 .set_page_dirty = __set_page_dirty_nobuffers,
3925 .releasepage = cifs_release_page,
3926 .invalidatepage = cifs_invalidate_page,
3927 .launder_page = cifs_launder_page,
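/*
 * Illustrative sketch (editorial addition, not part of the original
 * file): how an inode-setup path might pick between the two sets of
 * address_space operations above. The helper name and the large_buf
 * flag are hypothetical; in the real code the choice depends on the
 * buffer size negotiated with the server, per the comment above
 * cifs_addr_ops_smallbuf.
 */
static inline void example_set_aops(struct inode *inode, bool large_buf)
{
	/* large negotiated buffers can hold a header plus a full page */
	inode->i_data.a_ops = large_buf ? &cifs_addr_ops
					: &cifs_addr_ops_smallbuf;
}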