4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * Translate the access mode of the POSIX open flags (flags & O_ACCMODE) into
 * the access mask requested from the server. O_RDWR deliberately asks for
 * GENERIC_READ | GENERIC_WRITE instead of GENERIC_ALL, which can cause
 * unnecessary access-denied errors on create. The final return builds an
 * explicit list of attribute/data permission bits.
 */
47 static inline int cifs_convert_flags(unsigned int flags)
49 if ((flags & O_ACCMODE) == O_RDONLY)
51 else if ((flags & O_ACCMODE) == O_WRONLY)
53 else if ((flags & O_ACCMODE) == O_RDWR) {
54 /* GENERIC_ALL is too much permission to request
55 can cause unnecessary access denied on create */
56 /* return GENERIC_ALL; */
57 return (GENERIC_READ | GENERIC_WRITE);
60 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Build the SMB_O_* flag word for a POSIX-extensions open from the VFS open
 * flags. The access mode selects SMB_O_RDONLY, SMB_O_WRONLY or SMB_O_RDWR;
 * O_CREAT, O_DIRECTORY and O_NOFOLLOW are passed on as their SMB_O_*
 * equivalents. SMB_O_EXCL is only considered together with O_CREAT.
 */
65 static u32 cifs_posix_convert_flags(unsigned int flags)
69 if ((flags & O_ACCMODE) == O_RDONLY)
70 posix_flags = SMB_O_RDONLY;
71 else if ((flags & O_ACCMODE) == O_WRONLY)
72 posix_flags = SMB_O_WRONLY;
73 else if ((flags & O_ACCMODE) == O_RDWR)
74 posix_flags = SMB_O_RDWR;
76 if (flags & O_CREAT) {
77 posix_flags |= SMB_O_CREAT;
79 posix_flags |= SMB_O_EXCL;
/* O_EXCL without O_CREAT is an application error: log it and ignore it */
80 } else if (flags & O_EXCL)
81 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82 current->comm, current->tgid);
85 posix_flags |= SMB_O_TRUNC;
86 /* be safe and imply O_SYNC for O_DSYNC */
88 posix_flags |= SMB_O_SYNC;
89 if (flags & O_DIRECTORY)
90 posix_flags |= SMB_O_DIRECTORY;
91 if (flags & O_NOFOLLOW)
92 posix_flags |= SMB_O_NOFOLLOW;
94 posix_flags |= SMB_O_DIRECT;
/*
 * Pick the CIFS create disposition for the given open flags. The tests run
 * from most to least specific: O_CREAT | O_EXCL, then O_CREAT | O_TRUNC
 * (FILE_OVERWRITE_IF), then O_CREAT alone, then O_TRUNC alone
 * (FILE_OVERWRITE).
 */
99 static inline int cifs_get_disposition(unsigned int flags)
101 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104 return FILE_OVERWRITE_IF;
105 else if ((flags & O_CREAT) == O_CREAT)
107 else if ((flags & O_TRUNC) == O_TRUNC)
108 return FILE_OVERWRITE;
/*
 * Open full_path on the server with the POSIX-extensions create call
 * (CIFSPOSIXCreate).
 *
 * @full_path: path of the file to open
 * @pinode:    in/out inode pointer; when *pinode is NULL a new inode is
 *             obtained with cifs_iget(), otherwise the existing inode is
 *             refreshed from the returned attributes
 * @sb:        superblock the file lives on
 * @mode:      creation mode; masked with the current umask before use
 * @f_flags:   VFS open flags, converted with cifs_posix_convert_flags()
 * @poplock:   passed to CIFSPOSIXCreate for the oplock value
 * @pnetfid:   passed to CIFSPOSIXCreate for the file handle
 * @xid:       transaction id passed to CIFSPOSIXCreate
 */
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114 struct super_block *sb, int mode, unsigned int f_flags,
115 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
118 FILE_UNIX_BASIC_INFO *presp_data;
119 __u32 posix_flags = 0;
120 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121 struct cifs_fattr fattr;
122 struct tcon_link *tlink;
123 struct cifs_tcon *tcon;
125 cifs_dbg(FYI, "posix open %s\n", full_path);
/* zeroed buffer that receives the attributes returned by the create call */
127 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128 if (presp_data == NULL)
131 tlink = cifs_sb_tlink(cifs_sb);
137 tcon = tlink_tcon(tlink);
138 mode &= ~current_umask();
140 posix_flags = cifs_posix_convert_flags(f_flags);
141 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142 poplock, full_path, cifs_sb->local_nls,
143 cifs_sb->mnt_cifs_flags &
144 CIFS_MOUNT_MAP_SPECIAL_CHR);
/* the tcon link is only needed for the create call itself */
145 cifs_put_tlink(tlink);
/* a Type of -1 means the response carries no usable attribute data */
150 if (presp_data->Type == cpu_to_le32(-1))
151 goto posix_open_ret; /* open ok, caller does qpathinfo */
154 goto posix_open_ret; /* caller does not need info */
156 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
158 /* get new inode and set it up */
159 if (*pinode == NULL) {
160 cifs_fill_uniqueid(sb, &fattr);
161 *pinode = cifs_iget(sb, &fattr);
167 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open full_path through the dialect-specific server->ops->open hook.
 * Desired access and create disposition are derived from f_flags via
 * cifs_convert_flags() and cifs_get_disposition(). create_options starts as
 * CREATE_NOT_DIR and gains CREATE_OPEN_BACKUP_INTENT when backup_cred() is
 * true for this mount. A FILE_ALL_INFO buffer is allocated to receive file
 * information from the open; the inode is then refreshed with
 * cifs_get_inode_info_unix() or cifs_get_inode_info().
 */
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
183 int create_options = CREATE_NOT_DIR;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
/* the open hook is optional in the ops table, so test it before use */
188 if (!server->ops->open)
191 desired_access = cifs_convert_flags(f_flags);
193 /*********************************************************************
194 * open flag mapping table:
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
204 * Note that there is not a direct match between disposition
205 * FILE_SUPERSEDE (ie create whether or not file exists although
206 * O_CREAT | O_TRUNC is similar but truncates the existing
207 * file rather than creating a new file as FILE_SUPERSEDE does
208 * (which uses the attributes / metadata passed in on open call)
210 *? O_SYNC is a reasonable match to CIFS writethrough flag
211 *? and the read write flags match reasonably. O_LARGEFILE
212 *? is irrelevant because largefile support is always used
213 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215 *********************************************************************/
217 disposition = cifs_get_disposition(f_flags);
219 /* BB pass O_SYNC flag through on file attributes .. BB */
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
225 if (backup_cred(cifs_sb))
226 create_options |= CREATE_OPEN_BACKUP_INTENT;
/* gather everything the open hook needs; this is a fresh open, not a reconnect */
229 oparms.cifs_sb = cifs_sb;
230 oparms.desired_access = desired_access;
231 oparms.create_options = create_options;
232 oparms.disposition = disposition;
233 oparms.path = full_path;
235 oparms.reconnect = false;
237 rc = server->ops->open(xid, &oparms, oplock, buf);
243 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
246 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * Report whether the inode has byte-range locks recorded on any of its open
 * file handles. Walks the per-fid lock lists on cinode->llist with
 * cinode->lock_sem held for reading and checks each list for entries.
 */
255 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
257 struct cifs_fid_locks *cur;
258 bool has_locks = false;
260 down_read(&cinode->lock_sem);
261 list_for_each_entry(cur, &cinode->llist, llist) {
262 if (!list_empty(&cur->locks)) {
267 up_read(&cinode->lock_sem);
/*
 * Allocate and initialise the cifsFileInfo for a freshly opened file and
 * attach it to file->private_data.
 *
 * @fid:    handle information from the open; its pending_open entry is
 *          consumed here
 * @file:   VFS file being opened
 * @tlink:  tcon link the open was made on; an extra reference is taken
 * @oplock: oplock value from the open
 *
 * A per-fid lock list is allocated alongside and linked onto the inode's
 * llist under lock_sem. The new entry is added to the tcon and inode open
 * file lists under cifs_file_list_lock.
 */
271 struct cifsFileInfo *
272 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
273 struct tcon_link *tlink, __u32 oplock)
275 struct dentry *dentry = file->f_path.dentry;
276 struct inode *inode = dentry->d_inode;
277 struct cifsInodeInfo *cinode = CIFS_I(inode);
278 struct cifsFileInfo *cfile;
279 struct cifs_fid_locks *fdlocks;
280 struct cifs_tcon *tcon = tlink_tcon(tlink);
281 struct TCP_Server_Info *server = tcon->ses->server;
283 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
287 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
/* cross-link the lock list and the file, then publish it on the inode */
293 INIT_LIST_HEAD(&fdlocks->locks);
294 fdlocks->cfile = cfile;
295 cfile->llist = fdlocks;
296 down_write(&cinode->lock_sem);
297 list_add(&fdlocks->llist, &cinode->llist);
298 up_write(&cinode->lock_sem);
/* the file info holds its own references on the dentry and the tlink */
301 cfile->pid = current->tgid;
302 cfile->uid = current_fsuid();
303 cfile->dentry = dget(dentry);
304 cfile->f_flags = file->f_flags;
305 cfile->invalidHandle = false;
306 cfile->tlink = cifs_get_tlink(tlink);
307 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
308 mutex_init(&cfile->fh_mutex);
310 cifs_sb_active(inode->i_sb);
313 * If the server returned a read oplock and we have mandatory brlocks,
314 * set oplock level to None.
316 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
317 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
321 spin_lock(&cifs_file_list_lock);
/* prefer the oplock value recorded on the pending open, if it was changed */
322 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
323 oplock = fid->pending_open->oplock;
324 list_del(&fid->pending_open->olist);
/* purge_cache is cleared before set_fid() and tested again after unlocking */
326 fid->purge_cache = false;
327 server->ops->set_fid(cfile, fid, oplock);
329 list_add(&cfile->tlist, &tcon->openFileList);
330 /* if readable file instance put first in list*/
331 if (file->f_mode & FMODE_READ)
332 list_add(&cfile->flist, &cinode->openFileList);
334 list_add_tail(&cfile->flist, &cinode->openFileList);
335 spin_unlock(&cifs_file_list_lock);
337 if (fid->purge_cache)
338 cifs_zap_mapping(inode);
340 file->private_data = cfile;
/*
 * Locked wrapper around cifsFileInfo_get_locked(): takes
 * cifs_file_list_lock for the duration of the call, so the caller must not
 * already hold it.
 */
344 struct cifsFileInfo *
345 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
347 spin_lock(&cifs_file_list_lock);
348 cifsFileInfo_get_locked(cifs_file);
349 spin_unlock(&cifs_file_list_lock);
354 * Release a reference on the file private data. This may involve closing
355 * the filehandle out on the server. Must be called without holding
356 * cifs_file_list_lock.
/*
 * Drop one reference on cifs_file. The count is decremented under
 * cifs_file_list_lock; while references remain the lock is simply released.
 * When the count reaches zero the entry is unlinked from the inode and tcon
 * open-file lists, any queued oplock-break work is cancelled, the handle is
 * closed on the server (unless the tcon needs reconnect or the handle is
 * already invalid), the recorded byte-range locks are deleted under the
 * inode's lock_sem, and the tlink, dentry and superblock references held by
 * the file info are released.
 */
358 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
360 struct inode *inode = cifs_file->dentry->d_inode;
361 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
362 struct TCP_Server_Info *server = tcon->ses->server;
363 struct cifsInodeInfo *cifsi = CIFS_I(inode);
364 struct super_block *sb = inode->i_sb;
365 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
366 struct cifsLockInfo *li, *tmp;
368 struct cifs_pending_open open;
369 bool oplock_break_cancelled;
371 spin_lock(&cifs_file_list_lock);
372 if (--cifs_file->count > 0) {
373 spin_unlock(&cifs_file_list_lock);
377 if (server->ops->get_lease_key)
378 server->ops->get_lease_key(inode, &fid);
380 /* store open in pending opens to make sure we don't miss lease break */
381 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
383 /* remove it from the lists */
384 list_del(&cifs_file->flist);
385 list_del(&cifs_file->tlist);
387 if (list_empty(&cifsi->openFileList)) {
388 cifs_dbg(FYI, "closing last open instance for inode %p\n",
389 cifs_file->dentry->d_inode);
391 * In strict cache mode we need invalidate mapping on the last
392 * close because it may cause a error when we open this file
393 * again and get at least level II oplock.
395 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
396 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
397 cifs_set_oplock_level(cifsi, 0);
399 spin_unlock(&cifs_file_list_lock);
/* cancel_work_sync() is called only after the spinlock has been released */
401 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
403 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
404 struct TCP_Server_Info *server = tcon->ses->server;
408 if (server->ops->close)
409 server->ops->close(xid, tcon, &cifs_file->fid);
/* a cancelled break never ran, so complete its bookkeeping here */
413 if (oplock_break_cancelled)
414 cifs_done_oplock_break(cifsi);
416 cifs_del_pending_open(&open);
419 * Delete any outstanding lock records. We'll lose them when the file
422 down_write(&cifsi->lock_sem);
423 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
424 list_del(&li->llist);
425 cifs_del_lock_waiters(li);
428 list_del(&cifs_file->llist->llist);
429 kfree(cifs_file->llist);
430 up_write(&cifsi->lock_sem);
432 cifs_put_tlink(cifs_file->tlink);
433 dput(cifs_file->dentry);
434 cifs_sb_deactive(sb);
/*
 * Open handler for regular files. Outline of the visible steps:
 *  - look up the tcon link and build the full path from the dentry;
 *  - for O_DIRECT on a strict-I/O mount, switch file->f_op to the direct
 *    file operations (the no-brl variant when CIFS_MOUNT_NO_BRL is set);
 *  - try a POSIX open when the tcon advertises
 *    CIFS_UNIX_POSIX_PATH_OPS_CAP and POSIX open is not marked broken;
 *  - register a pending open, then fall back to cifs_nt_open() when the
 *    POSIX open did not succeed;
 *  - create the cifsFileInfo with cifs_new_fileinfo(), set the fscache
 *    cookie and, for a file just created over the non-POSIX path on a
 *    unix_ext tcon, push the mode with CIFSSMBUnixSetFileInfo();
 *  - release the tlink reference taken at the start.
 */
438 int cifs_open(struct inode *inode, struct file *file)
444 struct cifs_sb_info *cifs_sb;
445 struct TCP_Server_Info *server;
446 struct cifs_tcon *tcon;
447 struct tcon_link *tlink;
448 struct cifsFileInfo *cfile = NULL;
449 char *full_path = NULL;
450 bool posix_open_ok = false;
452 struct cifs_pending_open open;
456 cifs_sb = CIFS_SB(inode->i_sb);
457 tlink = cifs_sb_tlink(cifs_sb);
460 return PTR_ERR(tlink);
462 tcon = tlink_tcon(tlink);
463 server = tcon->ses->server;
465 full_path = build_path_from_dentry(file->f_path.dentry);
466 if (full_path == NULL) {
471 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
472 inode, file->f_flags, full_path);
474 if (file->f_flags & O_DIRECT &&
475 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
476 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
477 file->f_op = &cifs_file_direct_nobrl_ops;
479 file->f_op = &cifs_file_direct_ops;
487 if (!tcon->broken_posix_open && tcon->unix_ext &&
488 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
489 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
490 /* can not refresh inode info since size could be stale */
491 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
492 cifs_sb->mnt_file_mode /* ignored */,
493 file->f_flags, &oplock, &fid.netfid, xid);
495 cifs_dbg(FYI, "posix open succeeded\n");
496 posix_open_ok = true;
/* these errors mark POSIX open as broken on this tcon from now on */
497 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
498 if (tcon->ses->serverNOS)
499 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
500 tcon->ses->serverName,
501 tcon->ses->serverNOS);
502 tcon->broken_posix_open = true;
503 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
504 (rc != -EOPNOTSUPP)) /* path not found or net err */
507 * Else fallthrough to retry open the old way on network i/o
512 if (server->ops->get_lease_key)
513 server->ops->get_lease_key(inode, &fid);
/* the pending open is registered before the non-POSIX open is issued */
515 cifs_add_pending_open(&fid, tlink, &open);
517 if (!posix_open_ok) {
518 if (server->ops->get_lease_key)
519 server->ops->get_lease_key(inode, &fid);
521 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
522 file->f_flags, &oplock, &fid, xid);
524 cifs_del_pending_open(&open);
529 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
/* error path: close the handle on the server and drop the pending open */
531 if (server->ops->close)
532 server->ops->close(xid, tcon, &fid);
533 cifs_del_pending_open(&open);
538 cifs_fscache_set_inode_cookie(inode, file);
540 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
542 * Time to set mode which we can not set earlier due to
543 * problems creating new read-only files.
545 struct cifs_unix_set_info_args args = {
546 .mode = inode->i_mode,
547 .uid = INVALID_UID, /* no change */
548 .gid = INVALID_GID, /* no change */
549 .ctime = NO_CHANGE_64,
550 .atime = NO_CHANGE_64,
551 .mtime = NO_CHANGE_64,
554 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
561 cifs_put_tlink(tlink);
/* forward declaration: cifs_relock_file() calls this before it is defined */
565 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
568 * Try to reacquire byte range locks that were released when session
569 * to server was lost.
/*
 * Re-send the byte-range locks recorded for cfile. Runs with the inode's
 * lock_sem held for reading. Nothing is sent while the inode can still cache
 * brlocks. Otherwise POSIX locks are pushed when the server advertises
 * CIFS_UNIX_FCNTL_CAP and the mount does not set CIFS_MOUNT_NOPOSIXBRL;
 * in all other cases the dialect's push_mand_locks hook is used.
 */
572 cifs_relock_file(struct cifsFileInfo *cfile)
574 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
575 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
576 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
579 down_read(&cinode->lock_sem);
580 if (cinode->can_cache_brlcks) {
581 /* can cache locks - no need to relock */
582 up_read(&cinode->lock_sem);
586 if (cap_unix(tcon->ses) &&
587 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
588 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
589 rc = cifs_push_posix_locks(cfile);
591 rc = tcon->ses->server->ops->push_mand_locks(cfile);
593 up_read(&cinode->lock_sem);
/*
 * Re-establish the server handle of cfile after it was invalidated.
 *
 * @cfile:     open file whose handle is to be reopened
 * @can_flush: NOTE(review): appears to control whether dirty pages are
 *             written back and inode info refreshed after the reopen;
 *             the test itself is not visible here
 *
 * Serialised by cfile->fh_mutex; if the handle is not marked invalid the
 * mutex is dropped again straight away. A POSIX reopen is attempted first
 * when the tcon supports it, with O_CREAT, O_EXCL and O_TRUNC masked off.
 * The ops->open call below uses disposition FILE_OPEN and
 * oparms.reconnect = true. After a successful reopen the fid is
 * re-registered with set_fid() and, when oparms.reconnect is set, the
 * byte-range locks are re-sent with cifs_relock_file().
 */
598 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
603 struct cifs_sb_info *cifs_sb;
604 struct cifs_tcon *tcon;
605 struct TCP_Server_Info *server;
606 struct cifsInodeInfo *cinode;
608 char *full_path = NULL;
610 int disposition = FILE_OPEN;
611 int create_options = CREATE_NOT_DIR;
612 struct cifs_open_parms oparms;
615 mutex_lock(&cfile->fh_mutex);
616 if (!cfile->invalidHandle) {
617 mutex_unlock(&cfile->fh_mutex);
623 inode = cfile->dentry->d_inode;
624 cifs_sb = CIFS_SB(inode->i_sb);
625 tcon = tlink_tcon(cfile->tlink);
626 server = tcon->ses->server;
629 * Can not grab rename sem here because various ops, including those
630 * that already have the rename sem can end up causing writepage to get
631 * called and if the server was down that means we end up here, and we
632 * can never tell if the caller already has the rename_sem.
634 full_path = build_path_from_dentry(cfile->dentry);
635 if (full_path == NULL) {
637 mutex_unlock(&cfile->fh_mutex);
642 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
643 inode, cfile->f_flags, full_path);
645 if (tcon->ses->server->oplocks)
650 if (tcon->unix_ext && cap_unix(tcon->ses) &&
651 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
652 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
654 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
655 * original open. Must mask them off for a reopen.
657 unsigned int oflags = cfile->f_flags &
658 ~(O_CREAT | O_EXCL | O_TRUNC);
660 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
661 cifs_sb->mnt_file_mode /* ignored */,
662 oflags, &oplock, &cfile->fid.netfid, xid);
664 cifs_dbg(FYI, "posix reopen succeeded\n");
665 oparms.reconnect = true;
669 * fallthrough to retry open the old way on errors, especially
670 * in the reconnect path it is important to retry hard
674 desired_access = cifs_convert_flags(cfile->f_flags);
676 if (backup_cred(cifs_sb))
677 create_options |= CREATE_OPEN_BACKUP_INTENT;
679 if (server->ops->get_lease_key)
680 server->ops->get_lease_key(inode, &cfile->fid);
/* reopen into the existing fid, flagged as a reconnect */
683 oparms.cifs_sb = cifs_sb;
684 oparms.desired_access = desired_access;
685 oparms.create_options = create_options;
686 oparms.disposition = disposition;
687 oparms.path = full_path;
688 oparms.fid = &cfile->fid;
689 oparms.reconnect = true;
692 * Can not refresh inode by passing in file_info buf to be returned by
693 * ops->open and then calling get_inode_info with returned buf since
694 * file might have write behind data that needs to be flushed and server
695 * version of file size can be stale. If we knew for sure that inode was
696 * not dirty locally we could do this.
698 rc = server->ops->open(xid, &oparms, &oplock, NULL);
/* the open hook may clear oparms.reconnect; -ENOENT then triggers one retry */
699 if (rc == -ENOENT && oparms.reconnect == false) {
700 /* durable handle timeout is expired - open the file again */
701 rc = server->ops->open(xid, &oparms, &oplock, NULL);
702 /* indicate that we need to relock the file */
703 oparms.reconnect = true;
707 mutex_unlock(&cfile->fh_mutex);
708 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
709 cifs_dbg(FYI, "oplock: %d\n", oplock);
710 goto reopen_error_exit;
/* handle is usable again; fh_mutex is released before the write-back below */
714 cfile->invalidHandle = false;
715 mutex_unlock(&cfile->fh_mutex);
716 cinode = CIFS_I(inode);
719 rc = filemap_write_and_wait(inode->i_mapping);
720 mapping_set_error(inode->i_mapping, rc);
723 rc = cifs_get_inode_info_unix(&inode, full_path,
726 rc = cifs_get_inode_info(&inode, full_path, NULL,
727 inode->i_sb, xid, NULL);
730 * Else we are writing out data to server already and could deadlock if
731 * we tried to flush data, and since we do not know if we have data that
732 * would invalidate the current end of file on the server we can not go
733 * to the server to get the new inode info.
736 server->ops->set_fid(cfile, &cfile->fid, oplock);
737 if (oparms.reconnect)
738 cifs_relock_file(cfile);
/*
 * Release handler for regular files: drops the reference that
 * file->private_data holds on the cifsFileInfo and clears the pointer.
 */
746 int cifs_close(struct inode *inode, struct file *file)
748 if (file->private_data != NULL) {
749 cifsFileInfo_put(file->private_data);
750 file->private_data = NULL;
753 /* return code from the ->release op is always ignored */
/*
 * Release handler for directories. If the dialect's dir_needs_close hook
 * says the search handle is still open, the handle is marked invalid under
 * cifs_file_list_lock and then closed with close_dir outside the lock. Any
 * network buffer still attached to the search state is released with the
 * small- or large-buffer helper according to srch_inf.smallBuf. The tlink
 * reference is then dropped and the private data freed.
 */
757 int cifs_closedir(struct inode *inode, struct file *file)
761 struct cifsFileInfo *cfile = file->private_data;
762 struct cifs_tcon *tcon;
763 struct TCP_Server_Info *server;
766 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
772 tcon = tlink_tcon(cfile->tlink);
773 server = tcon->ses->server;
775 cifs_dbg(FYI, "Freeing private data in close dir\n");
776 spin_lock(&cifs_file_list_lock);
777 if (server->ops->dir_needs_close(cfile)) {
778 cfile->invalidHandle = true;
779 spin_unlock(&cifs_file_list_lock);
780 if (server->ops->close_dir)
781 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
784 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
785 /* not much we can do if it fails anyway, ignore rc */
788 spin_unlock(&cifs_file_list_lock);
790 buf = cfile->srch_inf.ntwrk_buf_start;
792 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
/* clear the pointer first so the search state no longer references buf */
793 cfile->srch_inf.ntwrk_buf_start = NULL;
794 if (cfile->srch_inf.smallBuf)
795 cifs_small_buf_release(buf);
797 cifs_buf_release(buf);
800 cifs_put_tlink(cfile->tlink);
801 kfree(file->private_data);
802 file->private_data = NULL;
803 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate a cifsLockInfo describing the byte range [offset, offset + length)
 * owned by the calling thread group (current->tgid). The waiter list (blist)
 * and wait queue (block_q) start out empty. Memory comes from kmalloc(), so
 * it is not zeroed.
 */
808 static struct cifsLockInfo *
809 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
811 struct cifsLockInfo *lock =
812 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
815 lock->offset = offset;
816 lock->length = length;
818 lock->pid = current->tgid;
819 INIT_LIST_HEAD(&lock->blist);
820 init_waitqueue_head(&lock->block_q);
/*
 * Detach every lock request queued on lock->blist and wake the task
 * sleeping on each one's block_q. list_del_init() leaves each waiter's
 * blist entry pointing at itself.
 */
825 cifs_del_lock_waiters(struct cifsLockInfo *lock)
827 struct cifsLockInfo *li, *tmp;
828 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
829 list_del_init(&li->blist);
830 wake_up(&li->block_q);
/* kind of operation a lock-conflict check is made for (rw_check argument) */
834 #define CIFS_LOCK_OP 0
835 #define CIFS_READ_OP 1
836 #define CIFS_WRITE_OP 2
/*
 * Scan the locks recorded for one file handle (fdlocks) for one that
 * conflicts with the byte range [offset, offset + length) of the given type.
 * Each entry is first tested for range overlap. For read/write checks
 * (rw_check != CIFS_LOCK_OP) locks taken by the same thread group through
 * the same fid get special treatment, and for shared-type requests a lock of
 * the same type, or one held by the caller on the same fid, is tested
 * separately.
 */
838 /* @rw_check : 0 - no op, 1 - read, 2 - write */
840 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
841 __u64 length, __u8 type, struct cifsFileInfo *cfile,
842 struct cifsLockInfo **conf_lock, int rw_check)
844 struct cifsLockInfo *li;
845 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
846 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
848 list_for_each_entry(li, &fdlocks->locks, llist) {
/* true when the requested range and li do not overlap at all */
849 if (offset + length <= li->offset ||
850 offset >= li->offset + li->length)
852 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
853 server->ops->compare_fids(cfile, cur_cfile)) {
854 /* shared lock prevents write op through the same fid */
855 if (!(li->type & server->vals->shared_lock_type) ||
856 rw_check != CIFS_WRITE_OP)
859 if ((type & server->vals->shared_lock_type) &&
860 ((server->ops->compare_fids(cfile, cur_cfile) &&
861 current->tgid == li->pid) || type == li->type))
/*
 * Inode-wide conflict check: runs cifs_find_fid_lock_conflict() over the
 * lock list of every file handle linked on the inode's llist. This function
 * takes no lock itself; the callers visible in this file hold
 * cinode->lock_sem around it.
 */
871 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
872 __u8 type, struct cifsLockInfo **conf_lock,
876 struct cifs_fid_locks *cur;
877 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
879 list_for_each_entry(cur, &cinode->llist, llist) {
880 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
881 cfile, conf_lock, rw_check);
890 * Check if there is another lock that prevents us from setting the lock (mandatory
891 * style). If such a lock exists, update the flock structure with its
892 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
893 * or leave it the same if we can't. Returns 0 if we don't need to request to
894 * the server or 1 otherwise.
/*
 * Mandatory-style lock test against the locally recorded locks, done with
 * cinode->lock_sem held for reading. When a conflicting lock is found its
 * range, owner pid and type (F_RDLCK for shared-type locks, F_WRLCK
 * otherwise) are copied into flock.
 */
897 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
898 __u8 type, struct file_lock *flock)
901 struct cifsLockInfo *conf_lock;
902 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
903 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
906 down_read(&cinode->lock_sem);
908 exist = cifs_find_lock_conflict(cfile, offset, length, type,
909 &conf_lock, CIFS_LOCK_OP);
/* report the conflicting lock; fl_end is inclusive, hence the -1 */
911 flock->fl_start = conf_lock->offset;
912 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
913 flock->fl_pid = conf_lock->pid;
914 if (conf_lock->type & server->vals->shared_lock_type)
915 flock->fl_type = F_RDLCK;
917 flock->fl_type = F_WRLCK;
918 } else if (!cinode->can_cache_brlcks)
921 flock->fl_type = F_UNLCK;
923 up_read(&cinode->lock_sem);
/*
 * Append lock to the lock list of cfile, holding the inode's lock_sem for
 * writing. No conflict check is made here.
 */
928 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
930 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
931 down_write(&cinode->lock_sem);
932 list_add_tail(&lock->llist, &cfile->llist->locks);
933 up_write(&cinode->lock_sem);
937 * Set the byte-range lock (mandatory style). Returns:
938 * 1) 0, if we set the lock and don't need to request to the server;
939 * 2) 1, if no locks prevent us but we need to request to the server;
940 * 3) -EACCES, if there is a lock that prevents us and wait is false.
/*
 * Conditionally record lock on cfile. With lock_sem held for writing the
 * inode is searched for a conflicting lock; if there is none and brlocks can
 * be cached, the lock is added to the local list. On the blocking path the
 * request is queued on the conflicting lock's blist, lock_sem is dropped,
 * and the task sleeps interruptibly until its blist entry is empty again.
 * It then retakes lock_sem and unlinks itself with list_del_init().
 */
943 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
946 struct cifsLockInfo *conf_lock;
947 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
953 down_write(&cinode->lock_sem);
955 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
956 lock->type, &conf_lock, CIFS_LOCK_OP);
957 if (!exist && cinode->can_cache_brlcks) {
958 list_add_tail(&lock->llist, &cfile->llist->locks);
959 up_write(&cinode->lock_sem);
/* queue behind the conflicting lock and sleep with lock_sem released */
968 list_add_tail(&lock->blist, &conf_lock->blist);
969 up_write(&cinode->lock_sem);
970 rc = wait_event_interruptible(lock->block_q,
971 (lock->blist.prev == &lock->blist) &&
972 (lock->blist.next == &lock->blist));
975 down_write(&cinode->lock_sem);
976 list_del_init(&lock->blist);
979 up_write(&cinode->lock_sem);
984 * Check if there is another lock that prevents us from setting the lock (posix
985 * style). If such a lock exists, update the flock structure with its
986 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
987 * or leave it the same if we can't. Returns 0 if we don't need to request to
988 * the server or 1 otherwise.
/*
 * POSIX-style lock test using the VFS lock table: posix_test_lock() is
 * called with cinode->lock_sem held for reading. If it reports no conflict
 * (F_UNLCK) but brlocks cannot be cached locally, the original fl_type is
 * restored. Only FL_POSIX requests are handled here.
 */
991 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
994 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
/* posix_test_lock() overwrites fl_type, so keep the caller's value */
995 unsigned char saved_type = flock->fl_type;
997 if ((flock->fl_flags & FL_POSIX) == 0)
1000 down_read(&cinode->lock_sem);
1001 posix_test_lock(file, flock);
1003 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1004 flock->fl_type = saved_type;
1008 up_read(&cinode->lock_sem);
1013 * Set the byte-range lock (posix style). Returns:
1014 * 1) 0, if we set the lock and don't need to request to the server;
1015 * 2) 1, if we need to request to the server;
1016 * 3) <0, if the error occurs while setting the lock.
/*
 * POSIX-style lock set through the VFS lock table. With lock_sem held for
 * writing, the request is handed to posix_lock_file() only while brlocks can
 * be cached locally. If the VFS defers the lock (FILE_LOCK_DEFERRED) the
 * task waits interruptibly on fl_wait until fl_next is cleared.
 * Only FL_POSIX requests are handled here.
 */
1019 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1021 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1024 if ((flock->fl_flags & FL_POSIX) == 0)
1028 down_write(&cinode->lock_sem);
1029 if (!cinode->can_cache_brlcks) {
1030 up_write(&cinode->lock_sem);
1034 rc = posix_lock_file(file, flock, NULL);
1035 up_write(&cinode->lock_sem);
1036 if (rc == FILE_LOCK_DEFERRED) {
1037 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1040 posix_unblock_lock(flock);
/*
 * Send the locally recorded mandatory byte-range locks of cfile to the
 * server. Locks are sent in two passes, one per entry of types[]
 * (LOCKING_ANDX_LARGE_FILES, then the same with LOCKING_ANDX_SHARED_LOCK).
 * Ranges are packed into a LOCKING_ANDX_RANGE array and sent with
 * cifs_lockv() whenever the array is full, with a further cifs_lockv() call
 * for the remainder. The array size is derived from the server's maxBuf
 * minus the SMB header.
 */
1046 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1049 int rc = 0, stored_rc;
1050 struct cifsLockInfo *li, *tmp;
1051 struct cifs_tcon *tcon;
1052 unsigned int num, max_num, max_buf;
1053 LOCKING_ANDX_RANGE *buf, *cur;
1054 int types[] = {LOCKING_ANDX_LARGE_FILES,
1055 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1059 tcon = tlink_tcon(cfile->tlink);
1062 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1063 * and check it for zero before using.
1065 max_buf = tcon->ses->server->maxBuf;
/* number of lock ranges that fit into one request */
1071 max_num = (max_buf - sizeof(struct smb_hdr)) /
1072 sizeof(LOCKING_ANDX_RANGE);
1073 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1079 for (i = 0; i < 2; i++) {
1082 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1083 if (li->type != types[i])
/* 64-bit length and offset are split into 32-bit little-endian halves */
1085 cur->Pid = cpu_to_le16(li->pid);
1086 cur->LengthLow = cpu_to_le32((u32)li->length);
1087 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1088 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1089 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1090 if (++num == max_num) {
1091 stored_rc = cifs_lockv(xid, tcon,
1093 (__u8)li->type, 0, num,
1104 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1105 (__u8)types[i], 0, num, buf);
/*
 * Private copy of one POSIX lock, filled in by cifs_push_posix_locks()
 * while it holds the lock-context spinlock.
 */
1116 struct lock_to_push {
1117 struct list_head llist;
/*
 * Send the POSIX locks the VFS holds for this inode to the server.
 * Works in three phases because memory cannot be allocated with GFP_KERNEL
 * under a spinlock:
 *   1. walk flctx->flc_posix under flc_lock (the counting statement itself
 *      is not visible here);
 *   2. preallocate one lock_to_push per lock onto locks_to_send without the
 *      spinlock held;
 *   3. retake flc_lock and copy pid, netfid, length and offset of each lock
 *      into the preallocated entries.
 * The copies are then sent one by one with CIFSSMBPosixLock() and removed
 * from the list; a final loop unlinks whatever is still on locks_to_send.
 */
1126 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1128 struct inode *inode = cfile->dentry->d_inode;
1129 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1130 struct file_lock *flock;
1131 struct file_lock_context *flctx = inode->i_flctx;
1132 unsigned int count = 0, i;
1133 int rc = 0, xid, type;
1134 struct list_head locks_to_send, *el;
1135 struct lock_to_push *lck, *tmp;
1143 spin_lock(&flctx->flc_lock);
1144 list_for_each(el, &flctx->flc_posix) {
1147 spin_unlock(&flctx->flc_lock);
1149 INIT_LIST_HEAD(&locks_to_send);
1152 * Allocating count locks is enough because no FL_POSIX locks can be
1153 * added to the list while we are holding cinode->lock_sem that
1154 * protects locking operations of this inode.
1156 for (i = 0; i < count; i++) {
1157 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1162 list_add_tail(&lck->llist, &locks_to_send);
/* el walks the preallocated entries in step with the VFS lock list */
1165 el = locks_to_send.next;
1166 spin_lock(&flctx->flc_lock);
1167 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1168 if (el == &locks_to_send) {
1170 * The list ended. We don't have enough allocated
1171 * structures - something is really wrong.
1173 cifs_dbg(VFS, "Can't push all brlocks!\n");
1176 length = 1 + flock->fl_end - flock->fl_start;
1177 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1181 lck = list_entry(el, struct lock_to_push, llist);
1182 lck->pid = flock->fl_pid;
1183 lck->netfid = cfile->fid.netfid;
1184 lck->length = length;
1186 lck->offset = flock->fl_start;
1188 spin_unlock(&flctx->flc_lock);
1190 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1193 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1194 lck->offset, lck->length, NULL,
1198 list_del(&lck->llist);
1206 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1207 list_del(&lck->llist);
/*
 * Push the cached byte-range locks of cfile to the server and stop caching
 * them. Runs with lock_sem held for writing because can_cache_brlcks is
 * cleared at the end. When brlocks are already not cacheable the semaphore
 * is released straight away. POSIX locks are pushed when the server
 * advertises CIFS_UNIX_FCNTL_CAP and CIFS_MOUNT_NOPOSIXBRL is not set;
 * otherwise the dialect's push_mand_locks hook is used.
 */
1214 cifs_push_locks(struct cifsFileInfo *cfile)
1216 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1217 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1218 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1221 /* we are going to update can_cache_brlcks here - need a write access */
1222 down_write(&cinode->lock_sem);
1223 if (!cinode->can_cache_brlcks) {
1224 up_write(&cinode->lock_sem);
1228 if (cap_unix(tcon->ses) &&
1229 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1230 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1231 rc = cifs_push_posix_locks(cfile);
1233 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1235 cinode->can_cache_brlcks = false;
1236 up_write(&cinode->lock_sem);
/*
 * Decode a VFS file_lock request. Each recognised fl_flags bit is logged at
 * FYI level, and any bit outside the known set is reported as unknown.
 * *type starts as the server's large_lock_type and is then OR-ed with the
 * server's exclusive, shared or unlock type bit according to fl_type:
 * F_WRLCK and F_EXLCK use the exclusive bit, F_RDLCK and F_SHLCK the shared
 * bit, and F_UNLCK the unlock bit.
 * NOTE(review): lock, unlock and wait_flag are presumably set in the same
 * branches; those assignments are not visible here.
 */
1241 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1242 bool *wait_flag, struct TCP_Server_Info *server)
1244 if (flock->fl_flags & FL_POSIX)
1245 cifs_dbg(FYI, "Posix\n");
1246 if (flock->fl_flags & FL_FLOCK)
1247 cifs_dbg(FYI, "Flock\n");
1248 if (flock->fl_flags & FL_SLEEP) {
1249 cifs_dbg(FYI, "Blocking lock\n");
1252 if (flock->fl_flags & FL_ACCESS)
1253 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1254 if (flock->fl_flags & FL_LEASE)
1255 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1256 if (flock->fl_flags &
1257 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1258 FL_ACCESS | FL_LEASE | FL_CLOSE)))
1259 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
/* every request starts from the large-file lock type of this dialect */
1261 *type = server->vals->large_lock_type;
1262 if (flock->fl_type == F_WRLCK) {
1263 cifs_dbg(FYI, "F_WRLCK\n");
1264 *type |= server->vals->exclusive_lock_type;
1266 } else if (flock->fl_type == F_UNLCK) {
1267 cifs_dbg(FYI, "F_UNLCK\n");
1268 *type |= server->vals->unlock_lock_type;
1270 /* Check if unlock includes more than one lock range */
1271 } else if (flock->fl_type == F_RDLCK) {
1272 cifs_dbg(FYI, "F_RDLCK\n");
1273 *type |= server->vals->shared_lock_type;
1275 } else if (flock->fl_type == F_EXLCK) {
1276 cifs_dbg(FYI, "F_EXLCK\n");
1277 *type |= server->vals->exclusive_lock_type;
1279 } else if (flock->fl_type == F_SHLCK) {
1280 cifs_dbg(FYI, "F_SHLCK\n");
1281 *type |= server->vals->shared_lock_type;
1284 cifs_dbg(FYI, "Unknown type of lock\n");
/*
 * Lock test (F_GETLK-style) for the byte range described by flock.
 *
 * POSIX path: the local VFS lock table is consulted with
 * cifs_posix_lock_test(); the server is then asked with CIFSSMBPosixLock()
 * using CIFS_RDLCK for shared-type requests and CIFS_WRLCK otherwise.
 *
 * Mandatory path: the locally recorded locks are checked with
 * cifs_lock_test(); the server is then probed with the dialect's mand_lock
 * hook. A probe is followed by a matching unlock, and fl_type is set to
 * F_UNLCK, F_RDLCK or F_WRLCK according to the outcome. A failure of the
 * unlock is logged at VFS level.
 */
1288 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1289 bool wait_flag, bool posix_lck, unsigned int xid)
/* fl_end is inclusive, hence the +1 */
1292 __u64 length = 1 + flock->fl_end - flock->fl_start;
1293 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1294 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1295 struct TCP_Server_Info *server = tcon->ses->server;
1296 __u16 netfid = cfile->fid.netfid;
1299 int posix_lock_type;
1301 rc = cifs_posix_lock_test(file, flock);
1305 if (type & server->vals->shared_lock_type)
1306 posix_lock_type = CIFS_RDLCK;
1308 posix_lock_type = CIFS_WRLCK;
1309 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1310 flock->fl_start, length, flock,
1311 posix_lock_type, wait_flag);
1315 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1319 /* BB we could chain these into one lock request BB */
1320 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1323 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1325 flock->fl_type = F_UNLCK;
1327 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1332 if (type & server->vals->shared_lock_type) {
1333 flock->fl_type = F_WRLCK;
/* retry the probe as a shared lock: drop the exclusive bit first */
1337 type &= ~server->vals->exclusive_lock_type;
1339 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1340 type | server->vals->shared_lock_type,
1343 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1344 type | server->vals->shared_lock_type, 0, 1, false);
1345 flock->fl_type = F_RDLCK;
1347 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1350 flock->fl_type = F_WRLCK;
/*
 * cifs_move_llist - move every entry of @source onto @dest.
 *
 * Uses the _safe iterator because list_move() unlinks the current entry
 * from @source while the walk is in progress.
 */
1356 cifs_move_llist(struct list_head *source, struct list_head *dest)
1358 struct list_head *li, *tmp;
1359 list_for_each_safe(li, tmp, source)
1360 list_move(li, dest);
/*
 * cifs_free_llist - tear down every cifsLockInfo linked on @llist.
 *
 * For each entry, cifs_del_lock_waiters() is called on it and the entry
 * is unlinked from the list. The _safe iterator is required because
 * entries are removed during the walk.
 */
1364 cifs_free_llist(struct list_head *llist)
1366 struct cifsLockInfo *li, *tmp;
1367 list_for_each_entry_safe(li, tmp, llist, llist) {
1368 cifs_del_lock_waiters(li);
1369 list_del(&li->llist);
/*
 * cifs_unlock_range - release cached/server byte-range locks covered by
 * the range described in @flock.
 *
 * A buffer of LOCKING_ANDX_RANGE entries is sized from the server's
 * maxBuf. With cinode->lock_sem held for write, the file's lock list is
 * walked once per entry of types[] (exclusive, then shared). Locks that
 * belong to another tgid or have a different type are not selected, and
 * a range test against @flock is applied as well. When brlocks can be
 * cached the lock is simply dropped from the list; otherwise it is
 * encoded into the buffer, parked on a temporary list and sent with
 * cifs_lockv() each time the buffer fills, and once more after the walk.
 * After each cifs_lockv() call the parked locks are either put back on
 * the file's list or freed.
 */
1375 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1378 int rc = 0, stored_rc;
1379 int types[] = {LOCKING_ANDX_LARGE_FILES,
1380 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1382 unsigned int max_num, num, max_buf;
1383 LOCKING_ANDX_RANGE *buf, *cur;
1384 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1385 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1386 struct cifsLockInfo *li, *tmp;
1387 __u64 length = 1 + flock->fl_end - flock->fl_start;
1388 struct list_head tmp_llist;
1390 INIT_LIST_HEAD(&tmp_llist);
1393 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1394 * and check it for zero before using.
1396 max_buf = tcon->ses->server->maxBuf;
/* Number of range entries that fit in one request after the SMB header. */
1400 max_num = (max_buf - sizeof(struct smb_hdr)) /
1401 sizeof(LOCKING_ANDX_RANGE);
1402 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1406 down_write(&cinode->lock_sem);
/* One pass per lock type so each request carries a single type. */
1407 for (i = 0; i < 2; i++) {
1410 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1411 if (flock->fl_start > li->offset ||
1412 (flock->fl_start + length) <
1413 (li->offset + li->length))
1415 if (current->tgid != li->pid)
1417 if (types[i] != li->type)
1419 if (cinode->can_cache_brlcks) {
1421 * We can cache brlock requests - simply remove
1422 * a lock from the file's list.
1424 list_del(&li->llist);
1425 cifs_del_lock_waiters(li);
/* Encode the lock into the next wire entry (little-endian fields). */
1429 cur->Pid = cpu_to_le16(li->pid);
1430 cur->LengthLow = cpu_to_le32((u32)li->length);
1431 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1432 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1433 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1435 * We need to save a lock here to let us add it again to
1436 * the file's list if the unlock range request fails on
1439 list_move(&li->llist, &tmp_llist);
1440 if (++num == max_num) {
1441 stored_rc = cifs_lockv(xid, tcon,
1443 li->type, num, 0, buf);
1446 * We failed on the unlock range
1447 * request - add all locks from the tmp
1448 * list to the head of the file's list.
1450 cifs_move_llist(&tmp_llist,
1451 &cfile->llist->locks);
1455 * The unlock range request succeed -
1456 * free the tmp list.
1458 cifs_free_llist(&tmp_llist);
1465 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1466 types[i], num, 0, buf);
1468 cifs_move_llist(&tmp_llist,
1469 &cfile->llist->locks);
1472 cifs_free_llist(&tmp_llist);
1476 up_write(&cinode->lock_sem);
/*
 * cifs_setlk - apply a lock or unlock request to @file.
 *
 * Three request shapes appear below:
 *  - POSIX: cifs_posix_lock_set() is called, then CIFSSMBPosixLock() is
 *    sent with CIFS_RDLCK, CIFS_WRLCK or CIFS_UNLCK.
 *  - Mandatory lock: a cifsLockInfo is created with cifs_lock_init() and
 *    offered to cifs_lock_add_if(); a read-only cached state is dropped
 *    (mapping zapped, oplock cleared) before ops->mand_lock() is sent,
 *    and the lock is recorded with cifs_lock_add().
 *  - Mandatory unlock: delegated to ops->mand_unlock_range().
 * Finally, for FL_POSIX requests, posix_lock_file_wait() is called so
 * the VFS lock state is updated as well.
 *
 * NOTE(review): the branches are presumably selected by @posix_lck,
 * @lock and @unlock - confirm.
 */
1482 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1483 bool wait_flag, bool posix_lck, int lock, int unlock,
1487 __u64 length = 1 + flock->fl_end - flock->fl_start;
1488 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1489 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1490 struct TCP_Server_Info *server = tcon->ses->server;
1491 struct inode *inode = cfile->dentry->d_inode;
1494 int posix_lock_type;
1496 rc = cifs_posix_lock_set(file, flock);
1500 if (type & server->vals->shared_lock_type)
1501 posix_lock_type = CIFS_RDLCK;
1503 posix_lock_type = CIFS_WRLCK;
1506 posix_lock_type = CIFS_UNLCK;
1508 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1509 current->tgid, flock->fl_start, length,
1510 NULL, posix_lock_type, wait_flag);
1515 struct cifsLockInfo *lock;
1517 lock = cifs_lock_init(flock->fl_start, length, type);
1521 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1530 * Windows 7 server can delay breaking lease from read to None
1531 * if we set a byte-range lock on a file - break it explicitly
1532 * before sending the lock to the server to be sure the next
1533 * read won't conflict with non-overlapted locks due to
/* Only the "read cached but not write cached" state needs the break. */
1536 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1537 CIFS_CACHE_READ(CIFS_I(inode))) {
1538 cifs_zap_mapping(inode);
1539 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1541 CIFS_I(inode)->oplock = 0;
1544 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1545 type, 1, 0, wait_flag);
1551 cifs_lock_add(cfile, lock);
1553 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1556 if (flock->fl_flags & FL_POSIX)
1557 posix_lock_file_wait(file, flock);
/*
 * cifs_lock - byte-range lock entry point for a CIFS file.
 *
 * Logs the request, decodes @flock with cifs_read_flock(), and checks
 * whether the share qualifies for POSIX byte-range locks: Unix
 * extensions enabled, CIFS_UNIX_FCNTL_CAP advertised by the server and
 * CIFS_MOUNT_NOPOSIXBRL not set. Query commands (IS_GETLK) go to
 * cifs_getlk(); other requests go to cifs_setlk().
 *
 * @file:  open file the lock applies to
 * @cmd:   lock command
 * @flock: lock description
 */
1561 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1564 int lock = 0, unlock = 0;
1565 bool wait_flag = false;
1566 bool posix_lck = false;
1567 struct cifs_sb_info *cifs_sb;
1568 struct cifs_tcon *tcon;
1569 struct cifsInodeInfo *cinode;
1570 struct cifsFileInfo *cfile;
1577 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1578 cmd, flock->fl_flags, flock->fl_type,
1579 flock->fl_start, flock->fl_end);
1581 cfile = (struct cifsFileInfo *)file->private_data;
1582 tcon = tlink_tcon(cfile->tlink);
1584 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1587 cifs_sb = CIFS_FILE_SB(file);
1588 netfid = cfile->fid.netfid;
1589 cinode = CIFS_I(file_inode(file));
/* All three conditions must hold for the POSIX lock protocol. */
1591 if (cap_unix(tcon->ses) &&
1592 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1593 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1596 * BB add code here to normalize offset and length to account for
1597 * negative length which we can not accept over the wire.
1599 if (IS_GETLK(cmd)) {
1600 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1605 if (!lock && !unlock) {
1607 * if no lock or unlock then nothing to do since we do not
1614 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
/*
 * cifs_update_eof - grow the cached server end-of-file after a write.
 *
 * Computes the end offset of the write (@offset + @bytes_written) and
 * raises cifsi->server_eof to it if the write went past the current
 * value; server_eof is never lowered here.
 */
1621 * update the file size (if needed) after a write. Should be called with
1622 * the inode->i_lock held
1625 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1626 unsigned int bytes_written)
1628 loff_t end_of_write = offset + bytes_written;
1630 if (end_of_write > cifsi->server_eof)
1631 cifsi->server_eof = end_of_write;
/*
 * cifs_write - synchronously write a kernel buffer through an open handle.
 *
 * Sends @write_size bytes of @write_data starting at *@offset using the
 * dialect's ops->sync_write. The data is sent in pieces of at most
 * ops->wp_retry_size() bytes; a piece is retried while the result is
 * -EAGAIN, reopening the handle with cifs_reopen_file() when it has been
 * invalidated. After each piece the cached server EOF is updated under
 * i_lock and *@offset is advanced by the bytes written.
 *
 * On completion the tcon write statistics are updated, i_size is grown
 * under i_lock if *@offset passed it, the inode is marked dirty, and the
 * total number of bytes written is returned.
 *
 * @open_file:  handle to write through
 * @pid:        pid passed by the caller
 * @write_data: source buffer
 * @write_size: number of bytes to write
 * @offset:     in/out file position
 */
1635 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1636 size_t write_size, loff_t *offset)
1639 unsigned int bytes_written = 0;
1640 unsigned int total_written;
1641 struct cifs_sb_info *cifs_sb;
1642 struct cifs_tcon *tcon;
1643 struct TCP_Server_Info *server;
1645 struct dentry *dentry = open_file->dentry;
1646 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1647 struct cifs_io_parms io_parms;
1649 cifs_sb = CIFS_SB(dentry->d_sb);
1651 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1652 write_size, *offset, dentry);
1654 tcon = tlink_tcon(open_file->tlink);
1655 server = tcon->ses->server;
1657 if (!server->ops->sync_write)
/* Outer loop: one iteration per chunk; inner loop: retry on -EAGAIN. */
1662 for (total_written = 0; write_size > total_written;
1663 total_written += bytes_written) {
1665 while (rc == -EAGAIN) {
1669 if (open_file->invalidHandle) {
1670 /* we could deadlock if we called
1671 filemap_fdatawait from here so tell
1672 reopen_file not to flush data to
1674 rc = cifs_reopen_file(open_file, false);
1679 len = min(server->ops->wp_retry_size(dentry->d_inode),
1680 (unsigned int)write_size - total_written);
1681 /* iov[0] is reserved for smb header */
1682 iov[1].iov_base = (char *)write_data + total_written;
1683 iov[1].iov_len = len;
1685 io_parms.tcon = tcon;
1686 io_parms.offset = *offset;
1687 io_parms.length = len;
1688 rc = server->ops->sync_write(xid, &open_file->fid,
1689 &io_parms, &bytes_written, iov, 1);
1691 if (rc || (bytes_written == 0)) {
1699 spin_lock(&dentry->d_inode->i_lock);
1700 cifs_update_eof(cifsi, *offset, bytes_written);
1701 spin_unlock(&dentry->d_inode->i_lock);
1702 *offset += bytes_written;
1706 cifs_stats_bytes_written(tcon, total_written);
/* Extend the in-core file size if the write moved past it. */
1708 if (total_written > 0) {
1709 spin_lock(&dentry->d_inode->i_lock);
1710 if (*offset > dentry->d_inode->i_size)
1711 i_size_write(dentry->d_inode, *offset);
1712 spin_unlock(&dentry->d_inode->i_lock);
1714 mark_inode_dirty_sync(dentry->d_inode);
1716 return total_written;
/*
 * find_readable_file - look up an open handle on @cifs_inode usable for
 * reading.
 *
 * Walks the inode's openFileList under cifs_file_list_lock. Entries
 * owned by another fsuid are not accepted when fsuid filtering is in
 * effect. The first entry opened with FMODE_READ whose handle is still
 * valid gets a reference via cifsFileInfo_get_locked() and the list
 * lock is dropped. The walk stops at the first entry without FMODE_READ.
 *
 * NOTE(review): the referenced handle is presumably what is returned,
 * with the caller responsible for dropping the reference - confirm.
 */
1719 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1722 struct cifsFileInfo *open_file = NULL;
1723 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1725 /* only filter by fsuid on multiuser mounts */
1726 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1729 spin_lock(&cifs_file_list_lock);
1730 /* we could simply get the first_list_entry since write-only entries
1731 are always at the end of the list but since the first entry might
1732 have a close pending, we go through the whole list */
1733 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1734 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1736 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1737 if (!open_file->invalidHandle) {
1738 /* found a good file */
1739 /* lock it so it will not be closed on us */
1740 cifsFileInfo_get_locked(open_file);
1741 spin_unlock(&cifs_file_list_lock);
1743 } /* else might as well continue, and look for
1744 another, or simply have the caller reopen it
1745 again rather than trying to fix this handle */
1746 } else /* write only file */
1747 break; /* write only files are last so must be done */
1749 spin_unlock(&cifs_file_list_lock);
/*
 * find_writable_file - look up an open handle on @cifs_inode usable for
 * writing.
 *
 * A NULL @cifs_inode is reported through cifs_dbg(VFS, ...). Otherwise
 * the inode's openFileList is searched under cifs_file_list_lock:
 *  - first pass: only handles opened by the current tgid;
 *  - second pass (any_available): handles of any process.
 * fsuid filtering is applied when in effect. A valid FMODE_WRITE handle
 * gets a reference via cifsFileInfo_get_locked(). An invalidated
 * writable handle is remembered in inv_file; it is later referenced and
 * handed to cifs_reopen_file(). On the path shown after that call the
 * handle is moved to the tail of the list, its reference dropped, and
 * the search restarted. The refind counter is checked against
 * MAX_REOPEN_ATT at the top of the search.
 */
1753 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1756 struct cifsFileInfo *open_file, *inv_file = NULL;
1757 struct cifs_sb_info *cifs_sb;
1758 bool any_available = false;
1760 unsigned int refind = 0;
1762 /* Having a null inode here (because mapping->host was set to zero by
1763 the VFS or MM) should not happen but we had reports of on oops (due to
1764 it being zero) during stress testcases so we need to check for it */
1766 if (cifs_inode == NULL) {
1767 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1772 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1774 /* only filter by fsuid on multiuser mounts */
1775 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1778 spin_lock(&cifs_file_list_lock);
1780 if (refind > MAX_REOPEN_ATT) {
1781 spin_unlock(&cifs_file_list_lock);
1784 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1785 if (!any_available && open_file->pid != current->tgid)
1787 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1789 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1790 if (!open_file->invalidHandle) {
1791 /* found a good writable file */
1792 cifsFileInfo_get_locked(open_file);
1793 spin_unlock(&cifs_file_list_lock);
1797 inv_file = open_file;
1801 /* couldn't find useable FH with same pid, try any available */
1802 if (!any_available) {
1803 any_available = true;
1804 goto refind_writable;
1808 any_available = false;
1809 cifsFileInfo_get_locked(inv_file);
1812 spin_unlock(&cifs_file_list_lock);
/* Reopen is attempted without the list lock held. */
1815 rc = cifs_reopen_file(inv_file, false);
1819 spin_lock(&cifs_file_list_lock);
1820 list_move_tail(&inv_file->flist,
1821 &cifs_inode->openFileList);
1822 spin_unlock(&cifs_file_list_lock);
1823 cifsFileInfo_put(inv_file);
1824 spin_lock(&cifs_file_list_lock);
1827 goto refind_writable;
/*
 * cifs_partialpagewrite - write bytes [@from, @to) of @page to the server.
 *
 * The file offset is the page's index shifted by PAGE_CACHE_SHIFT plus
 * @from. The page is mapped with kmap(). The range is validated
 * (@to within the page, @from <= @to), a start offset beyond i_size is
 * treated as a race with truncate and returns 0, and @to is clamped so
 * the write does not extend the file. The data is written with
 * cifs_write() through a handle obtained from find_writable_file(),
 * which is released afterwards; atime/mtime are set to the current time.
 */
1834 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1836 struct address_space *mapping = page->mapping;
1837 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1840 int bytes_written = 0;
1841 struct inode *inode;
1842 struct cifsFileInfo *open_file;
1844 if (!mapping || !mapping->host)
1847 inode = page->mapping->host;
1849 offset += (loff_t)from;
1850 write_data = kmap(page);
1853 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1858 /* racing with truncate? */
1859 if (offset > mapping->host->i_size) {
1861 return 0; /* don't care */
1864 /* check to make sure that we are not extending the file */
1865 if (mapping->host->i_size - offset < (loff_t)to)
1866 to = (unsigned)(mapping->host->i_size - offset);
1868 open_file = find_writable_file(CIFS_I(mapping->host), false);
1870 bytes_written = cifs_write(open_file, open_file->pid,
1871 write_data, to - from, &offset);
1872 cifsFileInfo_put(open_file);
1873 /* Does mm or vfs already set times? */
1874 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1875 if ((bytes_written > 0) && (offset))
1877 else if (bytes_written < 0)
1880 cifs_dbg(FYI, "No writeable filehandles for inode\n");
/*
 * wdata_alloc_and_fillpages - allocate a cifs_writedata and collect dirty
 * pages into it.
 *
 * Allocates a writedata with room for @tofind pages and
 * cifs_writev_complete as its completion work, then repeatedly calls
 * find_get_pages_tag() for PAGECACHE_TAG_DIRTY pages of @mapping,
 * accumulating the count in *@found_pages. The lookup repeats while
 * pages are still being returned, @tofind is non-zero and *@index has
 * not passed @end.
 */
1888 static struct cifs_writedata *
1889 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1890 pgoff_t end, pgoff_t *index,
1891 unsigned int *found_pages)
1893 unsigned int nr_pages;
1894 struct page **pages;
1895 struct cifs_writedata *wdata;
1897 wdata = cifs_writedata_alloc((unsigned int)tofind,
1898 cifs_writev_complete);
1903 * find_get_pages_tag seems to return a max of 256 on each
1904 * iteration, so we must call it several times in order to
1905 * fill the array or the wsize is effectively limited to
1906 * 256 * PAGE_CACHE_SIZE.
1909 pages = wdata->pages;
1911 nr_pages = find_get_pages_tag(mapping, index,
1912 PAGECACHE_TAG_DIRTY, tofind,
1914 *found_pages += nr_pages;
1917 } while (nr_pages && tofind && *index <= end);
/*
 * wdata_prepare_pages - lock and mark for writeback the pages that will
 * be sent in one write request.
 *
 * Examines the @found_pages pages held in wdata->pages in order. Each
 * page is tested for: a failed trylock_page(), a mapping that no longer
 * matches @mapping, an index past @end (non-cyclic writeback), an index
 * that is not the expected consecutive *@next, and being already under
 * writeback or no longer dirty. For WB_SYNC_NONE the function does not
 * wait for in-flight writeback. Accepted pages are marked with
 * set_page_writeback(); a page lying at or beyond i_size has its
 * writeback ended again. Accepted pages are stored back into
 * wdata->pages and *@next is advanced. Afterwards *@index is reset so
 * skipped pages are found again, and references on the pages from
 * index nr_pages upward are released and their slots cleared.
 */
1923 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1924 struct address_space *mapping,
1925 struct writeback_control *wbc,
1926 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1928 unsigned int nr_pages = 0, i;
1931 for (i = 0; i < found_pages; i++) {
1932 page = wdata->pages[i];
1934 * At this point we hold neither mapping->tree_lock nor
1935 * lock on the page itself: the page may be truncated or
1936 * invalidated (changing page->mapping to NULL), or even
1937 * swizzled back from swapper_space to tmpfs file
1943 else if (!trylock_page(page))
1946 if (unlikely(page->mapping != mapping)) {
1951 if (!wbc->range_cyclic && page->index > end) {
1957 if (*next && (page->index != *next)) {
1958 /* Not next consecutive page */
1963 if (wbc->sync_mode != WB_SYNC_NONE)
1964 wait_on_page_writeback(page);
1966 if (PageWriteback(page) ||
1967 !clear_page_dirty_for_io(page)) {
1973 * This actually clears the dirty bit in the radix tree.
1974 * See cifs_writepage() for more commentary.
1976 set_page_writeback(page);
1977 if (page_offset(page) >= i_size_read(mapping->host)) {
1980 end_page_writeback(page);
1984 wdata->pages[i] = page;
1985 *next = page->index + 1;
1989 /* reset index to refind any pages skipped */
1991 *index = wdata->pages[0]->index + 1;
1993 /* put any pages we aren't going to use */
1994 for (i = nr_pages; i < found_pages; i++) {
1995 page_cache_release(wdata->pages[i]);
1996 wdata->pages[i] = NULL;
/*
 * wdata_send_pages - fill in a cifs_writedata and submit it asynchronously.
 *
 * Records the sync mode, page count, starting file offset and page size.
 * tailsz is the number of valid bytes in the last page (the distance from
 * that page's offset to i_size, capped at PAGE_CACHE_SIZE), and bytes is
 * the total payload. Any handle already attached to @wdata is released
 * and a fresh writable handle is looked up with find_writable_file(); a
 * missing handle is reported via cifs_dbg(VFS, ...). The request is sent
 * with ops->async_writev(), and all @nr_pages pages are unlocked.
 */
2003 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2004 struct address_space *mapping, struct writeback_control *wbc)
2007 struct TCP_Server_Info *server;
2010 wdata->sync_mode = wbc->sync_mode;
2011 wdata->nr_pages = nr_pages;
2012 wdata->offset = page_offset(wdata->pages[0]);
2013 wdata->pagesz = PAGE_CACHE_SIZE;
2014 wdata->tailsz = min(i_size_read(mapping->host) -
2015 page_offset(wdata->pages[nr_pages - 1]),
2016 (loff_t)PAGE_CACHE_SIZE);
2017 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
/* Drop a handle left from an earlier attempt before picking a new one. */
2019 if (wdata->cfile != NULL)
2020 cifsFileInfo_put(wdata->cfile);
2021 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2022 if (!wdata->cfile) {
2023 cifs_dbg(VFS, "No writable handles for inode\n");
2026 wdata->pid = wdata->cfile->pid;
2027 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2028 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2031 for (i = 0; i < nr_pages; ++i)
2032 unlock_page(wdata->pages[i]);
/*
 * cifs_writepages - write back dirty pages of @mapping in multi-page
 * requests.
 *
 * When the mount's wsize is smaller than a page, falls back to
 * generic_writepages(). Otherwise the start/end page indexes are taken
 * from @wbc (the saved writeback_index for cyclic writeback, or the
 * requested byte range). Each loop iteration:
 *   1. obtains send credits with ops->wait_mtu_credits();
 *   2. collects up to one wsize worth of dirty pages
 *      (wdata_alloc_and_fillpages);
 *   3. filters/locks them (wdata_prepare_pages);
 *   4. submits them (wdata_send_pages).
 * Credits are handed back with add_credits_and_wake_if() on the paths
 * where nothing was sent. In the send-failure cleanup, credits are
 * returned and pages are redirtied or flagged with SetPageError, have
 * writeback ended and their references dropped; mapping_set_error()
 * records the error. For WB_SYNC_ALL an -EAGAIN result rewinds the index
 * to retry the same range. nr_to_write is decremented by the pages
 * handled, and writeback_index is saved for the next cyclic pass.
 */
2037 static int cifs_writepages(struct address_space *mapping,
2038 struct writeback_control *wbc)
2040 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2041 struct TCP_Server_Info *server;
2042 bool done = false, scanned = false, range_whole = false;
2044 struct cifs_writedata *wdata;
2048 * If wsize is smaller than the page cache size, default to writing
2049 * one page at a time via cifs_writepage
2051 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
2052 return generic_writepages(mapping, wbc);
2054 if (wbc->range_cyclic) {
2055 index = mapping->writeback_index; /* Start from prev offset */
2058 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2059 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2060 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2064 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2066 while (!done && index <= end) {
2067 unsigned int i, nr_pages, found_pages, wsize, credits;
2068 pgoff_t next = 0, tofind, saved_index = index;
2070 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
/* Pages to look for: a wsize worth, but not past the end index. */
2075 tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
2077 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2081 add_credits_and_wake_if(server, credits, 0);
2085 if (found_pages == 0) {
2086 kref_put(&wdata->refcount, cifs_writedata_release);
2087 add_credits_and_wake_if(server, credits, 0);
2091 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2092 end, &index, &next, &done);
2094 /* nothing to write? */
2095 if (nr_pages == 0) {
2096 kref_put(&wdata->refcount, cifs_writedata_release);
2097 add_credits_and_wake_if(server, credits, 0);
2101 wdata->credits = credits;
2103 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2105 /* send failure -- clean up the mess */
2107 add_credits_and_wake_if(server, wdata->credits, 0);
2108 for (i = 0; i < nr_pages; ++i) {
2110 redirty_page_for_writepage(wbc,
2113 SetPageError(wdata->pages[i]);
2114 end_page_writeback(wdata->pages[i]);
2115 page_cache_release(wdata->pages[i]);
2118 mapping_set_error(mapping, rc);
2120 kref_put(&wdata->refcount, cifs_writedata_release);
/* Data-integrity writeback must not skip a range that needs a retry. */
2122 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2123 index = saved_index;
2127 wbc->nr_to_write -= nr_pages;
2128 if (wbc->nr_to_write <= 0)
2134 if (!scanned && !done) {
2136 * We hit the last page and there is more work to be done: wrap
2137 * back to the start of the file
2144 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2145 mapping->writeback_index = index;
/*
 * cifs_writepage_locked - write one whole page to the server.
 *
 * Takes an extra page reference, marks the page as under writeback and
 * writes bytes 0..PAGE_CACHE_SIZE via cifs_partialpagewrite(). An
 * -EAGAIN result is handled differently for WB_SYNC_ALL than for other
 * modes; in the latter case the page is redirtied for a later pass.
 * SetPageUptodate() is called on one of the remaining outcomes.
 * Writeback is then ended and the extra reference dropped.
 *
 * NOTE(review): the name suggests the caller holds the page lock -
 * confirm.
 */
2151 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2157 /* BB add check for wbc flags */
2158 page_cache_get(page);
2159 if (!PageUptodate(page))
2160 cifs_dbg(FYI, "ppw - page not up to date\n");
2163 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2165 * A writepage() implementation always needs to do either this,
2166 * or re-dirty the page with "redirty_page_for_writepage()" in
2167 * the case of a failure.
2169 * Just unlocking the page will cause the radix tree tag-bits
2170 * to fail to update with the state of the page correctly.
2172 set_page_writeback(page);
2174 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2175 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2177 else if (rc == -EAGAIN)
2178 redirty_page_for_writepage(wbc, page);
2182 SetPageUptodate(page);
2183 end_page_writeback(page);
2184 page_cache_release(page);
/*
 * cifs_writepage - single-page writeback wrapper: delegates the write of
 * @page to cifs_writepage_locked() and keeps its result in rc.
 */
2189 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2191 int rc = cifs_writepage_locked(page, wbc);
/*
 * cifs_write_end - finish a buffered write of @copied bytes at @pos into
 * @page.
 *
 * The pid used for the write depends on the CIFS_MOUNT_RWPIDFORWARD
 * mount flag; current->tgid is one of the two choices. A page flagged
 * PageChecked has the flag cleared and is marked up to date, and so is a
 * page that was completely overwritten (@copied == PAGE_CACHE_SIZE).
 * If the page is still not up to date, the copied bytes are written
 * straight to the server with cifs_write() from the kmap()ed page,
 * since the open handle is known here. set_page_dirty() is the
 * counterpart call for the other case. i_size is grown under i_lock
 * when @pos is past it, and the page reference is released.
 */
2196 static int cifs_write_end(struct file *file, struct address_space *mapping,
2197 loff_t pos, unsigned len, unsigned copied,
2198 struct page *page, void *fsdata)
2201 struct inode *inode = mapping->host;
2202 struct cifsFileInfo *cfile = file->private_data;
2203 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2206 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2209 pid = current->tgid;
2211 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2214 if (PageChecked(page)) {
2216 SetPageUptodate(page);
2217 ClearPageChecked(page);
2218 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2219 SetPageUptodate(page);
2221 if (!PageUptodate(page)) {
/* Byte offset of the write inside the page. */
2223 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2227 /* this is probably better than directly calling
2228 partialpage_write since in this function the file handle is
2229 known which we might as well leverage */
2230 /* BB check if anything else missing out of ppw
2231 such as updating last write time */
2232 page_data = kmap(page);
2233 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2234 /* if (rc < 0) should we set writebehind rc? */
2241 set_page_dirty(page);
2245 spin_lock(&inode->i_lock);
2246 if (pos > inode->i_size)
2247 i_size_write(inode, pos);
2248 spin_unlock(&inode->i_lock);
2252 page_cache_release(page);
/*
 * cifs_strict_fsync - fsync for strict cache mode.
 *
 * Writes back and waits on the dirty pages in [@start, @end], then, with
 * i_mutex held: if the inode has no read caching, the page cache mapping
 * is invalidated with cifs_zap_mapping() (a failure there is logged and
 * ignored), and unless CIFS_MOUNT_NOSSYNC is set the dialect's
 * ops->flush, when provided, is asked to flush the handle on the server.
 */
2257 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2262 struct cifs_tcon *tcon;
2263 struct TCP_Server_Info *server;
2264 struct cifsFileInfo *smbfile = file->private_data;
2265 struct inode *inode = file_inode(file);
2266 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2268 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2271 mutex_lock(&inode->i_mutex);
2275 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2278 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2279 rc = cifs_zap_mapping(inode);
2281 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2282 rc = 0; /* don't care about it in fsync */
2286 tcon = tlink_tcon(smbfile->tlink);
2287 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2288 server = tcon->ses->server;
2289 if (server->ops->flush)
2290 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2296 mutex_unlock(&inode->i_mutex);
/*
 * cifs_fsync - fsync for non-strict cache modes.
 *
 * Writes back and waits on the dirty pages in [@start, @end], then, with
 * i_mutex held and unless CIFS_MOUNT_NOSSYNC is set, asks the server to
 * flush the handle through ops->flush when the dialect provides it.
 * Unlike cifs_strict_fsync() there is no page cache invalidation step.
 */
2300 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2304 struct cifs_tcon *tcon;
2305 struct TCP_Server_Info *server;
2306 struct cifsFileInfo *smbfile = file->private_data;
2307 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2308 struct inode *inode = file->f_mapping->host;
2310 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2313 mutex_lock(&inode->i_mutex);
2317 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2320 tcon = tlink_tcon(smbfile->tlink);
2321 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2322 server = tcon->ses->server;
2323 if (server->ops->flush)
2324 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2330 mutex_unlock(&inode->i_mutex);
/*
 * cifs_flush - flush handler for @file.
 *
 * Only files opened for writing (FMODE_WRITE) trigger work: all dirty
 * pages of the inode are written back and waited on with
 * filemap_write_and_wait(). The outcome is logged at FYI level.
 */
2335 * As file closes, flush all cached write data for this inode checking
2336 * for write behind errors.
2338 int cifs_flush(struct file *file, fl_owner_t id)
2340 struct inode *inode = file_inode(file);
2343 if (file->f_mode & FMODE_WRITE)
2344 rc = filemap_write_and_wait(inode->i_mapping);
2346 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
/*
 * cifs_write_allocate_pages - fill @pages with @num_pages freshly
 * allocated pages (GFP_KERNEL, highmem allowed).
 *
 * NOTE(review): the second loop is presumably the error path that gives
 * back the pages allocated before a failure - confirm.
 */
2352 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2357 for (i = 0; i < num_pages; i++) {
2358 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2361 * save number of pages we have already allocated and
2362 * return with ENOMEM error
2371 for (i = 0; i < num_pages; i++)
/*
 * get_numpages - size one write request.
 *
 * The request length is the smaller of @len and @wsize; the page count
 * is that length rounded up to whole pages.
 *
 * NOTE(review): @cur_len presumably receives the clamped length and the
 * page count is returned - confirm.
 */
2378 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2383 clen = min_t(const size_t, len, wsize);
2384 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * cifs_uncached_writedata_release - kref release callback for uncached
 * write requests.
 *
 * Drops the reference on each of the wdata->nr_pages pages, then hands
 * the object to the common cifs_writedata_release().
 */
2393 cifs_uncached_writedata_release(struct kref *refcount)
2396 struct cifs_writedata *wdata = container_of(refcount,
2397 struct cifs_writedata, refcount);
2399 for (i = 0; i < wdata->nr_pages; i++)
2400 put_page(wdata->pages[i]);
2401 cifs_writedata_release(refcount);
/*
 * cifs_uncached_writev_complete - work handler run when an uncached
 * write request finishes.
 *
 * Under i_lock, updates the cached server EOF for the written range and
 * grows i_size to match if the server EOF is now larger. Then signals
 * wdata->done for the waiter and drops one reference on the request.
 */
2405 cifs_uncached_writev_complete(struct work_struct *work)
2407 struct cifs_writedata *wdata = container_of(work,
2408 struct cifs_writedata, work);
2409 struct inode *inode = wdata->cfile->dentry->d_inode;
2410 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2412 spin_lock(&inode->i_lock);
2413 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2414 if (cifsi->server_eof > inode->i_size)
2415 i_size_write(inode, cifsi->server_eof);
2416 spin_unlock(&inode->i_lock);
2418 complete(&wdata->done);
2420 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/*
 * wdata_fill_from_iovec - copy caller data from @from into the pages of
 * @wdata.
 *
 * Copies up to PAGE_SIZE bytes per page with copy_page_from_iter(), for
 * at most *@num_pages pages and *@len bytes in total. A short copy ends
 * the loop early. The amount actually copied is computed as the
 * difference between the saved and the remaining length.
 *
 * NOTE(review): *@len and *@num_pages are presumably updated to the
 * bytes and pages actually used, and -EFAULT returned when nothing could
 * be copied (per the existing comment) - confirm.
 */
2424 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2425 size_t *len, unsigned long *num_pages)
2427 size_t save_len, copied, bytes, cur_len = *len;
2428 unsigned long i, nr_pages = *num_pages;
2431 for (i = 0; i < nr_pages; i++) {
2432 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2433 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2436 * If we didn't copy as much as we expected, then that
2437 * may mean we trod into an unmapped area. Stop copying
2438 * at that point. On the next pass through the big
2439 * loop, we'll likely end up getting a zero-length
2440 * write and bailing out of it.
2445 cur_len = save_len - cur_len;
2449 * If we have no data to send, then that probably means that
2450 * the copy above failed altogether. That's most likely because
2451 * the address in the iovec was bogus. Return -EFAULT and let
2452 * the caller free anything we allocated and bail out.
2458 * i + 1 now represents the number of pages we actually used in
2459 * the copy phase above.
/*
 * cifs_write_from_iter - split an uncached write into async requests.
 *
 * The pid is open_file->pid when CIFS_MOUNT_RWPIDFORWARD is set and
 * current->tgid otherwise. A copy of @from is saved so the iterator can
 * be rewound. For each request:
 *   - credits are obtained with ops->wait_mtu_credits();
 *   - a cifs_writedata with enough pages is allocated and filled from
 *     @from (wdata_fill_from_iovec); unused pages are released;
 *   - the request fields are set (WB_SYNC_ALL, offset, handle reference,
 *     byte count, tail size, credits);
 *   - it is sent with ops->async_writev(), after cifs_reopen_file() if
 *     the handle had been invalidated.
 * Credits are returned on every failure path shown. On a send failure
 * the request reference is dropped, and for -EAGAIN the iterator is
 * restored from the saved copy and advanced to the current offset so the
 * same data can be resent. Successfully sent requests are appended to
 * @wdata_list.
 */
2466 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2467 struct cifsFileInfo *open_file,
2468 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2472 unsigned long nr_pages, num_pages, i;
2473 struct cifs_writedata *wdata;
2474 struct iov_iter saved_from;
2475 loff_t saved_offset = offset;
2477 struct TCP_Server_Info *server;
2479 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2480 pid = open_file->pid;
2482 pid = current->tgid;
2484 server = tlink_tcon(open_file->tlink)->ses->server;
2485 memcpy(&saved_from, from, sizeof(struct iov_iter));
2488 unsigned int wsize, credits;
2490 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2495 nr_pages = get_numpages(wsize, len, &cur_len);
2496 wdata = cifs_writedata_alloc(nr_pages,
2497 cifs_uncached_writev_complete);
2500 add_credits_and_wake_if(server, credits, 0);
2504 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2507 add_credits_and_wake_if(server, credits, 0);
2511 num_pages = nr_pages;
2512 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2514 for (i = 0; i < nr_pages; i++)
2515 put_page(wdata->pages[i]);
2517 add_credits_and_wake_if(server, credits, 0);
2522 * Bring nr_pages down to the number of pages we actually used,
2523 * and free any pages that we didn't use.
2525 for ( ; nr_pages > num_pages; nr_pages--)
2526 put_page(wdata->pages[nr_pages - 1]);
2528 wdata->sync_mode = WB_SYNC_ALL;
2529 wdata->nr_pages = nr_pages;
2530 wdata->offset = (__u64)offset;
2531 wdata->cfile = cifsFileInfo_get(open_file);
2533 wdata->bytes = cur_len;
2534 wdata->pagesz = PAGE_SIZE;
2535 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2536 wdata->credits = credits;
/* Send only if the handle is valid or could be reopened. */
2538 if (!wdata->cfile->invalidHandle ||
2539 !cifs_reopen_file(wdata->cfile, false))
2540 rc = server->ops->async_writev(wdata,
2541 cifs_uncached_writedata_release);
2543 add_credits_and_wake_if(server, wdata->credits, 0);
2544 kref_put(&wdata->refcount,
2545 cifs_uncached_writedata_release);
2546 if (rc == -EAGAIN) {
2547 memcpy(from, &saved_from,
2548 sizeof(struct iov_iter));
2549 iov_iter_advance(from, offset - saved_offset);
2555 list_add_tail(&wdata->list, wdata_list);
/*
 * cifs_iovec_write - uncached write of an iov_iter to @file at *@poffset.
 *
 * The length is validated with generic_write_checks() and the iterator
 * truncated to it. The dialect must provide ops->async_writev. The data
 * is submitted as a list of async requests by cifs_write_from_iter(),
 * then replies are collected in list order with
 * wait_for_completion_killable(). Bytes of successful requests are
 * added to the total. A request that ended with -EAGAIN is resent: a
 * copy of the original iterator is advanced to that request's offset,
 * new requests are generated, and they are spliced into the wait list.
 * Each request's reference is dropped once handled.
 *
 * *@poffset is advanced by the bytes written and the tcon statistics are
 * updated. Returns the total written, or rc when nothing was written.
 */
2564 cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2567 ssize_t total_written = 0;
2568 struct cifsFileInfo *open_file;
2569 struct cifs_tcon *tcon;
2570 struct cifs_sb_info *cifs_sb;
2571 struct cifs_writedata *wdata, *tmp;
2572 struct list_head wdata_list;
2573 struct iov_iter saved_from;
2576 len = iov_iter_count(from);
2577 rc = generic_write_checks(file, poffset, &len, 0);
2584 iov_iter_truncate(from, len);
2586 INIT_LIST_HEAD(&wdata_list);
2587 cifs_sb = CIFS_FILE_SB(file);
2588 open_file = file->private_data;
2589 tcon = tlink_tcon(open_file->tlink);
2591 if (!tcon->ses->server->ops->async_writev)
/* Keep a pristine iterator copy for resending after -EAGAIN. */
2594 memcpy(&saved_from, from, sizeof(struct iov_iter));
2596 rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
2600 * If at least one write was successfully sent, then discard any rc
2601 * value from the later writes. If the other write succeeds, then
2602 * we'll end up returning whatever was written. If it fails, then
2603 * we'll get a new rc value from that.
2605 if (!list_empty(&wdata_list))
2609 * Wait for and collect replies for any successful sends in order of
2610 * increasing offset. Once an error is hit or we get a fatal signal
2611 * while waiting, then return without waiting for any more replies.
2614 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2616 /* FIXME: freezable too? */
2617 rc = wait_for_completion_killable(&wdata->done);
2620 else if (wdata->result)
2623 total_written += wdata->bytes;
2625 /* resend call if it's a retryable error */
2626 if (rc == -EAGAIN) {
2627 struct list_head tmp_list;
2628 struct iov_iter tmp_from;
2630 INIT_LIST_HEAD(&tmp_list);
2631 list_del_init(&wdata->list);
2633 memcpy(&tmp_from, &saved_from,
2634 sizeof(struct iov_iter));
2635 iov_iter_advance(&tmp_from,
2636 wdata->offset - *poffset);
2638 rc = cifs_write_from_iter(wdata->offset,
2639 wdata->bytes, &tmp_from,
2640 open_file, cifs_sb, &tmp_list);
2642 list_splice(&tmp_list, &wdata_list);
2644 kref_put(&wdata->refcount,
2645 cifs_uncached_writedata_release);
2649 list_del_init(&wdata->list);
2650 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2653 if (total_written > 0)
2654 *poffset += total_written;
2656 cifs_stats_bytes_written(tcon, total_written);
2657 return total_written ? total_written : (ssize_t)rc;
/*
 * cifs_user_writev - uncached write entry point.
 *
 * Writes the data in @from at the iocb's current position through
 * cifs_iovec_write(), working on a local copy of the position. The
 * inode's CIFS_INO_INVALID_MAPPING flag is set so cached pages get
 * revalidated.
 *
 * NOTE(review): the flag update and the write-back of the position to
 * iocb->ki_pos are presumably done only when bytes were written -
 * confirm.
 */
2660 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2663 struct inode *inode;
2664 loff_t pos = iocb->ki_pos;
2666 inode = file_inode(iocb->ki_filp);
2669 * BB - optimize the way when signing is disabled. We can drop this
2670 * extra memory-to-memory copying and use iovec buffers for constructing
2674 written = cifs_iovec_write(iocb->ki_filp, from, &pos);
2676 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
/*
 * cifs_writev - cached write that honours mandatory byte-range locks.
 *
 * rc starts as -EACCES. With cinode->lock_sem held for read and i_mutex
 * held, the target range (starting at i_size for O_APPEND files, else at
 * iocb->ki_pos) is checked with cifs_find_lock_conflict() against an
 * exclusive lock type. Without a conflict the write proceeds through
 * __generic_file_write_iter(), followed by generic_write_sync() on the
 * written range. i_mutex is released before lock_sem.
 */
2684 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2686 struct file *file = iocb->ki_filp;
2687 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2688 struct inode *inode = file->f_mapping->host;
2689 struct cifsInodeInfo *cinode = CIFS_I(inode);
2690 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2691 ssize_t rc = -EACCES;
2692 loff_t lock_pos = iocb->ki_pos;
2695 * We need to hold the sem to be sure nobody modifies lock list
2696 * with a brlock that prevents writing.
2698 down_read(&cinode->lock_sem);
2699 mutex_lock(&inode->i_mutex);
/* Appending writes land at EOF, so that is where to look for conflicts. */
2700 if (file->f_flags & O_APPEND)
2701 lock_pos = i_size_read(inode);
2702 if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
2703 server->vals->exclusive_lock_type, NULL,
2705 rc = __generic_file_write_iter(iocb, from);
2706 mutex_unlock(&inode->i_mutex);
2711 err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2716 mutex_unlock(&inode->i_mutex);
2718 up_read(&cinode->lock_sem);
/*
 * cifs_strict_writev - write entry point for strict cache mode.
 *
 * Registers as a writer with cifs_get_writer() and unregisters with
 * cifs_put_writer() at the end. With write caching on the inode:
 *   - if POSIX byte-range locks are usable (Unix extensions,
 *     CIFS_UNIX_FCNTL_CAP, CIFS_MOUNT_NOPOSIXBRL clear) the generic
 *     cached write path generic_file_write_iter() is used;
 *   - otherwise cifs_writev(), which checks mandatory locks, is used.
 * Without write caching the data goes directly to the server through
 * cifs_user_writev(); if bytes were written while read caching is still
 * held, the page cache mapping is zapped and the event is logged.
 */
2723 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2725 struct inode *inode = file_inode(iocb->ki_filp);
2726 struct cifsInodeInfo *cinode = CIFS_I(inode);
2727 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2728 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2729 iocb->ki_filp->private_data;
2730 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2733 written = cifs_get_writer(cinode);
2737 if (CIFS_CACHE_WRITE(cinode)) {
2738 if (cap_unix(tcon->ses) &&
2739 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2740 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2741 written = generic_file_write_iter(iocb, from);
2744 written = cifs_writev(iocb, from);
2748 * For non-oplocked files in strict cache mode we need to write the data
2749 * to the server exactly from the pos to pos+len-1 rather than flush all
2750 * affected pages because it may cause a error with mandatory locks on
2751 * these pages but not on the region from pos to ppos+len-1.
2753 written = cifs_user_writev(iocb, from);
2754 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2756 * Windows 7 server can delay breaking level2 oplock if a write
2757 * request comes - break it on the client to prevent reading
2760 cifs_zap_mapping(inode);
2761 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2766 cifs_put_writer(cinode);
/*
 * Allocate a zeroed cifs_readdata with room for nr_pages page pointers
 * directly after the structure, and initialise its refcount, list head,
 * completion and work item. @complete becomes the work handler.
 * The refcount starts at one via kref_init().
 */
2770 static struct cifs_readdata *
2771 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2773 struct cifs_readdata *rdata;
/* One allocation: the struct followed by nr_pages 'struct page *' slots. */
2775 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2777 if (rdata != NULL) {
2778 kref_init(&rdata->refcount);
2779 INIT_LIST_HEAD(&rdata->list);
2780 init_completion(&rdata->done);
2781 INIT_WORK(&rdata->work, complete);
/*
 * kref release callback for a cifs_readdata: recovers the structure from its
 * embedded refcount and drops the open-file reference held in rdata->cfile.
 * NOTE(review): the remaining statements of this function are not visible in
 * this excerpt.
 */
2788 cifs_readdata_release(struct kref *refcount)
2790 struct cifs_readdata *rdata = container_of(refcount,
2791 struct cifs_readdata, refcount);
2794 cifsFileInfo_put(rdata->cfile);
2800 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2806 for (i = 0; i < nr_pages; i++) {
2807 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2812 rdata->pages[i] = page;
2816 for (i = 0; i < nr_pages; i++) {
2817 put_page(rdata->pages[i]);
2818 rdata->pages[i] = NULL;
/*
 * kref release callback for readdata used by the uncached read path: drops
 * the reference on each of the rdata->nr_pages pages, clears the slots, and
 * then chains to cifs_readdata_release() for the common teardown.
 */
2825 cifs_uncached_readdata_release(struct kref *refcount)
2827 struct cifs_readdata *rdata = container_of(refcount,
2828 struct cifs_readdata, refcount);
2831 for (i = 0; i < rdata->nr_pages; i++) {
2832 put_page(rdata->pages[i]);
2833 rdata->pages[i] = NULL;
2835 cifs_readdata_release(refcount);
2839 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2840 * @rdata: the readdata response with list of pages holding data
2841 * @iter: destination for our data
2843 * This function copies the data received for a readdata response (up to
2844 * rdata->got_bytes) out of its pages and into the supplied iov_iter. Pages
2845 * are consumed in order, each contributing at most PAGE_SIZE bytes.
/*
 * Copy rdata->got_bytes bytes from rdata's pages into @iter, page by page.
 * Returns 0 when everything was copied and -EFAULT if bytes remain uncopied
 * once the loop ends.
 */
2848 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2850 size_t remaining = rdata->got_bytes;
2853 for (i = 0; i < rdata->nr_pages; i++) {
2854 struct page *page = rdata->pages[i];
/* Never take more than one page's worth, nor more than is still owed. */
2855 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2856 size_t written = copy_page_to_iter(page, 0, copy, iter);
2857 remaining -= written;
/*
 * Short copy although the iterator still has room. NOTE(review): the
 * statement controlled by this test is not visible here; presumably it
 * leaves the loop.
 */
2858 if (written < copy && iov_iter_count(iter) > 0)
2861 return remaining ? -EFAULT : 0;
/*
 * Work handler for uncached reads: signals rdata->done (the completion that
 * cifs_user_readv() waits on) and drops one reference on the readdata.
 */
2865 cifs_uncached_readv_complete(struct work_struct *work)
2867 struct cifs_readdata *rdata = container_of(work,
2868 struct cifs_readdata, work);
2870 complete(&rdata->done);
2871 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * read_into_pages callback for uncached reads: pulls @len bytes of response
 * payload from the server socket into rdata's pages, one page per
 * cifs_readv_from_socket() call, accumulating the count in rdata->got_bytes.
 *
 * Returns rdata->got_bytes if any data arrived and the last socket result was
 * not -ECONNABORTED; otherwise returns that last result.
 * NOTE(review): some statements (length bookkeeping, error checks after the
 * socket read) are not visible in this excerpt.
 */
2875 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2876 struct cifs_readdata *rdata, unsigned int len)
2880 unsigned int nr_pages = rdata->nr_pages;
2883 rdata->got_bytes = 0;
/* Default tail size is a full page; overwritten below for a partial page. */
2884 rdata->tailsz = PAGE_SIZE;
2885 for (i = 0; i < nr_pages; i++) {
2886 struct page *page = rdata->pages[i];
2888 if (len >= PAGE_SIZE) {
2889 /* enough data to fill the page */
2890 iov.iov_base = kmap(page);
2891 iov.iov_len = PAGE_SIZE;
2892 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2893 i, iov.iov_base, iov.iov_len);
2895 } else if (len > 0) {
2896 /* enough for partial page, fill and zero the rest */
2897 iov.iov_base = kmap(page);
2899 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2900 i, iov.iov_base, iov.iov_len);
2901 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2902 rdata->tailsz = len;
2905 /* no need to hold page hostage */
2906 rdata->pages[i] = NULL;
2912 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2917 rdata->got_bytes += result;
2920 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2921 rdata->got_bytes : result;
/*
 * Issue asynchronous reads for @len bytes starting at @offset on @open_file.
 * A request is sized to at most the rsize obtained from wait_mtu_credits(),
 * gets its own cifs_readdata with freshly allocated pages, is submitted via
 * the server's async_readv op, and is then appended to @rdata_list.
 * NOTE(review): the enclosing loop, error labels and return statement are not
 * visible in this excerpt.
 */
2925 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2926 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2928 struct cifs_readdata *rdata;
2929 unsigned int npages, rsize, credits;
2933 struct TCP_Server_Info *server;
2935 server = tlink_tcon(open_file->tlink)->ses->server;
/* With CIFS_MOUNT_RWPIDFORWARD use the opener's pid, else the caller's tgid. */
2937 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2938 pid = open_file->pid;
2940 pid = current->tgid;
2943 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
/* This request covers whichever is smaller: what is left or rsize. */
2948 cur_len = min_t(const size_t, len, rsize);
2949 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2951 /* allocate a readdata struct */
2952 rdata = cifs_readdata_alloc(npages,
2953 cifs_uncached_readv_complete);
/* NOTE(review): presumably the allocation-failure path -- credits returned. */
2955 add_credits_and_wake_if(server, credits, 0);
2960 rc = cifs_read_allocate_pages(rdata, npages);
2964 rdata->cfile = cifsFileInfo_get(open_file);
2965 rdata->nr_pages = npages;
2966 rdata->offset = offset;
2967 rdata->bytes = cur_len;
2969 rdata->pagesz = PAGE_SIZE;
2970 rdata->read_into_pages = cifs_uncached_read_into_pages;
2971 rdata->credits = credits;
/* Send only if the handle is valid, or was reopened (reopen returned 0). */
2973 if (!rdata->cfile->invalidHandle ||
2974 !cifs_reopen_file(rdata->cfile, true))
2975 rc = server->ops->async_readv(rdata);
/* Failure handling: return the credits and drop the readdata reference. */
2978 add_credits_and_wake_if(server, rdata->credits, 0);
2979 kref_put(&rdata->refcount,
2980 cifs_uncached_readdata_release);
2986 list_add_tail(&rdata->list, rdata_list);
/*
 * Uncached read entry point. Sends asynchronous reads for the whole request
 * through cifs_send_async_read(), then walks the resulting list, waiting for
 * each reply and copying its pages into @to. A reply that finished with
 * -EAGAIN is retried by issuing new reads for the part not yet received.
 * The number of bytes read is derived from how far the iterator advanced, and
 * iocb->ki_pos is advanced by that amount.
 * NOTE(review): early-exit checks, labels and the final return are not
 * visible in this excerpt.
 */
2994 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2996 struct file *file = iocb->ki_filp;
2999 ssize_t total_read = 0;
3000 loff_t offset = iocb->ki_pos;
3001 struct cifs_sb_info *cifs_sb;
3002 struct cifs_tcon *tcon;
3003 struct cifsFileInfo *open_file;
3004 struct cifs_readdata *rdata, *tmp;
3005 struct list_head rdata_list;
3007 len = iov_iter_count(to);
3011 INIT_LIST_HEAD(&rdata_list);
3012 cifs_sb = CIFS_FILE_SB(file);
3013 open_file = file->private_data;
3014 tcon = tlink_tcon(open_file->tlink);
/* This path depends on the server ops providing async_readv. */
3016 if (!tcon->ses->server->ops->async_readv)
3019 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3020 cifs_dbg(FYI, "attempting read on write only file instance\n");
3022 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3024 /* if at least one read request send succeeded, then reset rc */
3025 if (!list_empty(&rdata_list))
/* Remember the iterator's size now; total_read is computed from it below. */
3028 len = iov_iter_count(to);
3029 /* the loop below should proceed in the order of increasing offsets */
3031 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3033 /* FIXME: freezable sleep too? */
3034 rc = wait_for_completion_killable(&rdata->done);
3037 else if (rdata->result == -EAGAIN) {
3038 /* resend call if it's a retryable error */
3039 struct list_head tmp_list;
3040 unsigned int got_bytes = rdata->got_bytes;
3042 list_del_init(&rdata->list);
3043 INIT_LIST_HEAD(&tmp_list);
3046 * Got a part of data and then reconnect has
3047 * happened -- fill the buffer and continue
3050 if (got_bytes && got_bytes < rdata->bytes) {
3051 rc = cifs_readdata_to_iov(rdata, to);
3053 kref_put(&rdata->refcount,
3054 cifs_uncached_readdata_release);
/* Re-request only the range that has not been received yet. */
3059 rc = cifs_send_async_read(
3060 rdata->offset + got_bytes,
3061 rdata->bytes - got_bytes,
3062 rdata->cfile, cifs_sb,
/* Merge the replacement requests back into the list being walked. */
3065 list_splice(&tmp_list, &rdata_list);
3067 kref_put(&rdata->refcount,
3068 cifs_uncached_readdata_release);
3070 } else if (rdata->result)
3073 rc = cifs_readdata_to_iov(rdata, to);
3075 /* if there was a short read -- discard anything left */
3076 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3079 list_del_init(&rdata->list);
3080 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/* Bytes delivered = how much the destination iterator shrank. */
3083 total_read = len - iov_iter_count(to);
3085 cifs_stats_bytes_read(tcon, total_read);
3087 /* mask nodata case */
3092 iocb->ki_pos += total_read;
/*
 * Read path for strict cache mode:
 *  - without CIFS_CACHE_READ, always read from the server (cifs_user_readv);
 *  - with it, and when the Unix extensions advertise CIFS_UNIX_FCNTL_CAP and
 *    CIFS_MOUNT_NOPOSIXBRL is clear, use generic_file_read_iter() directly;
 *  - otherwise use generic_file_read_iter() only if cifs_find_lock_conflict()
 *    finds no conflicting lock for the range, checked under lock_sem.
 */
3099 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3101 struct inode *inode = file_inode(iocb->ki_filp);
3102 struct cifsInodeInfo *cinode = CIFS_I(inode);
3103 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3104 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3105 iocb->ki_filp->private_data;
3106 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3110 * In strict cache mode we need to read from the server all the time
3111 * if we don't have level II oplock because the server can delay mtime
3112 * change - so we can't make a decision about inode invalidating.
3113 * And we can also fail with pagereading if there are mandatory locks
3114 * on pages affected by this read but not on the region from pos to
3117 if (!CIFS_CACHE_READ(cinode))
3118 return cifs_user_readv(iocb, to);
3120 if (cap_unix(tcon->ses) &&
3121 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3122 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3123 return generic_file_read_iter(iocb, to);
3126 * We need to hold the sem to be sure nobody modifies lock list
3127 * with a brlock that prevents reading.
3129 down_read(&cinode->lock_sem);
3130 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3131 tcon->ses->server->vals->shared_lock_type,
3132 NULL, CIFS_READ_OP))
3133 rc = generic_file_read_iter(iocb, to);
3134 up_read(&cinode->lock_sem);
/*
 * Synchronous read of up to @read_size bytes from @file into @read_data,
 * starting at *@offset. The request is issued in chunks through the server's
 * sync_read op; a chunk is retried while the op returns -EAGAIN, and the
 * handle is reopened first if it has been invalidated. *@offset is advanced
 * by the bytes read.
 * NOTE(review): error returns, the chunk-size bound and the final return
 * value are on lines not visible in this excerpt.
 */
3139 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3142 unsigned int bytes_read = 0;
3143 unsigned int total_read;
3144 unsigned int current_read_size;
3146 struct cifs_sb_info *cifs_sb;
3147 struct cifs_tcon *tcon;
3148 struct TCP_Server_Info *server;
3151 struct cifsFileInfo *open_file;
3152 struct cifs_io_parms io_parms;
3153 int buf_type = CIFS_NO_BUFFER;
3157 cifs_sb = CIFS_FILE_SB(file);
3159 /* FIXME: set up handlers for larger reads and/or convert to async */
3160 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3162 if (file->private_data == NULL) {
3167 open_file = file->private_data;
3168 tcon = tlink_tcon(open_file->tlink);
3169 server = tcon->ses->server;
3171 if (!server->ops->sync_read) {
/* With CIFS_MOUNT_RWPIDFORWARD use the opener's pid, else the caller's tgid. */
3176 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3177 pid = open_file->pid;
3179 pid = current->tgid;
3181 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3182 cifs_dbg(FYI, "attempting read on write only file instance\n");
/* Each pass advances both the running total and the destination pointer. */
3184 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3185 total_read += bytes_read, cur_offset += bytes_read) {
3187 current_read_size = min_t(uint, read_size - total_read,
3190 * For windows me and 9x we do not want to request more
3191 * than it negotiated since it will refuse the read
3194 if ((tcon->ses) && !(tcon->ses->capabilities &
3195 tcon->ses->server->vals->cap_large_files)) {
3196 current_read_size = min_t(uint,
3197 current_read_size, CIFSMaxBufSize);
3199 if (open_file->invalidHandle) {
3200 rc = cifs_reopen_file(open_file, true);
3205 io_parms.tcon = tcon;
3206 io_parms.offset = *offset;
3207 io_parms.length = current_read_size;
3208 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3209 &bytes_read, &cur_offset,
3211 } while (rc == -EAGAIN);
/* An error or a zero-byte read ends the transfer. */
3213 if (rc || (bytes_read == 0)) {
3221 cifs_stats_bytes_read(tcon, total_read);
3222 *offset += bytes_read;
3230 * If the page is mmap'ed into a process' page tables, then we need to make
3231 * sure that it doesn't change while being written back.
/*
 * page_mkwrite handler installed in cifs_file_vm_ops. Operates on the
 * faulting page (vmf->page) and returns VM_FAULT_LOCKED.
 * NOTE(review): the statement between these two lines is not visible in this
 * excerpt; given the return code it presumably locks the page -- confirm.
 */
3234 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3236 struct page *page = vmf->page;
3239 return VM_FAULT_LOCKED;
/*
 * VM operations assigned to vma->vm_ops by cifs_file_mmap() and
 * cifs_file_strict_mmap(): the generic filemap fault/map_pages handlers plus
 * the CIFS-specific page_mkwrite hook.
 */
3242 static struct vm_operations_struct cifs_file_vm_ops = {
3243 .fault = filemap_fault,
3244 .map_pages = filemap_map_pages,
3245 .page_mkwrite = cifs_page_mkwrite,
/*
 * mmap for strict cache mode: when read caching is not granted
 * (!CIFS_CACHE_READ) the cached mapping is zapped first, then the generic
 * mmap is performed and the CIFS vm_ops are installed.
 * NOTE(review): error checks and the return are not visible in this excerpt.
 */
3248 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3251 struct inode *inode = file_inode(file);
3255 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3256 rc = cifs_zap_mapping(inode);
3261 rc = generic_file_mmap(file, vma);
3263 vma->vm_ops = &cifs_file_vm_ops;
/*
 * mmap for the non-strict case: revalidates the file first (logging a debug
 * message on failure), then performs the generic mmap and installs the CIFS
 * vm_ops.
 * NOTE(review): error checks and the return are not visible in this excerpt.
 */
3268 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3273 rc = cifs_revalidate_file(file);
3275 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3280 rc = generic_file_mmap(file, vma);
3282 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Work handler for page-cache (readpages) reads. For every page in the
 * request it adds the page to the LRU, marks it up to date and pushes it to
 * fscache when the read succeeded (or ended with -EAGAIN while data for that
 * page had already arrived), then releases the page reference and clears the
 * slot. Finally drops one reference on the readdata.
 */
3288 cifs_readv_complete(struct work_struct *work)
3290 unsigned int i, got_bytes;
3291 struct cifs_readdata *rdata = container_of(work,
3292 struct cifs_readdata, work);
/* Local countdown of received bytes, reduced by one page per iteration. */
3294 got_bytes = rdata->got_bytes;
3295 for (i = 0; i < rdata->nr_pages; i++) {
3296 struct page *page = rdata->pages[i];
3298 lru_cache_add_file(page);
3300 if (rdata->result == 0 ||
3301 (rdata->result == -EAGAIN && got_bytes)) {
3302 flush_dcache_page(page);
3303 SetPageUptodate(page);
3308 if (rdata->result == 0 ||
3309 (rdata->result == -EAGAIN && got_bytes))
3310 cifs_readpage_to_fscache(rdata->mapping->host, page);
/* Once this reaches zero, later pages of an -EAGAIN reply are skipped. */
3312 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3314 page_cache_release(page);
3315 rdata->pages[i] = NULL;
3317 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * read_into_pages callback for page-cache reads: pulls @len bytes of response
 * payload from the server socket into rdata's pages, one page per
 * cifs_readv_from_socket() call. Pages for which no data is left are either
 * zero-filled and marked up to date (when they lie beyond the server's
 * presumed EOF) or simply released.
 *
 * Returns rdata->got_bytes if any data arrived and the last socket result was
 * not -ECONNABORTED; otherwise returns that last result.
 * NOTE(review): some statements (length bookkeeping, unlock/continue, error
 * checks after the socket read) are not visible in this excerpt.
 */
3321 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3322 struct cifs_readdata *rdata, unsigned int len)
3328 unsigned int nr_pages = rdata->nr_pages;
3331 /* determine the eof that the server (probably) has */
3332 eof = CIFS_I(rdata->mapping->host)->server_eof;
/* Index of the page holding the last byte; 0 for an empty file. */
3333 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3334 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3336 rdata->got_bytes = 0;
3337 rdata->tailsz = PAGE_CACHE_SIZE;
3338 for (i = 0; i < nr_pages; i++) {
3339 struct page *page = rdata->pages[i];
3341 if (len >= PAGE_CACHE_SIZE) {
3342 /* enough data to fill the page */
3343 iov.iov_base = kmap(page);
3344 iov.iov_len = PAGE_CACHE_SIZE;
3345 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3346 i, page->index, iov.iov_base, iov.iov_len);
3347 len -= PAGE_CACHE_SIZE;
3348 } else if (len > 0) {
3349 /* enough for partial page, fill and zero the rest */
3350 iov.iov_base = kmap(page);
3352 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3353 i, page->index, iov.iov_base, iov.iov_len);
3354 memset(iov.iov_base + len,
3355 '\0', PAGE_CACHE_SIZE - len);
3356 rdata->tailsz = len;
3358 } else if (page->index > eof_index) {
3360 * The VFS will not try to do readahead past the
3361 * i_size, but it's possible that we have outstanding
3362 * writes with gaps in the middle and the i_size hasn't
3363 * caught up yet. Populate those with zeroed out pages
3364 * to prevent the VFS from repeatedly attempting to
3365 * fill them until the writes are flushed.
3367 zero_user(page, 0, PAGE_CACHE_SIZE);
3368 lru_cache_add_file(page);
3369 flush_dcache_page(page);
3370 SetPageUptodate(page);
3372 page_cache_release(page);
3373 rdata->pages[i] = NULL;
3377 /* no need to hold page hostage */
3378 lru_cache_add_file(page);
3380 page_cache_release(page);
3381 rdata->pages[i] = NULL;
3386 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3391 rdata->got_bytes += result;
3394 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3395 rdata->got_bytes : result;
/*
 * Build one read request's worth of pages. Takes the page at the tail of
 * @page_list, locks it and inserts it into the page cache, then keeps moving
 * following pages onto @tmplist while their indexes are consecutive and the
 * request stays within @rsize. Reports the starting file offset in *@offset
 * and the request size in *@bytes.
 * NOTE(review): the updates of *@nr_pages, the loop exits and the return are
 * on lines not visible in this excerpt.
 */
3399 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3400 unsigned int rsize, struct list_head *tmplist,
3401 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3403 struct page *page, *tpage;
3404 unsigned int expected_index;
3407 INIT_LIST_HEAD(tmplist);
/* Start from the last entry of page_list. */
3409 page = list_entry(page_list->prev, struct page, lru);
3412 * Lock the page and put it in the cache. Since no one else
3413 * should have access to this page, we're safe to simply set
3414 * PG_locked without checking it first.
3416 __set_page_locked(page);
3417 rc = add_to_page_cache_locked(page, mapping,
3418 page->index, GFP_KERNEL);
3420 /* give up if we can't stick it in the cache */
3422 __clear_page_locked(page);
3426 /* move first page to the tmplist */
3427 *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3428 *bytes = PAGE_CACHE_SIZE;
3430 list_move_tail(&page->lru, tmplist);
3432 /* now try and add more pages onto the request */
3433 expected_index = page->index + 1;
3434 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3435 /* discontinuity ? */
3436 if (page->index != expected_index)
3439 /* would this page push the read over the rsize? */
3440 if (*bytes + PAGE_CACHE_SIZE > rsize)
3443 __set_page_locked(page);
3444 if (add_to_page_cache_locked(page, mapping, page->index,
3446 __clear_page_locked(page);
3449 list_move_tail(&page->lru, tmplist);
3450 (*bytes) += PAGE_CACHE_SIZE;
/*
 * ->readpages implementation. First tries to satisfy the pages from fscache,
 * then repeatedly: obtains credits and an rsize from wait_mtu_credits(),
 * gathers a run of contiguous pages with readpages_get_pages(), wraps them in
 * a cifs_readdata and submits it through the server's async_readv op. On the
 * visible failure paths the credits are returned and the gathered pages are
 * put on the LRU and released. Before returning, pages still on @page_list
 * are cancelled from fscache.
 * NOTE(review): loop exits, unlock calls and the return are on lines not
 * visible in this excerpt.
 */
3457 static int cifs_readpages(struct file *file, struct address_space *mapping,
3458 struct list_head *page_list, unsigned num_pages)
3461 struct list_head tmplist;
3462 struct cifsFileInfo *open_file = file->private_data;
3463 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3464 struct TCP_Server_Info *server;
3468 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3469 * immediately if the cookie is negative
3471 * After this point, every page in the list might have PG_fscache set,
3472 * so we will need to clean that up off of every page we don't use.
3474 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
/* With CIFS_MOUNT_RWPIDFORWARD use the opener's pid, else the caller's tgid. */
3479 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3480 pid = open_file->pid;
3482 pid = current->tgid;
3485 server = tlink_tcon(open_file->tlink)->ses->server;
3487 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3488 __func__, file, mapping, num_pages);
3491 * Start with the page at end of list and move it to private
3492 * list. Do the same with any following pages until we hit
3493 * the rsize limit, hit an index discontinuity, or run out of
3494 * pages. Issue the async read and then start the loop again
3495 * until the list is empty.
3497 * Note that list order is important. The page_list is in
3498 * the order of declining indexes. When we put the pages in
3499 * the rdata->pages, then we want them in increasing order.
3501 while (!list_empty(page_list)) {
3502 unsigned int i, nr_pages, bytes, rsize;
3504 struct page *page, *tpage;
3505 struct cifs_readdata *rdata;
3508 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3514 * Give up immediately if rsize is too small to read an entire
3515 * page. The VFS will fall back to readpage. We should never
3516 * reach this point however since we set ra_pages to 0 when the
3517 * rsize is smaller than a cache page.
3519 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3520 add_credits_and_wake_if(server, credits, 0);
3524 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3525 &nr_pages, &offset, &bytes);
3527 add_credits_and_wake_if(server, credits, 0);
3531 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3533 /* best to give up if we're out of mem */
3534 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3535 list_del(&page->lru);
3536 lru_cache_add_file(page);
3538 page_cache_release(page);
3541 add_credits_and_wake_if(server, credits, 0);
3545 rdata->cfile = cifsFileInfo_get(open_file);
3546 rdata->mapping = mapping;
3547 rdata->offset = offset;
3548 rdata->bytes = bytes;
3550 rdata->pagesz = PAGE_CACHE_SIZE;
3551 rdata->read_into_pages = cifs_readpages_read_into_pages;
3552 rdata->credits = credits;
/* Move the gathered pages into rdata->pages, counting them as we go. */
3554 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3555 list_del(&page->lru);
3556 rdata->pages[rdata->nr_pages++] = page;
/* Send only if the handle is valid, or was reopened (reopen returned 0). */
3559 if (!rdata->cfile->invalidHandle ||
3560 !cifs_reopen_file(rdata->cfile, true))
3561 rc = server->ops->async_readv(rdata);
3563 add_credits_and_wake_if(server, rdata->credits, 0);
3564 for (i = 0; i < rdata->nr_pages; i++) {
3565 page = rdata->pages[i];
3566 lru_cache_add_file(page);
3568 page_cache_release(page);
3570 /* Fallback to the readpage in error/reconnect cases */
3571 kref_put(&rdata->refcount, cifs_readdata_release);
3575 kref_put(&rdata->refcount, cifs_readdata_release);
3578 /* Any pages that have been shown to fscache but didn't get added to
3579 * the pagecache must be uncached before they get returned to the
3582 cifs_fscache_readpages_cancel(mapping->host, page_list);
3587 * cifs_readpage_worker must be called with the page pinned
/*
 * Fill a single page: try fscache first, otherwise map the page and read
 * PAGE_CACHE_SIZE bytes synchronously with cifs_read(). After a read the
 * inode's atime is refreshed, any part of the page beyond the bytes read is
 * zeroed, and the page is marked up to date and offered to fscache.
 * NOTE(review): the result checks, unmap and return are on lines not visible
 * in this excerpt.
 */
3589 static int cifs_readpage_worker(struct file *file, struct page *page,
3595 /* Is the page cached? */
3596 rc = cifs_readpage_from_fscache(file_inode(file), page);
3600 read_data = kmap(page);
3601 /* for reads over a certain size could initiate async read ahead */
3603 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3608 cifs_dbg(FYI, "Bytes read %d\n", rc);
3610 file_inode(file)->i_atime =
3611 current_fs_time(file_inode(file)->i_sb);
/* Short read: zero the remainder of the page. */
3613 if (PAGE_CACHE_SIZE > rc)
3614 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3616 flush_dcache_page(page);
3617 SetPageUptodate(page);
3619 /* send this page to the cache */
3620 cifs_readpage_to_fscache(file_inode(file), page);
/*
 * ->readpage implementation: converts the page index into a byte offset and
 * delegates to cifs_readpage_worker(). A file without private_data is handled
 * separately (that branch's body is not visible in this excerpt).
 */
3632 static int cifs_readpage(struct file *file, struct page *page)
3634 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3640 if (file->private_data == NULL) {
3646 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3647 page, (int)offset, (int)offset);
3649 rc = cifs_readpage_worker(file, page, &offset);
/*
 * Scan the inode's open-file list under cifs_file_list_lock looking for an
 * instance opened with FMODE_WRITE. The lock is released on both the match
 * path and the fall-through path.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
3655 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3657 struct cifsFileInfo *open_file;
3659 spin_lock(&cifs_file_list_lock);
3660 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3661 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3662 spin_unlock(&cifs_file_list_lock);
3666 spin_unlock(&cifs_file_list_lock);
3670 /* We do not want to update the file size from server for inodes
3671 open for write - to avoid races with writepage extending
3672 the file - in the future we could consider allowing
3673 refreshing the inode only on increases in the file size
3674 but this is tricky to do without racing with writebehind
3675 page caching in the current Linux kernel design */
/*
 * Decide whether the cached file size may be replaced by @end_of_file. For an
 * inode that is open for write the decision depends on whether the mount uses
 * direct I/O and on whether the local i_size is smaller than @end_of_file.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
3676 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3681 if (is_inode_writable(cifsInode)) {
3682 /* This inode is open for write at least once */
3683 struct cifs_sb_info *cifs_sb;
3685 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3686 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3687 /* since no page cache to corrupt on directio
3688 we can change size safely */
3692 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * ->write_begin implementation: grabs the page-cache page covering @pos and
 * decides whether it must be read from the server before the write. The read
 * is skipped when the page is already up to date, when a full page is being
 * written, or -- with read caching granted -- when none of the existing file
 * data on the page would survive the write. Otherwise, unless the file is
 * write-only, the page is read once via cifs_readpage_worker().
 * NOTE(review): gotos/labels, the retry logic around 'oncethru' and the
 * return are on lines not visible in this excerpt.
 */
3700 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3701 loff_t pos, unsigned len, unsigned flags,
3702 struct page **pagep, void **fsdata)
3705 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3706 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
/*
 * NOTE(review): PAGE_MASK is typically an unsigned long; on a 32-bit build
 * this AND would then clear the upper 32 bits of pos for offsets beyond 4GiB.
 * Confirm, and consider masking with ~(loff_t)(PAGE_CACHE_SIZE - 1).
 */
3707 loff_t page_start = pos & PAGE_MASK;
3712 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3715 page = grab_cache_page_write_begin(mapping, index, flags);
3721 if (PageUptodate(page))
3725 * If we write a full page it will be up to date, no need to read from
3726 * the server. If the write is short, we'll end up doing a sync write
3729 if (len == PAGE_CACHE_SIZE)
3733 * optimize away the read when we have an oplock, and we're not
3734 * expecting to use any of the data we'd be reading in. That
3735 * is, when the page lies beyond the EOF, or straddles the EOF
3736 * and the write will cover all of the existing data.
3738 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3739 i_size = i_size_read(mapping->host);
3740 if (page_start >= i_size ||
3741 (offset == 0 && (pos + len) >= i_size)) {
3742 zero_user_segments(page, 0, offset,
3746 * PageChecked means that the parts of the page
3747 * to which we're not writing are considered up
3748 * to date. Once the data is copied to the
3749 * page, it can be set uptodate.
3751 SetPageChecked(page);
3756 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3758 * might as well read a page, it is fast enough. If we get
3759 * an error, we don't need to return it. cifs_write_end will
3760 * do a sync write instead since PG_uptodate isn't set.
3762 cifs_readpage_worker(file, page, &page_start);
3763 page_cache_release(page);
3767 /* we could try using another file handle if there is one -
3768 but how would we lock it to prevent close of that handle
3769 racing with this read? In any case
3770 this will be written out by write_end so is fine */
/*
 * ->releasepage implementation: pages with PagePrivate set take a separate
 * branch (its body is not visible in this excerpt); for all others the
 * decision is delegated to cifs_fscache_release_page().
 */
3777 static int cifs_release_page(struct page *page, gfp_t gfp)
3779 if (PagePrivate(page))
3782 return cifs_fscache_release_page(page, gfp);
/*
 * ->invalidatepage implementation: the fscache copy is invalidated only when
 * the whole page is being invalidated (offset 0, length PAGE_CACHE_SIZE);
 * partial invalidations do nothing here.
 */
3785 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3786 unsigned int length)
3788 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3790 if (offset == 0 && length == PAGE_CACHE_SIZE)
3791 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * ->launder_page implementation: if the page is dirty, write it out
 * synchronously (WB_SYNC_ALL, range limited to exactly this page) with
 * cifs_writepage_locked(), then invalidate the page's fscache copy.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
3794 static int cifs_launder_page(struct page *page)
3797 loff_t range_start = page_offset(page);
/* Inclusive end: the last byte of this page. */
3798 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3799 struct writeback_control wbc = {
3800 .sync_mode = WB_SYNC_ALL,
3802 .range_start = range_start,
3803 .range_end = range_end,
3806 cifs_dbg(FYI, "Launder page: %p\n", page);
3808 if (clear_page_dirty_for_io(page))
3809 rc = cifs_writepage_locked(page, &wbc);
3811 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * Work handler run for an oplock break on an open file. In order it:
 * waits for pending writers to finish, downgrades the cached oplock level,
 * breaks local leases and flushes dirty pages (also waiting for writeback
 * and zapping the mapping when read caching is lost), pushes byte-range
 * locks to the server, sends the oplock response unless the break was
 * cancelled, and finally signals completion of the break.
 * NOTE(review): some statements are on lines not visible in this excerpt.
 */
3815 void cifs_oplock_break(struct work_struct *work)
3817 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3819 struct inode *inode = cfile->dentry->d_inode;
3820 struct cifsInodeInfo *cinode = CIFS_I(inode);
3821 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3822 struct TCP_Server_Info *server = tcon->ses->server;
3825 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3826 TASK_UNINTERRUPTIBLE);
3828 server->ops->downgrade_oplock(server, cinode,
3829 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3831 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3832 cifs_has_mand_locks(cinode)) {
3833 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
/*
 * NOTE(review): inode is tested for NULL here although cinode, derived from
 * it, has already been dereferenced above -- confirm whether a NULL inode is
 * actually possible at this point.
 */
3838 if (inode && S_ISREG(inode->i_mode)) {
3839 if (CIFS_CACHE_READ(cinode))
3840 break_lease(inode, O_RDONLY);
3842 break_lease(inode, O_WRONLY);
3843 rc = filemap_fdatawrite(inode->i_mapping);
/* Read caching lost: wait for writeback, record errors, drop cached pages. */
3844 if (!CIFS_CACHE_READ(cinode)) {
3845 rc = filemap_fdatawait(inode->i_mapping);
3846 mapping_set_error(inode->i_mapping, rc);
3847 cifs_zap_mapping(inode);
3849 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3852 rc = cifs_push_locks(cfile);
3854 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3857 * releasing stale oplock after recent reconnect of smb session using
3858 * a now incorrect file handle is not a data integrity issue but do
3859 * not bother sending an oplock release if session to server still is
3860 * disconnected since oplock already released by the server
3862 if (!cfile->oplock_break_cancelled) {
3863 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3865 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3867 cifs_done_oplock_break(cinode);
3871 * The presence of cifs_direct_io() in the address space ops vector
3872 * allows open() O_DIRECT flags which would have failed otherwise.
3874 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3875 * so this method should never be called.
3877 * Direct IO is not yet supported in the cached mode.
/*
 * direct_IO method registered in cifs_addr_ops.
 * NOTE(review): the remaining parameters and the body are not visible in this
 * excerpt; only the signature start and an in-body remark are shown.
 */
3880 cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
3885 * Eventually need to support direct IO for non forcedirectio mounts
/*
 * Address-space operations table that includes readpages and direct_IO in
 * addition to the per-page read/write, write_begin/write_end, release,
 * invalidate and launder hooks defined in this file.
 */
3891 const struct address_space_operations cifs_addr_ops = {
3892 .readpage = cifs_readpage,
3893 .readpages = cifs_readpages,
3894 .writepage = cifs_writepage,
3895 .writepages = cifs_writepages,
3896 .write_begin = cifs_write_begin,
3897 .write_end = cifs_write_end,
3898 .set_page_dirty = __set_page_dirty_nobuffers,
3899 .releasepage = cifs_release_page,
3900 .direct_IO = cifs_direct_io,
3901 .invalidatepage = cifs_invalidate_page,
3902 .launder_page = cifs_launder_page,
3906 * cifs_readpages requires the server to support a buffer large enough to
3907 * contain the header plus one complete page of data. Otherwise, we need
3908 * to leave cifs_readpages out of the address space operations.
3910 const struct address_space_operations cifs_addr_ops_smallbuf = {
3911 .readpage = cifs_readpage,
3912 .writepage = cifs_writepage,
3913 .writepages = cifs_writepages,
3914 .write_begin = cifs_write_begin,
3915 .write_end = cifs_write_end,
3916 .set_page_dirty = __set_page_dirty_nobuffers,
3917 .releasepage = cifs_release_page,
3918 .invalidatepage = cifs_invalidate_page,
3919 .launder_page = cifs_launder_page,